blob: e5f3e0c02d7e8f987b7af3b944af8b6b5c844c18 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 *
 * The variable has external linkage and is not const, so it can be
 * changed at run time; the default is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;

/* NOTE(review): presumably selects the SAX2 code paths at build time;
 * no use of this macro is visible in this section - confirm. */
#define SAX2 1

/* Buffer sizes in bytes/characters - presumably initial sizes for the
 * parser's internal growable buffers; TODO confirm against the users. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string (as an xmlChar string via BAD_CAST); its consumers are
 * not visible in this section. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * NULL-terminated lookup table. Both the strings and the array itself are
 * const: the table is read-only data and must never be modified.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
/*
 * Prototypes needed before first use. The static ones have internal
 * linkage, so their definitions must appear later in this translation
 * unit (they are not visible in this section).
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

/* Returns an xmlParserErrors code; @list is an output node list pointer. */
static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

/* Returns an xmlParserErrors code; @lst is an output node list pointer. */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
	      const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000586 * SAX2 defaulted attributes handling *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlDetectSAX2:
592 * @ctxt: an XML parser context
593 *
594 * Do the SAX2 detection and specific intialization
595 */
596static void
597xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
598 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000599#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000600 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
601 ((ctxt->sax->startElementNs != NULL) ||
602 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000603#else
604 ctxt->sax2 = 1;
605#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000606
607 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
608 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
609 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000610 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
611 (ctxt->str_xml_ns == NULL)) {
612 xmlErrMemory(ctxt, NULL);
613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614}
615
/*
 * Table of the defaulted attributes of one element.
 *
 * xmlAddDefAttrs() stores four consecutive entries of @values per
 * attribute: localname, prefix, value, and a pointer to the end of the
 * value. The array is declared with 4 entries but the structure is
 * allocated with extra trailing room and reallocated as it grows;
 * @maxAttrs counts attributes, not array entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity, in attributes (4 array entries each) */
    const xmlChar *values[4]; /* localname/prefix/value/value-end per attribute */
};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000623
624/**
625 * xmlAddDefAttrs:
626 * @ctxt: an XML parser context
627 * @fullname: the element fullname
628 * @fullattr: the attribute fullname
629 * @value: the attribute value
630 *
631 * Add a defaulted attribute for an element
632 */
633static void
634xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
635 const xmlChar *fullname,
636 const xmlChar *fullattr,
637 const xmlChar *value) {
638 xmlDefAttrsPtr defaults;
639 int len;
640 const xmlChar *name;
641 const xmlChar *prefix;
642
643 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000644 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000645 if (ctxt->attsDefault == NULL)
646 goto mem_error;
647 }
648
649 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000650 * split the element name into prefix:localname , the string found
651 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000652 */
653 name = xmlSplitQName3(fullname, &len);
654 if (name == NULL) {
655 name = xmlDictLookup(ctxt->dict, fullname, -1);
656 prefix = NULL;
657 } else {
658 name = xmlDictLookup(ctxt->dict, name, -1);
659 prefix = xmlDictLookup(ctxt->dict, fullname, len);
660 }
661
662 /*
663 * make sure there is some storage
664 */
665 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
666 if (defaults == NULL) {
667 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000668 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 if (defaults == NULL)
670 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000672 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000673 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
674 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000675 xmlDefAttrsPtr temp;
676
677 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000681 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000682 defaults->maxAttrs *= 2;
683 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
684 }
685
686 /*
687 * plit the element name into prefix:localname , the string found
688 * are within the DTD and hen not associated to namespace names.
689 */
690 name = xmlSplitQName3(fullattr, &len);
691 if (name == NULL) {
692 name = xmlDictLookup(ctxt->dict, fullattr, -1);
693 prefix = NULL;
694 } else {
695 name = xmlDictLookup(ctxt->dict, name, -1);
696 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
697 }
698
699 defaults->values[4 * defaults->nbAttrs] = name;
700 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
701 /* intern the string and precompute the end */
702 len = xmlStrlen(value);
703 value = xmlDictLookup(ctxt->dict, value, len);
704 defaults->values[4 * defaults->nbAttrs + 2] = value;
705 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
706 defaults->nbAttrs++;
707
708 return;
709
710mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000711 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000712 return;
713}
714
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000715/**
716 * xmlAddSpecialAttr:
717 * @ctxt: an XML parser context
718 * @fullname: the element fullname
719 * @fullattr: the attribute fullname
720 * @type: the attribute type
721 *
722 * Register that this attribute is not CDATA
723 */
724static void
725xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
726 const xmlChar *fullname,
727 const xmlChar *fullattr,
728 int type)
729{
730 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000731 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000732 if (ctxt->attsSpecial == NULL)
733 goto mem_error;
734 }
735
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000736 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
737 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000738 return;
739
740mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000741 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000742 return;
743}
744
Daniel Veillard4432df22003-09-28 18:58:27 +0000745/**
746 * xmlCheckLanguageID:
747 * @lang: pointer to the string value
748 *
749 * Checks that the value conforms to the LanguageID production:
750 *
751 * NOTE: this is somewhat deprecated, those productions were removed from
752 * the XML Second edition.
753 *
754 * [33] LanguageID ::= Langcode ('-' Subcode)*
755 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
756 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
757 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
758 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
759 * [38] Subcode ::= ([a-z] | [A-Z])+
760 *
761 * Returns 1 if correct 0 otherwise
762 **/
763int
764xmlCheckLanguageID(const xmlChar * lang)
765{
766 const xmlChar *cur = lang;
767
768 if (cur == NULL)
769 return (0);
770 if (((cur[0] == 'i') && (cur[1] == '-')) ||
771 ((cur[0] == 'I') && (cur[1] == '-'))) {
772 /*
773 * IANA code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
780 ((cur[0] == 'X') && (cur[1] == '-'))) {
781 /*
782 * User code
783 */
784 cur += 2;
785 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
789 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
790 /*
791 * ISO639
792 */
793 cur++;
794 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
795 ((cur[0] >= 'a') && (cur[0] <= 'z')))
796 cur++;
797 else
798 return (0);
799 } else
800 return (0);
801 while (cur[0] != 0) { /* non input consuming */
802 if (cur[0] != '-')
803 return (0);
804 cur++;
805 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
806 ((cur[0] >= 'a') && (cur[0] <= 'z')))
807 cur++;
808 else
809 return (0);
810 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
811 ((cur[0] >= 'a') && (cur[0] <= 'z')))
812 cur++;
813 }
814 return (1);
815}
816
Owen Taylor3473f882001-02-23 17:55:21 +0000817/************************************************************************
818 * *
819 * Parser stacks related functions and macros *
820 * *
821 ************************************************************************/
822
823xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
824 const xmlChar ** str);
825
Daniel Veillard0fb18932003-09-07 09:14:37 +0000826#ifdef SAX2
827/**
828 * nsPush:
829 * @ctxt: an XML parser context
830 * @prefix: the namespace prefix or NULL
831 * @URL: the namespace name
832 *
833 * Pushes a new parser namespace on top of the ns stack
834 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000835 * Returns -1 in case of error, -2 if the namespace should be discarded
836 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000837 */
838static int
839nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
840{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000841 if (ctxt->options & XML_PARSE_NSCLEAN) {
842 int i;
843 for (i = 0;i < ctxt->nsNr;i += 2) {
844 if (ctxt->nsTab[i] == prefix) {
845 /* in scope */
846 if (ctxt->nsTab[i + 1] == URL)
847 return(-2);
848 /* out of scope keep it */
849 break;
850 }
851 }
852 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000853 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
854 ctxt->nsMax = 10;
855 ctxt->nsNr = 0;
856 ctxt->nsTab = (const xmlChar **)
857 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
858 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000859 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000860 ctxt->nsMax = 0;
861 return (-1);
862 }
863 } else if (ctxt->nsNr >= ctxt->nsMax) {
864 ctxt->nsMax *= 2;
865 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000866 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000867 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
868 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000869 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000870 ctxt->nsMax /= 2;
871 return (-1);
872 }
873 }
874 ctxt->nsTab[ctxt->nsNr++] = prefix;
875 ctxt->nsTab[ctxt->nsNr++] = URL;
876 return (ctxt->nsNr);
877}
878/**
879 * nsPop:
880 * @ctxt: an XML parser context
881 * @nr: the number to pop
882 *
883 * Pops the top @nr parser prefix/namespace from the ns stack
884 *
885 * Returns the number of namespaces removed
886 */
887static int
888nsPop(xmlParserCtxtPtr ctxt, int nr)
889{
890 int i;
891
892 if (ctxt->nsTab == NULL) return(0);
893 if (ctxt->nsNr < nr) {
894 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
895 nr = ctxt->nsNr;
896 }
897 if (ctxt->nsNr <= 0)
898 return (0);
899
900 for (i = 0;i < nr;i++) {
901 ctxt->nsNr--;
902 ctxt->nsTab[ctxt->nsNr] = NULL;
903 }
904 return(nr);
905}
906#endif
907
908static int
909xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
910 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000912 int maxatts;
913
914 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000916 atts = (const xmlChar **)
917 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000919 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
921 if (attallocs == NULL) goto mem_error;
922 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000923 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000924 } else if (nr + 5 > ctxt->maxatts) {
925 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000926 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
927 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000929 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
931 (maxatts / 5) * sizeof(int));
932 if (attallocs == NULL) goto mem_error;
933 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000934 ctxt->maxatts = maxatts;
935 }
936 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000938 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000939 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000940}
941
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000942/**
943 * inputPush:
944 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000945 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000946 *
947 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000948 *
949 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000950 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000951int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000952inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
953{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000954 if ((ctxt == NULL) || (value == NULL))
955 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000956 if (ctxt->inputNr >= ctxt->inputMax) {
957 ctxt->inputMax *= 2;
958 ctxt->inputTab =
959 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
960 ctxt->inputMax *
961 sizeof(ctxt->inputTab[0]));
962 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000963 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 return (0);
965 }
966 }
967 ctxt->inputTab[ctxt->inputNr] = value;
968 ctxt->input = value;
969 return (ctxt->inputNr++);
970}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000973 * @ctxt: an XML parser context
974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000977 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000978 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000979xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000980inputPop(xmlParserCtxtPtr ctxt)
981{
982 xmlParserInputPtr ret;
983
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000984 if (ctxt == NULL)
985 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000986 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +0000987 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000988 ctxt->inputNr--;
989 if (ctxt->inputNr > 0)
990 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
991 else
992 ctxt->input = NULL;
993 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +0000994 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000995 return (ret);
996}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000997/**
998 * nodePush:
999 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001000 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001001 *
1002 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001003 *
1004 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001005 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001006int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001007nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1008{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001009 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001010 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001011 xmlNodePtr *tmp;
1012
1013 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1014 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001015 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001017 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001018 return (0);
1019 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001020 ctxt->nodeTab = tmp;
1021 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001022 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001023 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001024 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001025 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1026 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001027 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 return(0);
1029 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001030 ctxt->nodeTab[ctxt->nodeNr] = value;
1031 ctxt->node = value;
1032 return (ctxt->nodeNr++);
1033}
1034/**
1035 * nodePop:
1036 * @ctxt: an XML parser context
1037 *
1038 * Pops the top element node from the node stack
1039 *
1040 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001041 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001042xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001043nodePop(xmlParserCtxtPtr ctxt)
1044{
1045 xmlNodePtr ret;
1046
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001047 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001048 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001049 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001050 ctxt->nodeNr--;
1051 if (ctxt->nodeNr > 0)
1052 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1053 else
1054 ctxt->node = NULL;
1055 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001056 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001057 return (ret);
1058}
Daniel Veillarda2351322004-06-27 12:08:10 +00001059
1060#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001061/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * nameNsPush:
1063 * @ctxt: an XML parser context
1064 * @value: the element name
1065 * @prefix: the element prefix
1066 * @URI: the element namespace name
1067 *
1068 * Pushes a new element name/prefix/URL on top of the name stack
1069 *
1070 * Returns -1 in case of error, the index in the stack otherwise
1071 */
1072static int
1073nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1074 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1075{
1076 if (ctxt->nameNr >= ctxt->nameMax) {
1077 const xmlChar * *tmp;
1078 void **tmp2;
1079 ctxt->nameMax *= 2;
1080 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1081 ctxt->nameMax *
1082 sizeof(ctxt->nameTab[0]));
1083 if (tmp == NULL) {
1084 ctxt->nameMax /= 2;
1085 goto mem_error;
1086 }
1087 ctxt->nameTab = tmp;
1088 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1089 ctxt->nameMax * 3 *
1090 sizeof(ctxt->pushTab[0]));
1091 if (tmp2 == NULL) {
1092 ctxt->nameMax /= 2;
1093 goto mem_error;
1094 }
1095 ctxt->pushTab = tmp2;
1096 }
1097 ctxt->nameTab[ctxt->nameNr] = value;
1098 ctxt->name = value;
1099 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1100 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001101 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return (ctxt->nameNr++);
1103mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001104 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105 return (-1);
1106}
1107/**
1108 * nameNsPop:
1109 * @ctxt: an XML parser context
1110 *
1111 * Pops the top element/prefix/URI name from the name stack
1112 *
1113 * Returns the name just removed
1114 */
1115static const xmlChar *
1116nameNsPop(xmlParserCtxtPtr ctxt)
1117{
1118 const xmlChar *ret;
1119
1120 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001121 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 ctxt->nameNr--;
1123 if (ctxt->nameNr > 0)
1124 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1125 else
1126 ctxt->name = NULL;
1127 ret = ctxt->nameTab[ctxt->nameNr];
1128 ctxt->nameTab[ctxt->nameNr] = NULL;
1129 return (ret);
1130}
Daniel Veillarda2351322004-06-27 12:08:10 +00001131#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001132
1133/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 * namePush:
1135 * @ctxt: an XML parser context
1136 * @value: the element name
1137 *
1138 * Pushes a new element name on top of the name stack
1139 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001142int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001143namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001144{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001145 if (ctxt == NULL) return (-1);
1146
Daniel Veillard1c732d22002-11-30 11:22:59 +00001147 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001149 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001151 ctxt->nameMax *
1152 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 if (tmp == NULL) {
1154 ctxt->nameMax /= 2;
1155 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158 }
1159 ctxt->nameTab[ctxt->nameNr] = value;
1160 ctxt->name = value;
1161 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001163 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001165}
1166/**
1167 * namePop:
1168 * @ctxt: an XML parser context
1169 *
1170 * Pops the top element name from the name stack
1171 *
1172 * Returns the name just removed
1173 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001174const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001175namePop(xmlParserCtxtPtr ctxt)
1176{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001177 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001179 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1180 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 ctxt->nameNr--;
1182 if (ctxt->nameNr > 0)
1183 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1184 else
1185 ctxt->name = NULL;
1186 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001187 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001188 return (ret);
1189}
Owen Taylor3473f882001-02-23 17:55:21 +00001190
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001191static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001192 if (ctxt->spaceNr >= ctxt->spaceMax) {
1193 ctxt->spaceMax *= 2;
1194 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1195 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1196 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001198 return(0);
1199 }
1200 }
1201 ctxt->spaceTab[ctxt->spaceNr] = val;
1202 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1203 return(ctxt->spaceNr++);
1204}
1205
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001206static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 int ret;
1208 if (ctxt->spaceNr <= 0) return(0);
1209 ctxt->spaceNr--;
1210 if (ctxt->spaceNr > 0)
1211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1212 else
1213 ctxt->space = NULL;
1214 ret = ctxt->spaceTab[ctxt->spaceNr];
1215 ctxt->spaceTab[ctxt->spaceNr] = -1;
1216 return(ret);
1217}
1218
1219/*
1220 * Macros for accessing the content. Those should be used only by the parser,
1221 * and not exported.
1222 *
1223 * Dirty macros, i.e. one often need to make assumption on the context to
1224 * use them
1225 *
1226 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1227 * To be used with extreme caution since operations consuming
1228 * characters may move the input buffer to a different location !
1229 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1230 * This should be used internally by the parser
1231 * only to compare to ASCII values otherwise it would break when
1232 * running with UTF-8 encoding.
1233 * RAW same as CUR but in the input buffer, bypass any token
1234 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001241 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1242 *
1243 * NEXT Skip to the next character, this does the proper decoding
1244 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001245 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001246 * CUR_CHAR(l) returns the current unicode character (int), set l
1247 * to the number of xmlChars used for the encoding [0-5].
1248 * CUR_SCHAR same but operate on a string instead of the context
1249 * COPY_BUF copy the current unicode char to the target buffer, increment
1250 * the index
1251 * GROW, SHRINK handling of input buffers
1252 */
1253
/*
 * Accessors for the current input position. They all expect a variable
 * named ctxt to be in scope at the point of use.
 */
/* byte at the current position */
#define RAW (*(ctxt->input->cur))
/* same expansion as RAW */
#define CUR (*(ctxt->input->cur))
/* byte located val positions after the current one */
#define NXT(val) (ctxt->input->cur[(val)])
/* the current position pointer itself */
#define CUR_PTR (ctxt->input->cur)
1258
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * c1 ... cn. The bytes are read as unsigned char and compared left to
 * right, stopping at the first mismatch. s may be evaluated several
 * times, so it must be free of side effects. Every parameter is
 * parenthesized so that any expression can be passed for it.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1276
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * character count and to the column. If the new current byte is '%',
 * xmlParserHandlePEReference() is called; if it is then 0 and the input
 * cannot be grown, xmlPopInput() is called. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1284
/*
 * SKIPL(val): advance the current input by val bytes one at a time,
 * updating line and column for each byte ('\n' starts a new line at
 * column 1) as well as the character count. Afterwards it behaves like
 * SKIP: '%' triggers xmlParserHandlePEReference() and an exhausted
 * input that cannot be grown is popped with xmlPopInput().
 * val is re-evaluated on every iteration; it is parenthesized so that
 * any expression can be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
        ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1299
Daniel Veillarda880b122003-04-21 21:36:41 +00001300#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001301 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1302 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001303 xmlSHRINK (ctxt);
1304
1305static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1306 xmlParserInputShrink(ctxt->input);
1307 if ((*ctxt->input->cur == 0) &&
1308 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1309 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001310 }
Owen Taylor3473f882001-02-23 17:55:21 +00001311
Daniel Veillarda880b122003-04-21 21:36:41 +00001312#define GROW if ((ctxt->progressive == 0) && \
1313 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001314 xmlGROW (ctxt);
1315
1316static void xmlGROW (xmlParserCtxtPtr ctxt) {
1317 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1318 if ((*ctxt->input->cur == 0) &&
1319 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1320 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001321}
Owen Taylor3473f882001-02-23 17:55:21 +00001322
/* skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, counting one column and one
 * character, and ask for more input if the new current byte is 0.
 * No line accounting is done. The expansion is a brace block.
 */
#define NEXT1 {								\
        ctxt->nbChars++;						\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1334
/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A '\n' at the current position starts a new line at column 1, any
 * other character counts for one column. If the new current byte is
 * '%', xmlParserHandlePEReference() is called.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context; l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* current character of the string s; l receives its length */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. A one byte character is stored directly, anything else
 * goes through xmlCopyCharMultiByte(). The body is wrapped in
 * do { } while (0) so the macro is a single statement.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001349
1350/**
1351 * xmlSkipBlankChars:
1352 * @ctxt: the XML parser context
1353 *
1354 * skip all blanks character found at that point in the input streams.
1355 * It pops up finished entities in the process if allowable at that point.
1356 *
1357 * Returns the number of space chars skipped
1358 */
1359
1360int
1361xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001362 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001363
1364 /*
1365 * It's Okay to use CUR/NEXT here since all the blanks are on
1366 * the ASCII range.
1367 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001368 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1369 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001370 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001371 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001373 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001374 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001375 if (*cur == '\n') {
1376 ctxt->input->line++; ctxt->input->col = 1;
1377 }
1378 cur++;
1379 res++;
1380 if (*cur == 0) {
1381 ctxt->input->cur = cur;
1382 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1383 cur = ctxt->input->cur;
1384 }
1385 }
1386 ctxt->input->cur = cur;
1387 } else {
1388 int cur;
1389 do {
1390 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001391 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001392 NEXT;
1393 cur = CUR;
1394 res++;
1395 }
1396 while ((cur == 0) && (ctxt->inputNr > 1) &&
1397 (ctxt->instate != XML_PARSER_COMMENT)) {
1398 xmlPopInput(ctxt);
1399 cur = CUR;
1400 }
1401 /*
1402 * Need to handle support of entities branching here
1403 */
1404 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1405 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1406 }
Owen Taylor3473f882001-02-23 17:55:21 +00001407 return(res);
1408}
1409
1410/************************************************************************
1411 * *
1412 * Commodity functions to handle entities *
1413 * *
1414 ************************************************************************/
1415
1416/**
1417 * xmlPopInput:
1418 * @ctxt: an XML parser context
1419 *
1420 * xmlPopInput: the current input pointed by ctxt->input came to an end
1421 * pop it and return the next char.
1422 *
1423 * Returns the current xmlChar in the parser context
1424 */
1425xmlChar
1426xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001427 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001428 if (xmlParserDebugEntities)
1429 xmlGenericError(xmlGenericErrorContext,
1430 "Popping input %d\n", ctxt->inputNr);
1431 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001432 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001433 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1434 return(xmlPopInput(ctxt));
1435 return(CUR);
1436}
1437
1438/**
1439 * xmlPushInput:
1440 * @ctxt: an XML parser context
1441 * @input: an XML parser input fragment (entity, XML fragment ...).
1442 *
1443 * xmlPushInput: switch to a new input stream which is stacked on top
1444 * of the previous one(s).
1445 */
1446void
1447xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1448 if (input == NULL) return;
1449
1450 if (xmlParserDebugEntities) {
1451 if ((ctxt->input != NULL) && (ctxt->input->filename))
1452 xmlGenericError(xmlGenericErrorContext,
1453 "%s(%d): ", ctxt->input->filename,
1454 ctxt->input->line);
1455 xmlGenericError(xmlGenericErrorContext,
1456 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1457 }
1458 inputPush(ctxt, input);
1459 GROW;
1460}
1461
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * Parse a character reference located at the current input position.
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * A fatal error is raised when the input does not start with "&#", when
 * an unexpected character is met before the ';', or when the decoded
 * value does not satisfy IS_CHAR() or went above 0x10FFFF while being
 * accumulated.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    /* Set to a non-zero value as soon as val exceeds 0x10FFFF. */
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
        /* Hexadecimal form: "&#x" hex-digits ';' */
        SKIP(3);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            /*
             * count is incremented here and again at the bottom of the
             * loop; once it has passed 20 it is reset and GROW is invoked.
             */
            if (count++ > 20) {
                count = 0;
                GROW;
            }
            /*
             * NOTE(review): the letter digits below are additionally gated
             * on (count < 20) while '0'-'9' are not -- this looks
             * unintentional, confirm before relying on it.
             */
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 16 + (CUR - '0');
            else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
                val = val * 16 + (CUR - 'a') + 10;
            else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
                val = val * 16 + (CUR - 'A') + 10;
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
                val = 0;
                break;
            }
            /* Remember the overflow; val itself keeps accumulating. */
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
        /* Decimal form: "&#" digits ';' */
        SKIP(2);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            /* Same count/GROW bookkeeping as in the hexadecimal loop. */
            if (count++ > 20) {
                count = 0;
                GROW;
            }
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 10 + (CUR - '0');
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
                val = 0;
                break;
            }
            /* Remember the overflow; val itself keeps accumulating. */
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else {
        /* Not positioned on "&#": val stays 0 and is rejected below. */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        /*
         * Also reached after one of the errors above, in which case a
         * second error is reported for the value 0.
         */
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
                          val);
    }
    return(0);
}
1563
1564/**
1565 * xmlParseStringCharRef:
1566 * @ctxt: an XML parser context
1567 * @str: a pointer to an index in the string
1568 *
1569 * parse Reference declarations, variant parsing from a string rather
1570 * than an an input flow.
1571 *
1572 * [66] CharRef ::= '&#' [0-9]+ ';' |
1573 * '&#x' [0-9a-fA-F]+ ';'
1574 *
1575 * [ WFC: Legal Character ]
1576 * Characters referred to using character references must match the
1577 * production for Char.
1578 *
1579 * Returns the value parsed (as an int), 0 in case of error, str will be
1580 * updated to the current value of the index
1581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001582static int
Owen Taylor3473f882001-02-23 17:55:21 +00001583xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1584 const xmlChar *ptr;
1585 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001586 unsigned int val = 0;
1587 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001588
1589 if ((str == NULL) || (*str == NULL)) return(0);
1590 ptr = *str;
1591 cur = *ptr;
1592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1593 ptr += 3;
1594 cur = *ptr;
1595 while (cur != ';') { /* Non input consuming loop */
1596 if ((cur >= '0') && (cur <= '9'))
1597 val = val * 16 + (cur - '0');
1598 else if ((cur >= 'a') && (cur <= 'f'))
1599 val = val * 16 + (cur - 'a') + 10;
1600 else if ((cur >= 'A') && (cur <= 'F'))
1601 val = val * 16 + (cur - 'A') + 10;
1602 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001604 val = 0;
1605 break;
1606 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001607 if (val > 0x10FFFF)
1608 outofrange = val;
1609
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ptr++;
1611 cur = *ptr;
1612 }
1613 if (cur == ';')
1614 ptr++;
1615 } else if ((cur == '&') && (ptr[1] == '#')){
1616 ptr += 2;
1617 cur = *ptr;
1618 while (cur != ';') { /* Non input consuming loops */
1619 if ((cur >= '0') && (cur <= '9'))
1620 val = val * 10 + (cur - '0');
1621 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001622 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001623 val = 0;
1624 break;
1625 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001626 if (val > 0x10FFFF)
1627 outofrange = val;
1628
Owen Taylor3473f882001-02-23 17:55:21 +00001629 ptr++;
1630 cur = *ptr;
1631 }
1632 if (cur == ';')
1633 ptr++;
1634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001635 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001636 return(0);
1637 }
1638 *str = ptr;
1639
1640 /*
1641 * [ WFC: Legal Character ]
1642 * Characters referred to using character references must match the
1643 * production for Char.
1644 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001645 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001646 return(val);
1647 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001648 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1649 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1650 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001651 }
1652 return(0);
1653}
1654
1655/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001656 * xmlNewBlanksWrapperInputStream:
1657 * @ctxt: an XML parser context
1658 * @entity: an Entity pointer
1659 *
1660 * Create a new input stream for wrapping
1661 * blanks around a PEReference
1662 *
1663 * Returns the new input stream or NULL
1664 */
1665
1666static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1667
Daniel Veillardf4862f02002-09-10 11:13:43 +00001668static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001669xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1670 xmlParserInputPtr input;
1671 xmlChar *buffer;
1672 size_t length;
1673 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001674 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1675 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001676 return(NULL);
1677 }
1678 if (xmlParserDebugEntities)
1679 xmlGenericError(xmlGenericErrorContext,
1680 "new blanks wrapper for entity: %s\n", entity->name);
1681 input = xmlNewInputStream(ctxt);
1682 if (input == NULL) {
1683 return(NULL);
1684 }
1685 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001686 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001687 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001689 return(NULL);
1690 }
1691 buffer [0] = ' ';
1692 buffer [1] = '%';
1693 buffer [length-3] = ';';
1694 buffer [length-2] = ' ';
1695 buffer [length-1] = 0;
1696 memcpy(buffer + 2, entity->name, length - 5);
1697 input->free = deallocblankswrapper;
1698 input->base = buffer;
1699 input->cur = buffer;
1700 input->length = length;
1701 input->end = &buffer[length];
1702 return(input);
1703}
1704
1705/**
Owen Taylor3473f882001-02-23 17:55:21 +00001706 * xmlParserHandlePEReference:
1707 * @ctxt: the parser context
1708 *
1709 * [69] PEReference ::= '%' Name ';'
1710 *
1711 * [ WFC: No Recursion ]
1712 * A parsed entity must not contain a recursive
1713 * reference to itself, either directly or indirectly.
1714 *
1715 * [ WFC: Entity Declared ]
1716 * In a document without any DTD, a document with only an internal DTD
1717 * subset which contains no parameter entity references, or a document
1718 * with "standalone='yes'", ... ... The declaration of a parameter
1719 * entity must precede any reference to it...
1720 *
1721 * [ VC: Entity Declared ]
1722 * In a document with an external subset or external parameter entities
1723 * with "standalone='no'", ... ... The declaration of a parameter entity
1724 * must precede any reference to it...
1725 *
1726 * [ WFC: In DTD ]
1727 * Parameter-entity references may only appear in the DTD.
1728 * NOTE: misleading but this is handled.
1729 *
1730 * A PEReference may have been detected in the current input stream
1731 * the handling is done accordingly to
1732 * http://www.w3.org/TR/REC-xml#entproc
1733 * i.e.
1734 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001735 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001736 */
1737void
1738xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001739 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 xmlEntityPtr entity = NULL;
1741 xmlParserInputPtr input;
1742
Owen Taylor3473f882001-02-23 17:55:21 +00001743 if (RAW != '%') return;
1744 switch(ctxt->instate) {
1745 case XML_PARSER_CDATA_SECTION:
1746 return;
1747 case XML_PARSER_COMMENT:
1748 return;
1749 case XML_PARSER_START_TAG:
1750 return;
1751 case XML_PARSER_END_TAG:
1752 return;
1753 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001754 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001755 return;
1756 case XML_PARSER_PROLOG:
1757 case XML_PARSER_START:
1758 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001759 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001760 return;
1761 case XML_PARSER_ENTITY_DECL:
1762 case XML_PARSER_CONTENT:
1763 case XML_PARSER_ATTRIBUTE_VALUE:
1764 case XML_PARSER_PI:
1765 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001766 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001767 /* we just ignore it there */
1768 return;
1769 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001770 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001771 return;
1772 case XML_PARSER_ENTITY_VALUE:
1773 /*
1774 * NOTE: in the case of entity values, we don't do the
1775 * substitution here since we need the literal
1776 * entity value to be able to save the internal
1777 * subset of the document.
1778 * This will be handled by xmlStringDecodeEntities
1779 */
1780 return;
1781 case XML_PARSER_DTD:
1782 /*
1783 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1784 * In the internal DTD subset, parameter-entity references
1785 * can occur only where markup declarations can occur, not
1786 * within markup declarations.
1787 * In that case this is handled in xmlParseMarkupDecl
1788 */
1789 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1790 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001791 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 break;
1794 case XML_PARSER_IGNORE:
1795 return;
1796 }
1797
1798 NEXT;
1799 name = xmlParseName(ctxt);
1800 if (xmlParserDebugEntities)
1801 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001802 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001804 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001805 } else {
1806 if (RAW == ';') {
1807 NEXT;
1808 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1809 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1810 if (entity == NULL) {
1811
1812 /*
1813 * [ WFC: Entity Declared ]
1814 * In a document without any DTD, a document with only an
1815 * internal DTD subset which contains no parameter entity
1816 * references, or a document with "standalone='yes'", ...
1817 * ... The declaration of a parameter entity must precede
1818 * any reference to it...
1819 */
1820 if ((ctxt->standalone == 1) ||
1821 ((ctxt->hasExternalSubset == 0) &&
1822 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001823 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001824 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001825 } else {
1826 /*
1827 * [ VC: Entity Declared ]
1828 * In a document with an external subset or external
1829 * parameter entities with "standalone='no'", ...
1830 * ... The declaration of a parameter entity must precede
1831 * any reference to it...
1832 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001833 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1834 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1835 "PEReference: %%%s; not found\n",
1836 name);
1837 } else
1838 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1839 "PEReference: %%%s; not found\n",
1840 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001841 ctxt->valid = 0;
1842 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001843 } else if (ctxt->input->free != deallocblankswrapper) {
1844 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1845 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001846 } else {
1847 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1848 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001849 xmlChar start[4];
1850 xmlCharEncoding enc;
1851
Owen Taylor3473f882001-02-23 17:55:21 +00001852 /*
1853 * handle the extra spaces added before and after
1854 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001855 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001856 */
1857 input = xmlNewEntityInputStream(ctxt, entity);
1858 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001859
1860 /*
1861 * Get the 4 first bytes and decode the charset
1862 * if enc != XML_CHAR_ENCODING_NONE
1863 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001864 * Note that, since we may have some non-UTF8
1865 * encoding (like UTF16, bug 135229), the 'length'
1866 * is not known, but we can calculate based upon
1867 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001868 */
1869 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001870 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001871 start[0] = RAW;
1872 start[1] = NXT(1);
1873 start[2] = NXT(2);
1874 start[3] = NXT(3);
1875 enc = xmlDetectCharEncoding(start, 4);
1876 if (enc != XML_CHAR_ENCODING_NONE) {
1877 xmlSwitchEncoding(ctxt, enc);
1878 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001879 }
1880
Owen Taylor3473f882001-02-23 17:55:21 +00001881 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001882 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1883 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 xmlParseTextDecl(ctxt);
1885 }
Owen Taylor3473f882001-02-23 17:55:21 +00001886 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001887 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1888 "PEReference: %s is not a parameter entity\n",
1889 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 }
1891 }
1892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001893 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001894 }
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
1896}
1897
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the variable named <buffer>_size, then reallocates <buffer> to
 * that many xmlChar through xmlRealloc(). The function using the macro
 * must therefore declare <buffer>_size and provide a "mem_error" label:
 * on allocation failure control jumps there with <buffer> still holding
 * the previous pointer (and <buffer>_size already doubled).
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
1909
1910/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001911 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * @ctxt: the parser context
1913 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001915 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1916 * @end: an end marker xmlChar, 0 if none
1917 * @end2: an end marker xmlChar, 0 if none
1918 * @end3: an end marker xmlChar, 0 if none
1919 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001920 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001921 *
1922 * [67] Reference ::= EntityRef | CharRef
1923 *
1924 * [69] PEReference ::= '%' Name ';'
1925 *
1926 * Returns A newly allocated string with the substitution done. The caller
1927 * must deallocate it !
1928 */
1929xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001930xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1931 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 xmlChar *buffer = NULL;
1933 int buffer_size = 0;
1934
1935 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001936 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001937 xmlEntityPtr ent;
1938 int c,l;
1939 int nbchars = 0;
1940
Daniel Veillarda82b1822004-11-08 16:24:57 +00001941 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001942 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001943 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001944
1945 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 return(NULL);
1948 }
1949
1950 /*
1951 * allocate a translation buffer.
1952 */
1953 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001954 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001955 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001956
1957 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001958 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001959 * we are operating on already parsed values.
1960 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001961 if (str < last)
1962 c = CUR_SCHAR(str, l);
1963 else
1964 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 while ((c != 0) && (c != end) && /* non input consuming loop */
1966 (c != end2) && (c != end3)) {
1967
1968 if (c == 0) break;
1969 if ((c == '&') && (str[1] == '#')) {
1970 int val = xmlParseStringCharRef(ctxt, &str);
1971 if (val != 0) {
1972 COPY_BUF(0,buffer,nbchars,val);
1973 }
1974 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1975 if (xmlParserDebugEntities)
1976 xmlGenericError(xmlGenericErrorContext,
1977 "String decoding Entity Reference: %.30s\n",
1978 str);
1979 ent = xmlParseStringEntityRef(ctxt, &str);
1980 if ((ent != NULL) &&
1981 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1982 if (ent->content != NULL) {
1983 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1984 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001985 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1986 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001987 }
1988 } else if ((ent != NULL) && (ent->content != NULL)) {
1989 xmlChar *rep;
1990
1991 ctxt->depth++;
1992 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1993 0, 0, 0);
1994 ctxt->depth--;
1995 if (rep != NULL) {
1996 current = rep;
1997 while (*current != 0) { /* non input consuming loop */
1998 buffer[nbchars++] = *current++;
1999 if (nbchars >
2000 buffer_size - XML_PARSER_BUFFER_SIZE) {
2001 growBuffer(buffer);
2002 }
2003 }
2004 xmlFree(rep);
2005 }
2006 } else if (ent != NULL) {
2007 int i = xmlStrlen(ent->name);
2008 const xmlChar *cur = ent->name;
2009
2010 buffer[nbchars++] = '&';
2011 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2012 growBuffer(buffer);
2013 }
2014 for (;i > 0;i--)
2015 buffer[nbchars++] = *cur++;
2016 buffer[nbchars++] = ';';
2017 }
2018 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2019 if (xmlParserDebugEntities)
2020 xmlGenericError(xmlGenericErrorContext,
2021 "String decoding PE Reference: %.30s\n", str);
2022 ent = xmlParseStringPEReference(ctxt, &str);
2023 if (ent != NULL) {
2024 xmlChar *rep;
2025
2026 ctxt->depth++;
2027 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2028 0, 0, 0);
2029 ctxt->depth--;
2030 if (rep != NULL) {
2031 current = rep;
2032 while (*current != 0) { /* non input consuming loop */
2033 buffer[nbchars++] = *current++;
2034 if (nbchars >
2035 buffer_size - XML_PARSER_BUFFER_SIZE) {
2036 growBuffer(buffer);
2037 }
2038 }
2039 xmlFree(rep);
2040 }
2041 }
2042 } else {
2043 COPY_BUF(l,buffer,nbchars,c);
2044 str += l;
2045 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2046 growBuffer(buffer);
2047 }
2048 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002049 if (str < last)
2050 c = CUR_SCHAR(str, l);
2051 else
2052 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002053 }
2054 buffer[nbchars++] = 0;
2055 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002056
2057mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002058 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002059 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002060}
2061
Daniel Veillarde57ec792003-09-10 10:50:59 +00002062/**
2063 * xmlStringDecodeEntities:
2064 * @ctxt: the parser context
2065 * @str: the input string
2066 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2067 * @end: an end marker xmlChar, 0 if none
2068 * @end2: an end marker xmlChar, 0 if none
2069 * @end3: an end marker xmlChar, 0 if none
2070 *
2071 * Takes a entity string content and process to do the adequate substitutions.
2072 *
2073 * [67] Reference ::= EntityRef | CharRef
2074 *
2075 * [69] PEReference ::= '%' Name ';'
2076 *
2077 * Returns A newly allocated string with the substitution done. The caller
2078 * must deallocate it !
2079 */
2080xmlChar *
2081xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2082 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002083 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002084 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2085 end, end2, end3));
2086}
Owen Taylor3473f882001-02-23 17:55:21 +00002087
2088/************************************************************************
2089 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002090 * Commodity functions, cleanup needed ? *
2091 * *
2092 ************************************************************************/
2093
2094/**
2095 * areBlanks:
2096 * @ctxt: an XML parser context
2097 * @str: a xmlChar *
2098 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002099 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002100 *
2101 * Is this a sequence of blank chars that one can ignore ?
2102 *
2103 * Returns 1 if ignorable 0 otherwise.
2104 */
2105
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002106static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2107 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002108 int i, ret;
2109 xmlNodePtr lastChild;
2110
Daniel Veillard05c13a22001-09-09 08:38:09 +00002111 /*
2112 * Don't spend time trying to differentiate them, the same callback is
2113 * used !
2114 */
2115 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002116 return(0);
2117
Owen Taylor3473f882001-02-23 17:55:21 +00002118 /*
2119 * Check for xml:space value.
2120 */
2121 if (*(ctxt->space) == 1)
2122 return(0);
2123
2124 /*
2125 * Check that the string is made of blanks
2126 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002127 if (blank_chars == 0) {
2128 for (i = 0;i < len;i++)
2129 if (!(IS_BLANK_CH(str[i]))) return(0);
2130 }
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002133 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002134 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002135 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002136 if (ctxt->myDoc != NULL) {
2137 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2138 if (ret == 0) return(1);
2139 if (ret == 1) return(0);
2140 }
2141
2142 /*
2143 * Otherwise, heuristic :-\
2144 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002145 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002146 if ((ctxt->node->children == NULL) &&
2147 (RAW == '<') && (NXT(1) == '/')) return(0);
2148
2149 lastChild = xmlGetLastChild(ctxt->node);
2150 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002151 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2152 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 } else if (xmlNodeIsText(lastChild))
2154 return(0);
2155 else if ((ctxt->node->children != NULL) &&
2156 (xmlNodeIsText(ctxt->node->children)))
2157 return(0);
2158 return(1);
2159}
2160
Owen Taylor3473f882001-02-23 17:55:21 +00002161/************************************************************************
2162 * *
2163 * Extra stuff for namespace support *
2164 * Relates to http://www.w3.org/TR/WD-xml-names *
2165 * *
2166 ************************************************************************/
2167
2168/**
2169 * xmlSplitQName:
2170 * @ctxt: an XML parser context
 * @name:  an XML qualified name string
2172 * @prefix: a xmlChar **
2173 *
2174 * parse an UTF8 encoded XML qualified name string
2175 *
2176 * [NS 5] QName ::= (Prefix ':')? LocalPart
2177 *
2178 * [NS 6] Prefix ::= NCName
2179 *
2180 * [NS 7] LocalPart ::= NCName
2181 *
2182 * Returns the local part, and prefix is updated
2183 * to get the Prefix if any.
2184 */
2185
2186xmlChar *
2187xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2188 xmlChar buf[XML_MAX_NAMELEN + 5];
2189 xmlChar *buffer = NULL;
2190 int len = 0;
2191 int max = XML_MAX_NAMELEN;
2192 xmlChar *ret = NULL;
2193 const xmlChar *cur = name;
2194 int c;
2195
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002196 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002197 *prefix = NULL;
2198
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002199 if (cur == NULL) return(NULL);
2200
Owen Taylor3473f882001-02-23 17:55:21 +00002201#ifndef XML_XML_NAMESPACE
2202 /* xml: prefix is not really a namespace */
2203 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2204 (cur[2] == 'l') && (cur[3] == ':'))
2205 return(xmlStrdup(name));
2206#endif
2207
Daniel Veillard597bc482003-07-24 16:08:28 +00002208 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002209 if (cur[0] == ':')
2210 return(xmlStrdup(name));
2211
2212 c = *cur++;
2213 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2214 buf[len++] = c;
2215 c = *cur++;
2216 }
2217 if (len >= max) {
2218 /*
2219 * Okay someone managed to make a huge name, so he's ready to pay
2220 * for the processing speed.
2221 */
2222 max = len * 2;
2223
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002224 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002225 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 return(NULL);
2228 }
2229 memcpy(buffer, buf, len);
2230 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2231 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002232 xmlChar *tmp;
2233
Owen Taylor3473f882001-02-23 17:55:21 +00002234 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002235 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002236 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002237 if (tmp == NULL) {
2238 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002239 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 return(NULL);
2241 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002242 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002243 }
2244 buffer[len++] = c;
2245 c = *cur++;
2246 }
2247 buffer[len] = 0;
2248 }
2249
Daniel Veillard597bc482003-07-24 16:08:28 +00002250 /* nasty but well=formed
2251 if ((c == ':') && (*cur == 0)) {
2252 return(xmlStrdup(name));
2253 } */
2254
Owen Taylor3473f882001-02-23 17:55:21 +00002255 if (buffer == NULL)
2256 ret = xmlStrndup(buf, len);
2257 else {
2258 ret = buffer;
2259 buffer = NULL;
2260 max = XML_MAX_NAMELEN;
2261 }
2262
2263
2264 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002265 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002266 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002267 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002268 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 len = 0;
2271
Daniel Veillardbb284f42002-10-16 18:02:47 +00002272 /*
2273 * Check that the first character is proper to start
2274 * a new name
2275 */
2276 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2277 ((c >= 0x41) && (c <= 0x5A)) ||
2278 (c == '_') || (c == ':'))) {
2279 int l;
2280 int first = CUR_SCHAR(cur, l);
2281
2282 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002283 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002284 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002285 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002286 }
2287 }
2288 cur++;
2289
Owen Taylor3473f882001-02-23 17:55:21 +00002290 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2291 buf[len++] = c;
2292 c = *cur++;
2293 }
2294 if (len >= max) {
2295 /*
2296 * Okay someone managed to make a huge name, so he's ready to pay
2297 * for the processing speed.
2298 */
2299 max = len * 2;
2300
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002301 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002302 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 return(NULL);
2305 }
2306 memcpy(buffer, buf, len);
2307 while (c != 0) { /* tested bigname2.xml */
2308 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002309 xmlChar *tmp;
2310
Owen Taylor3473f882001-02-23 17:55:21 +00002311 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002312 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002313 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002314 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002316 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002317 return(NULL);
2318 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002319 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002320 }
2321 buffer[len++] = c;
2322 c = *cur++;
2323 }
2324 buffer[len] = 0;
2325 }
2326
2327 if (buffer == NULL)
2328 ret = xmlStrndup(buf, len);
2329 else {
2330 ret = buffer;
2331 }
2332 }
2333
2334 return(ret);
2335}
2336
/************************************************************************
 *                                                                      *
 *              The parser itself                                       *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/
2343
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002346 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002347
Owen Taylor3473f882001-02-23 17:55:21 +00002348/**
2349 * xmlParseName:
2350 * @ctxt: an XML parser context
2351 *
2352 * parse an XML name.
2353 *
2354 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2355 * CombiningChar | Extender
2356 *
2357 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2358 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002359 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002360 *
2361 * Returns the Name parsed or NULL
2362 */
2363
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002364const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002365xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002366 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002367 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002368 int count = 0;
2369
2370 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002371
2372 /*
2373 * Accelerator for simple ASCII names
2374 */
2375 in = ctxt->input->cur;
2376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2377 ((*in >= 0x41) && (*in <= 0x5A)) ||
2378 (*in == '_') || (*in == ':')) {
2379 in++;
2380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2381 ((*in >= 0x41) && (*in <= 0x5A)) ||
2382 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002383 (*in == '_') || (*in == '-') ||
2384 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002385 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002386 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002387 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002389 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002390 ctxt->nbChars += count;
2391 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002392 if (ret == NULL)
2393 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002394 return(ret);
2395 }
2396 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002399
Daniel Veillard46de64e2002-05-29 08:21:33 +00002400/**
2401 * xmlParseNameAndCompare:
2402 * @ctxt: an XML parser context
2403 *
2404 * parse an XML name and compares for match
2405 * (specialized for endtag parsing)
2406 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002407 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2408 * and the name for mismatch
2409 */
2410
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002411static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002412xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002413 register const xmlChar *cmp = other;
2414 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002415 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002416
2417 GROW;
2418
2419 in = ctxt->input->cur;
2420 while (*in != 0 && *in == *cmp) {
2421 ++in;
2422 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002423 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002424 }
William M. Brack76e95df2003-10-18 16:20:14 +00002425 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002426 /* success */
2427 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002429 }
2430 /* failure (or end of input buffer), check with full function */
2431 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002432 /* strings coming from the dictionnary direct compare possible */
2433 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002434 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002435 }
2436 return ret;
2437}
2438
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002439static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002440xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441 int len = 0, l;
2442 int c;
2443 int count = 0;
2444
2445 /*
2446 * Handler for more complex cases
2447 */
2448 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002449 c = CUR_CHAR(l);
2450 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2451 (!IS_LETTER(c) && (c != '_') &&
2452 (c != ':'))) {
2453 return(NULL);
2454 }
2455
2456 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002457 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002458 (c == '.') || (c == '-') ||
2459 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002460 (IS_COMBINING(c)) ||
2461 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002462 if (count++ > 100) {
2463 count = 0;
2464 GROW;
2465 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002466 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002467 NEXTL(l);
2468 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002469 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002470 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002471}
2472
2473/**
2474 * xmlParseStringName:
2475 * @ctxt: an XML parser context
2476 * @str: a pointer to the string pointer (IN/OUT)
2477 *
2478 * parse an XML name.
2479 *
2480 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2481 * CombiningChar | Extender
2482 *
2483 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2484 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002485 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002486 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002487 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002488 * is updated to the current location in the string.
2489 */
2490
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002491static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002492xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2493 xmlChar buf[XML_MAX_NAMELEN + 5];
2494 const xmlChar *cur = *str;
2495 int len = 0, l;
2496 int c;
2497
2498 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002499 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002500 (c != ':')) {
2501 return(NULL);
2502 }
2503
William M. Brack871611b2003-10-18 04:53:14 +00002504 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002505 (c == '.') || (c == '-') ||
2506 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002507 (IS_COMBINING(c)) ||
2508 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002509 COPY_BUF(l,buf,len,c);
2510 cur += l;
2511 c = CUR_SCHAR(cur, l);
2512 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2513 /*
2514 * Okay someone managed to make a huge name, so he's ready to pay
2515 * for the processing speed.
2516 */
2517 xmlChar *buffer;
2518 int max = len * 2;
2519
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002520 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002521 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002522 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002523 return(NULL);
2524 }
2525 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002526 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002527 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002528 (c == '.') || (c == '-') ||
2529 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002530 (IS_COMBINING(c)) ||
2531 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002533 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002534 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002535 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002536 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002537 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002538 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002539 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002540 return(NULL);
2541 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002542 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002543 }
2544 COPY_BUF(l,buffer,len,c);
2545 cur += l;
2546 c = CUR_SCHAR(cur, l);
2547 }
2548 buffer[len] = 0;
2549 *str = cur;
2550 return(buffer);
2551 }
2552 }
2553 *str = cur;
2554 return(xmlStrndup(buf, len));
2555}
2556
2557/**
2558 * xmlParseNmtoken:
2559 * @ctxt: an XML parser context
2560 *
2561 * parse an XML Nmtoken.
2562 *
2563 * [7] Nmtoken ::= (NameChar)+
2564 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002565 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002566 *
2567 * Returns the Nmtoken parsed or NULL
2568 */
2569
2570xmlChar *
2571xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2572 xmlChar buf[XML_MAX_NAMELEN + 5];
2573 int len = 0, l;
2574 int c;
2575 int count = 0;
2576
2577 GROW;
2578 c = CUR_CHAR(l);
2579
William M. Brack871611b2003-10-18 04:53:14 +00002580 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002581 (c == '.') || (c == '-') ||
2582 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002583 (IS_COMBINING(c)) ||
2584 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002585 if (count++ > 100) {
2586 count = 0;
2587 GROW;
2588 }
2589 COPY_BUF(l,buf,len,c);
2590 NEXTL(l);
2591 c = CUR_CHAR(l);
2592 if (len >= XML_MAX_NAMELEN) {
2593 /*
2594 * Okay someone managed to make a huge token, so he's ready to pay
2595 * for the processing speed.
2596 */
2597 xmlChar *buffer;
2598 int max = len * 2;
2599
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002602 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
2604 }
2605 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002606 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002607 (c == '.') || (c == '-') ||
2608 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002609 (IS_COMBINING(c)) ||
2610 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002611 if (count++ > 100) {
2612 count = 0;
2613 GROW;
2614 }
2615 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002616 xmlChar *tmp;
2617
Owen Taylor3473f882001-02-23 17:55:21 +00002618 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002619 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002620 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002621 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002622 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002624 return(NULL);
2625 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002626 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002627 }
2628 COPY_BUF(l,buffer,len,c);
2629 NEXTL(l);
2630 c = CUR_CHAR(l);
2631 }
2632 buffer[len] = 0;
2633 return(buffer);
2634 }
2635 }
2636 if (len == 0)
2637 return(NULL);
2638 return(xmlStrndup(buf, len));
2639}
2640
2641/**
2642 * xmlParseEntityValue:
2643 * @ctxt: an XML parser context
2644 * @orig: if non-NULL store a copy of the original entity value
2645 *
2646 * parse a value for ENTITY declarations
2647 *
2648 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2649 * "'" ([^%&'] | PEReference | Reference)* "'"
2650 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002651 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002652 */
2653
2654xmlChar *
2655xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2656 xmlChar *buf = NULL;
2657 int len = 0;
2658 int size = XML_PARSER_BUFFER_SIZE;
2659 int c, l;
2660 xmlChar stop;
2661 xmlChar *ret = NULL;
2662 const xmlChar *cur = NULL;
2663 xmlParserInputPtr input;
2664
2665 if (RAW == '"') stop = '"';
2666 else if (RAW == '\'') stop = '\'';
2667 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002668 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002669 return(NULL);
2670 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002671 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 return(NULL);
2675 }
2676
2677 /*
2678 * The content of the entity definition is copied in a buffer.
2679 */
2680
2681 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2682 input = ctxt->input;
2683 GROW;
2684 NEXT;
2685 c = CUR_CHAR(l);
2686 /*
2687 * NOTE: 4.4.5 Included in Literal
2688 * When a parameter entity reference appears in a literal entity
2689 * value, ... a single or double quote character in the replacement
2690 * text is always treated as a normal data character and will not
2691 * terminate the literal.
2692 * In practice it means we stop the loop only when back at parsing
2693 * the initial entity and the quote is found
2694 */
William M. Brack871611b2003-10-18 04:53:14 +00002695 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002696 (ctxt->input != input))) {
2697 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002698 xmlChar *tmp;
2699
Owen Taylor3473f882001-02-23 17:55:21 +00002700 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002701 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2702 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002703 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002704 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 return(NULL);
2706 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002707 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002708 }
2709 COPY_BUF(l,buf,len,c);
2710 NEXTL(l);
2711 /*
2712 * Pop-up of finished entities.
2713 */
2714 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2715 xmlPopInput(ctxt);
2716
2717 GROW;
2718 c = CUR_CHAR(l);
2719 if (c == 0) {
2720 GROW;
2721 c = CUR_CHAR(l);
2722 }
2723 }
2724 buf[len] = 0;
2725
2726 /*
2727 * Raise problem w.r.t. '&' and '%' being used in non-entities
2728 * reference constructs. Note Charref will be handled in
2729 * xmlStringDecodeEntities()
2730 */
2731 cur = buf;
2732 while (*cur != 0) { /* non input consuming */
2733 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2734 xmlChar *name;
2735 xmlChar tmp = *cur;
2736
2737 cur++;
2738 name = xmlParseStringName(ctxt, &cur);
2739 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002740 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002741 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002742 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002743 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002744 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2745 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 }
2748 if (name != NULL)
2749 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002750 if (*cur == 0)
2751 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
2753 cur++;
2754 }
2755
2756 /*
2757 * Then PEReference entities are substituted.
2758 */
2759 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002760 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002761 xmlFree(buf);
2762 } else {
2763 NEXT;
2764 /*
2765 * NOTE: 4.4.7 Bypassed
2766 * When a general entity reference appears in the EntityValue in
2767 * an entity declaration, it is bypassed and left as is.
2768 * so XML_SUBSTITUTE_REF is not set here.
2769 */
2770 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2771 0, 0, 0);
2772 if (orig != NULL)
2773 *orig = buf;
2774 else
2775 xmlFree(buf);
2776 }
2777
2778 return(ret);
2779}
2780
2781/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002782 * xmlParseAttValueComplex:
2783 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002784 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002786 *
2787 * parse a value for an attribute, this is the fallback function
2788 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002789 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002790 *
2791 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2792 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002793static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002794xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002795 xmlChar limit = 0;
2796 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002797 int len = 0;
2798 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002799 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002800 xmlChar *current = NULL;
2801 xmlEntityPtr ent;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if (NXT(0) == '"') {
2804 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2805 limit = '"';
2806 NEXT;
2807 } else if (NXT(0) == '\'') {
2808 limit = '\'';
2809 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2810 NEXT;
2811 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002813 return(NULL);
2814 }
2815
2816 /*
2817 * allocate a translation buffer.
2818 */
2819 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002820 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002821 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002822
2823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002824 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002825 */
2826 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002827 while ((NXT(0) != limit) && /* checked */
2828 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002829 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002830 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002831 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (NXT(1) == '#') {
2833 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002834
Owen Taylor3473f882001-02-23 17:55:21 +00002835 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002836 if (ctxt->replaceEntities) {
2837 if (len > buf_size - 10) {
2838 growBuffer(buf);
2839 }
2840 buf[len++] = '&';
2841 } else {
2842 /*
2843 * The reparsing will be done in xmlStringGetNodeList()
2844 * called by the attribute() function in SAX.c
2845 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002846 if (len > buf_size - 10) {
2847 growBuffer(buf);
2848 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002849 buf[len++] = '&';
2850 buf[len++] = '#';
2851 buf[len++] = '3';
2852 buf[len++] = '8';
2853 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002854 }
2855 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002856 if (len > buf_size - 10) {
2857 growBuffer(buf);
2858 }
Owen Taylor3473f882001-02-23 17:55:21 +00002859 len += xmlCopyChar(0, &buf[len], val);
2860 }
2861 } else {
2862 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002863 if ((ent != NULL) &&
2864 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2865 if (len > buf_size - 10) {
2866 growBuffer(buf);
2867 }
2868 if ((ctxt->replaceEntities == 0) &&
2869 (ent->content[0] == '&')) {
2870 buf[len++] = '&';
2871 buf[len++] = '#';
2872 buf[len++] = '3';
2873 buf[len++] = '8';
2874 buf[len++] = ';';
2875 } else {
2876 buf[len++] = ent->content[0];
2877 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002878 } else if ((ent != NULL) &&
2879 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002880 xmlChar *rep;
2881
2882 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2883 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002884 XML_SUBSTITUTE_REF,
2885 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (rep != NULL) {
2887 current = rep;
2888 while (*current != 0) { /* non input consuming */
2889 buf[len++] = *current++;
2890 if (len > buf_size - 10) {
2891 growBuffer(buf);
2892 }
2893 }
2894 xmlFree(rep);
2895 }
2896 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002897 if (len > buf_size - 10) {
2898 growBuffer(buf);
2899 }
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (ent->content != NULL)
2901 buf[len++] = ent->content[0];
2902 }
2903 } else if (ent != NULL) {
2904 int i = xmlStrlen(ent->name);
2905 const xmlChar *cur = ent->name;
2906
2907 /*
2908 * This may look absurd but is needed to detect
2909 * entities problems
2910 */
2911 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2912 (ent->content != NULL)) {
2913 xmlChar *rep;
2914 rep = xmlStringDecodeEntities(ctxt, ent->content,
2915 XML_SUBSTITUTE_REF, 0, 0, 0);
2916 if (rep != NULL)
2917 xmlFree(rep);
2918 }
2919
2920 /*
2921 * Just output the reference
2922 */
2923 buf[len++] = '&';
2924 if (len > buf_size - i - 10) {
2925 growBuffer(buf);
2926 }
2927 for (;i > 0;i--)
2928 buf[len++] = *cur++;
2929 buf[len++] = ';';
2930 }
2931 }
2932 } else {
2933 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002934 if ((len != 0) || (!normalize)) {
2935 if ((!normalize) || (!in_space)) {
2936 COPY_BUF(l,buf,len,0x20);
2937 if (len > buf_size - 10) {
2938 growBuffer(buf);
2939 }
2940 }
2941 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002942 }
2943 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002944 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002945 COPY_BUF(l,buf,len,c);
2946 if (len > buf_size - 10) {
2947 growBuffer(buf);
2948 }
2949 }
2950 NEXTL(l);
2951 }
2952 GROW;
2953 c = CUR_CHAR(l);
2954 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002955 if ((in_space) && (normalize)) {
2956 while (buf[len - 1] == 0x20) len--;
2957 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002958 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002959 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002962 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2963 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002964 } else
2965 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002966 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002967 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002968
2969mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002970 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002971 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002972}
2973
2974/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002975 * xmlParseAttValue:
2976 * @ctxt: an XML parser context
2977 *
2978 * parse a value for an attribute
2979 * Note: the parser won't do substitution of entities here, this
2980 * will be handled later in xmlStringGetNodeList
2981 *
2982 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2983 * "'" ([^<&'] | Reference)* "'"
2984 *
2985 * 3.3.3 Attribute-Value Normalization:
2986 * Before the value of an attribute is passed to the application or
2987 * checked for validity, the XML processor must normalize it as follows:
2988 * - a character reference is processed by appending the referenced
2989 * character to the attribute value
2990 * - an entity reference is processed by recursively processing the
2991 * replacement text of the entity
2992 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2993 * appending #x20 to the normalized value, except that only a single
2994 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2995 * parsed entity or the literal entity value of an internal parsed entity
2996 * - other characters are processed by appending them to the normalized value
2997 * If the declared value is not CDATA, then the XML processor must further
2998 * process the normalized attribute value by discarding any leading and
2999 * trailing space (#x20) characters, and by replacing sequences of space
3000 * (#x20) characters by a single space (#x20) character.
3001 * All attributes for which no declaration has been read should be treated
3002 * by a non-validating parser as if declared CDATA.
3003 *
3004 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3005 */
3006
3007
3008xmlChar *
3009xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003010 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003011 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003012}
3013
3014/**
Owen Taylor3473f882001-02-23 17:55:21 +00003015 * xmlParseSystemLiteral:
3016 * @ctxt: an XML parser context
3017 *
3018 * parse an XML Literal
3019 *
3020 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3021 *
3022 * Returns the SystemLiteral parsed or NULL
3023 */
3024
3025xmlChar *
3026xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3027 xmlChar *buf = NULL;
3028 int len = 0;
3029 int size = XML_PARSER_BUFFER_SIZE;
3030 int cur, l;
3031 xmlChar stop;
3032 int state = ctxt->instate;
3033 int count = 0;
3034
3035 SHRINK;
3036 if (RAW == '"') {
3037 NEXT;
3038 stop = '"';
3039 } else if (RAW == '\'') {
3040 NEXT;
3041 stop = '\'';
3042 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003043 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003044 return(NULL);
3045 }
3046
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003050 return(NULL);
3051 }
3052 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3053 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003054 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003055 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003056 xmlChar *tmp;
3057
Owen Taylor3473f882001-02-23 17:55:21 +00003058 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003059 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3060 if (tmp == NULL) {
3061 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003062 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003063 ctxt->instate = (xmlParserInputState) state;
3064 return(NULL);
3065 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003066 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003067 }
3068 count++;
3069 if (count > 50) {
3070 GROW;
3071 count = 0;
3072 }
3073 COPY_BUF(l,buf,len,cur);
3074 NEXTL(l);
3075 cur = CUR_CHAR(l);
3076 if (cur == 0) {
3077 GROW;
3078 SHRINK;
3079 cur = CUR_CHAR(l);
3080 }
3081 }
3082 buf[len] = 0;
3083 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003084 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003085 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003086 } else {
3087 NEXT;
3088 }
3089 return(buf);
3090}
3091
3092/**
3093 * xmlParsePubidLiteral:
3094 * @ctxt: an XML parser context
3095 *
3096 * parse an XML public literal
3097 *
3098 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3099 *
3100 * Returns the PubidLiteral parsed or NULL.
3101 */
3102
3103xmlChar *
3104xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3105 xmlChar *buf = NULL;
3106 int len = 0;
3107 int size = XML_PARSER_BUFFER_SIZE;
3108 xmlChar cur;
3109 xmlChar stop;
3110 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003111 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003112
3113 SHRINK;
3114 if (RAW == '"') {
3115 NEXT;
3116 stop = '"';
3117 } else if (RAW == '\'') {
3118 NEXT;
3119 stop = '\'';
3120 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003121 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003122 return(NULL);
3123 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003124 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003125 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003126 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003127 return(NULL);
3128 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003129 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003130 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003131 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003132 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003133 xmlChar *tmp;
3134
Owen Taylor3473f882001-02-23 17:55:21 +00003135 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003136 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3137 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003138 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003139 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003140 return(NULL);
3141 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003142 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003143 }
3144 buf[len++] = cur;
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 NEXT;
3151 cur = CUR;
3152 if (cur == 0) {
3153 GROW;
3154 SHRINK;
3155 cur = CUR;
3156 }
3157 }
3158 buf[len] = 0;
3159 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003160 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003161 } else {
3162 NEXT;
3163 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003164 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 return(buf);
3166}
3167
Daniel Veillard48b2f892001-02-25 16:11:03 +00003168void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003169
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing: it is indexed by a byte of input and holds a non-zero value
 * (the byte itself) when the fast scanning loop may step over that byte.
 * The entries for the control characters other than 0x09, for '&', '<'
 * and ']', and for all bytes above 0x7F are zero, so the loop stops there.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3207
Owen Taylor3473f882001-02-23 17:55:21 +00003208/**
3209 * xmlParseCharData:
3210 * @ctxt: an XML parser context
3211 * @cdata: int indicating whether we are within a CDATA section
3212 *
3213 * parse a CharData section.
3214 * if we are within a CDATA section ']]>' marks an end of section.
3215 *
3216 * The right angle bracket (>) may be represented using the string "&gt;",
3217 * and must, for compatibility, be escaped using "&gt;" or a character
3218 * reference when it appears in the string "]]>" in content, when that
3219 * string is not marking the end of a CDATA section.
3220 *
3221 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3222 */
3223
3224void
3225xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003226 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003227 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003228 int line = ctxt->input->line;
3229 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003230 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003231
3232 SHRINK;
3233 GROW;
3234 /*
3235 * Accelerated common case where input don't need to be
3236 * modified before passing it to the handler.
3237 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003238 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003239 in = ctxt->input->cur;
3240 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003241get_more_space:
3242 while (*in == 0x20) in++;
3243 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003244 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003245 in++;
3246 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003247 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003248 in++;
3249 }
3250 goto get_more_space;
3251 }
3252 if (*in == '<') {
3253 nbchar = in - ctxt->input->cur;
3254 if (nbchar > 0) {
3255 const xmlChar *tmp = ctxt->input->cur;
3256 ctxt->input->cur = in;
3257
Daniel Veillard34099b42004-11-04 17:34:35 +00003258 if ((ctxt->sax != NULL) &&
3259 (ctxt->sax->ignorableWhitespace !=
3260 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003261 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003262 if (ctxt->sax->ignorableWhitespace != NULL)
3263 ctxt->sax->ignorableWhitespace(ctxt->userData,
3264 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003265 } else if (ctxt->sax->characters != NULL)
3266 ctxt->sax->characters(ctxt->userData,
3267 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003268 } else if ((ctxt->sax != NULL) &&
3269 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003270 ctxt->sax->characters(ctxt->userData,
3271 tmp, nbchar);
3272 }
3273 }
3274 return;
3275 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003276
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003277get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003278 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003279 while (test_char_data[*in]) {
3280 in++;
3281 ccol++;
3282 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003283 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003284 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003285 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003286 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003287 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003288 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003289 in++;
3290 }
3291 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003292 }
3293 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003294 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003295 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003296 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003297 return;
3298 }
3299 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003300 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003301 goto get_more;
3302 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003303 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003304 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003305 if ((ctxt->sax != NULL) &&
3306 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003307 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003308 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003309 const xmlChar *tmp = ctxt->input->cur;
3310 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003311
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003312 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003313 if (ctxt->sax->ignorableWhitespace != NULL)
3314 ctxt->sax->ignorableWhitespace(ctxt->userData,
3315 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003316 } else if (ctxt->sax->characters != NULL)
3317 ctxt->sax->characters(ctxt->userData,
3318 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003319 line = ctxt->input->line;
3320 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003321 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003322 if (ctxt->sax->characters != NULL)
3323 ctxt->sax->characters(ctxt->userData,
3324 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003325 line = ctxt->input->line;
3326 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003327 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003328 }
3329 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003330 if (*in == 0xD) {
3331 in++;
3332 if (*in == 0xA) {
3333 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003334 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003335 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003336 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003337 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003338 in--;
3339 }
3340 if (*in == '<') {
3341 return;
3342 }
3343 if (*in == '&') {
3344 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003345 }
3346 SHRINK;
3347 GROW;
3348 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003349 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003350 nbchar = 0;
3351 }
Daniel Veillard50582112001-03-26 22:52:16 +00003352 ctxt->input->line = line;
3353 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003354 xmlParseCharDataComplex(ctxt, cdata);
3355}
3356
Daniel Veillard01c13b52002-12-10 15:19:08 +00003357/**
3358 * xmlParseCharDataComplex:
3359 * @ctxt: an XML parser context
3360 * @cdata: int indicating whether we are within a CDATA section
3361 *
3362 * parse a CharData section.this is the fallback function
3363 * of xmlParseCharData() when the parsing requires handling
3364 * of non-ASCII characters.
3365 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003366void
3367xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003368 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3369 int nbchar = 0;
3370 int cur, l;
3371 int count = 0;
3372
3373 SHRINK;
3374 GROW;
3375 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003376 while ((cur != '<') && /* checked */
3377 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003378 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003379 if ((cur == ']') && (NXT(1) == ']') &&
3380 (NXT(2) == '>')) {
3381 if (cdata) break;
3382 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003383 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003384 }
3385 }
3386 COPY_BUF(l,buf,nbchar,cur);
3387 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003388 buf[nbchar] = 0;
3389
Owen Taylor3473f882001-02-23 17:55:21 +00003390 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003391 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003392 */
3393 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003394 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003395 if (ctxt->sax->ignorableWhitespace != NULL)
3396 ctxt->sax->ignorableWhitespace(ctxt->userData,
3397 buf, nbchar);
3398 } else {
3399 if (ctxt->sax->characters != NULL)
3400 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3401 }
3402 }
3403 nbchar = 0;
3404 }
3405 count++;
3406 if (count > 50) {
3407 GROW;
3408 count = 0;
3409 }
3410 NEXTL(l);
3411 cur = CUR_CHAR(l);
3412 }
3413 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003414 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003415 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003416 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003417 */
3418 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003419 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003420 if (ctxt->sax->ignorableWhitespace != NULL)
3421 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3422 } else {
3423 if (ctxt->sax->characters != NULL)
3424 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3425 }
3426 }
3427 }
3428}
3429
3430/**
3431 * xmlParseExternalID:
3432 * @ctxt: an XML parser context
3433 * @publicID: a xmlChar** receiving PubidLiteral
3434 * @strict: indicate whether we should restrict parsing to only
3435 * production [75], see NOTE below
3436 *
3437 * Parse an External ID or a Public ID
3438 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003439 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003440 * 'PUBLIC' S PubidLiteral S SystemLiteral
3441 *
3442 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3443 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3444 *
3445 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3446 *
3447 * Returns the function returns SystemLiteral and in the second
3448 * case publicID receives PubidLiteral, is strict is off
3449 * it is possible to return NULL and have publicID set.
3450 */
3451
3452xmlChar *
3453xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3454 xmlChar *URI = NULL;
3455
3456 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003457
3458 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003459 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003460 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003461 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003462 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3463 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003464 }
3465 SKIP_BLANKS;
3466 URI = xmlParseSystemLiteral(ctxt);
3467 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003468 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003469 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003470 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003471 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003472 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003474 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003475 }
3476 SKIP_BLANKS;
3477 *publicID = xmlParsePubidLiteral(ctxt);
3478 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003479 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003480 }
3481 if (strict) {
3482 /*
3483 * We don't handle [83] so "S SystemLiteral" is required.
3484 */
William M. Brack76e95df2003-10-18 16:20:14 +00003485 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003486 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003487 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003488 }
3489 } else {
3490 /*
3491 * We handle [83] so we return immediately, if
3492 * "S SystemLiteral" is not detected. From a purely parsing
3493 * point of view that's a nice mess.
3494 */
3495 const xmlChar *ptr;
3496 GROW;
3497
3498 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003499 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003500
William M. Brack76e95df2003-10-18 16:20:14 +00003501 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003502 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3503 }
3504 SKIP_BLANKS;
3505 URI = xmlParseSystemLiteral(ctxt);
3506 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003507 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003508 }
3509 }
3510 return(URI);
3511}
3512
3513/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003514 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003515 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003516 * @buf: the already parsed part of the buffer
3517 * @len: number of bytes filles in the buffer
3518 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003519 *
3520 * Skip an XML (SGML) comment <!-- .... -->
3521 * The spec says that "For compatibility, the string "--" (double-hyphen)
3522 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003523 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003524 *
3525 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3526 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003527static void
3528xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003529 int q, ql;
3530 int r, rl;
3531 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003532 xmlParserInputPtr input = ctxt->input;
3533 int count = 0;
3534
Owen Taylor3473f882001-02-23 17:55:21 +00003535 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003536 len = 0;
3537 size = XML_PARSER_BUFFER_SIZE;
3538 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3539 if (buf == NULL) {
3540 xmlErrMemory(ctxt, NULL);
3541 return;
3542 }
Owen Taylor3473f882001-02-23 17:55:21 +00003543 }
3544 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003545 if (q == 0)
3546 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003547 NEXTL(ql);
3548 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003549 if (r == 0)
3550 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003551 NEXTL(rl);
3552 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003553 if (cur == 0)
3554 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003555 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003556 ((cur != '>') ||
3557 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003558 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003559 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
3561 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003562 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003563 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003564 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3565 if (new_buf == NULL) {
3566 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003567 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003568 return;
3569 }
William M. Bracka3215c72004-07-31 16:24:01 +00003570 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003571 }
3572 COPY_BUF(ql,buf,len,q);
3573 q = r;
3574 ql = rl;
3575 r = cur;
3576 rl = l;
3577
3578 count++;
3579 if (count > 50) {
3580 GROW;
3581 count = 0;
3582 }
3583 NEXTL(l);
3584 cur = CUR_CHAR(l);
3585 if (cur == 0) {
3586 SHRINK;
3587 GROW;
3588 cur = CUR_CHAR(l);
3589 }
3590 }
3591 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003592 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003593 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003594 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003595 xmlFree(buf);
3596 } else {
3597 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003598 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3599 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003600 }
3601 NEXT;
3602 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3603 (!ctxt->disableSAX))
3604 ctxt->sax->comment(ctxt->userData, buf);
3605 xmlFree(buf);
3606 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003607 return;
3608not_terminated:
3609 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3610 "Comment not terminated\n", NULL);
3611 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003612}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003613/**
3614 * xmlParseComment:
3615 * @ctxt: an XML parser context
3616 *
3617 * Skip an XML (SGML) comment <!-- .... -->
3618 * The spec says that "For compatibility, the string "--" (double-hyphen)
3619 * must not occur within comments. "
3620 *
3621 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3622 */
3623void
3624xmlParseComment(xmlParserCtxtPtr ctxt) {
3625 xmlChar *buf = NULL;
3626 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003627 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003628 xmlParserInputState state;
3629 const xmlChar *in;
3630 int nbchar = 0, ccol;
3631
3632 /*
3633 * Check that there is a comment right here.
3634 */
3635 if ((RAW != '<') || (NXT(1) != '!') ||
3636 (NXT(2) != '-') || (NXT(3) != '-')) return;
3637
3638 state = ctxt->instate;
3639 ctxt->instate = XML_PARSER_COMMENT;
3640 SKIP(4);
3641 SHRINK;
3642 GROW;
3643
3644 /*
3645 * Accelerated common case where input don't need to be
3646 * modified before passing it to the handler.
3647 */
3648 in = ctxt->input->cur;
3649 do {
3650 if (*in == 0xA) {
3651 ctxt->input->line++; ctxt->input->col = 1;
3652 in++;
3653 while (*in == 0xA) {
3654 ctxt->input->line++; ctxt->input->col = 1;
3655 in++;
3656 }
3657 }
3658get_more:
3659 ccol = ctxt->input->col;
3660 while (((*in > '-') && (*in <= 0x7F)) ||
3661 ((*in >= 0x20) && (*in < '-')) ||
3662 (*in == 0x09)) {
3663 in++;
3664 ccol++;
3665 }
3666 ctxt->input->col = ccol;
3667 if (*in == 0xA) {
3668 ctxt->input->line++; ctxt->input->col = 1;
3669 in++;
3670 while (*in == 0xA) {
3671 ctxt->input->line++; ctxt->input->col = 1;
3672 in++;
3673 }
3674 goto get_more;
3675 }
3676 nbchar = in - ctxt->input->cur;
3677 /*
3678 * save current set of data
3679 */
3680 if (nbchar > 0) {
3681 if ((ctxt->sax != NULL) &&
3682 (ctxt->sax->comment != NULL)) {
3683 if (buf == NULL) {
3684 if ((*in == '-') && (in[1] == '-'))
3685 size = nbchar + 1;
3686 else
3687 size = XML_PARSER_BUFFER_SIZE + nbchar;
3688 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3689 if (buf == NULL) {
3690 xmlErrMemory(ctxt, NULL);
3691 ctxt->instate = state;
3692 return;
3693 }
3694 len = 0;
3695 } else if (len + nbchar + 1 >= size) {
3696 xmlChar *new_buf;
3697 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3698 new_buf = (xmlChar *) xmlRealloc(buf,
3699 size * sizeof(xmlChar));
3700 if (new_buf == NULL) {
3701 xmlFree (buf);
3702 xmlErrMemory(ctxt, NULL);
3703 ctxt->instate = state;
3704 return;
3705 }
3706 buf = new_buf;
3707 }
3708 memcpy(&buf[len], ctxt->input->cur, nbchar);
3709 len += nbchar;
3710 buf[len] = 0;
3711 }
3712 }
3713 ctxt->input->cur = in;
3714 if (*in == 0xA)
3715
3716 if (*in == 0xD) {
3717 in++;
3718 if (*in == 0xA) {
3719 ctxt->input->cur = in;
3720 in++;
3721 ctxt->input->line++; ctxt->input->col = 1;
3722 continue; /* while */
3723 }
3724 in--;
3725 }
3726 SHRINK;
3727 GROW;
3728 in = ctxt->input->cur;
3729 if (*in == '-') {
3730 if (in[1] == '-') {
3731 if (in[2] == '>') {
3732 SKIP(3);
3733 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3734 (!ctxt->disableSAX)) {
3735 if (buf != NULL)
3736 ctxt->sax->comment(ctxt->userData, buf);
3737 else
3738 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3739 }
3740 if (buf != NULL)
3741 xmlFree(buf);
3742 ctxt->instate = state;
3743 return;
3744 }
3745 if (buf != NULL)
3746 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3747 "Comment not terminated \n<!--%.50s\n",
3748 buf);
3749 else
3750 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3751 "Comment not terminated \n", NULL);
3752 in++;
3753 ctxt->input->col++;
3754 }
3755 in++;
3756 ctxt->input->col++;
3757 goto get_more;
3758 }
3759 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3760 xmlParseCommentComplex(ctxt, buf, len, size);
3761 ctxt->instate = state;
3762 return;
3763}
3764
Owen Taylor3473f882001-02-23 17:55:21 +00003765
3766/**
3767 * xmlParsePITarget:
3768 * @ctxt: an XML parser context
3769 *
3770 * parse the name of a PI
3771 *
3772 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3773 *
3774 * Returns the PITarget name or NULL
3775 */
3776
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003777const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003778xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003779 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003780
3781 name = xmlParseName(ctxt);
3782 if ((name != NULL) &&
3783 ((name[0] == 'x') || (name[0] == 'X')) &&
3784 ((name[1] == 'm') || (name[1] == 'M')) &&
3785 ((name[2] == 'l') || (name[2] == 'L'))) {
3786 int i;
3787 if ((name[0] == 'x') && (name[1] == 'm') &&
3788 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003789 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003790 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003791 return(name);
3792 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003793 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003794 return(name);
3795 }
3796 for (i = 0;;i++) {
3797 if (xmlW3CPIs[i] == NULL) break;
3798 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3799 return(name);
3800 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003801 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3802 "xmlParsePITarget: invalid name prefix 'xml'\n",
3803 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
3805 return(name);
3806}
3807
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form: catalog = "URL" (or 'URL'), with optional
 * blanks around each token and nothing but blanks after the closing
 * quote. Any other form emits the warning XML_WAR_CATALOG_PI, except a
 * missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        /*
         * NOTE(review): every other syntax problem goes through the
         * error label and emits a warning; confirm this silent return
         * is intended.
         */
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* the opening quote is also the expected closing one */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3869
Owen Taylor3473f882001-02-23 17:55:21 +00003870/**
3871 * xmlParsePI:
3872 * @ctxt: an XML parser context
3873 *
3874 * parse an XML Processing Instruction.
3875 *
3876 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3877 *
3878 * The processing is transfered to SAX once parsed.
3879 */
3880
3881void
3882xmlParsePI(xmlParserCtxtPtr ctxt) {
3883 xmlChar *buf = NULL;
3884 int len = 0;
3885 int size = XML_PARSER_BUFFER_SIZE;
3886 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003887 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003888 xmlParserInputState state;
3889 int count = 0;
3890
3891 if ((RAW == '<') && (NXT(1) == '?')) {
3892 xmlParserInputPtr input = ctxt->input;
3893 state = ctxt->instate;
3894 ctxt->instate = XML_PARSER_PI;
3895 /*
3896 * this is a Processing Instruction.
3897 */
3898 SKIP(2);
3899 SHRINK;
3900
3901 /*
3902 * Parse the target name and check for special support like
3903 * namespace.
3904 */
3905 target = xmlParsePITarget(ctxt);
3906 if (target != NULL) {
3907 if ((RAW == '?') && (NXT(1) == '>')) {
3908 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003909 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3910 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003911 }
3912 SKIP(2);
3913
3914 /*
3915 * SAX: PI detected.
3916 */
3917 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3918 (ctxt->sax->processingInstruction != NULL))
3919 ctxt->sax->processingInstruction(ctxt->userData,
3920 target, NULL);
3921 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003922 return;
3923 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003924 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003925 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003926 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 ctxt->instate = state;
3928 return;
3929 }
3930 cur = CUR;
3931 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003932 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3933 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003934 }
3935 SKIP_BLANKS;
3936 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003937 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003938 ((cur != '?') || (NXT(1) != '>'))) {
3939 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003940 xmlChar *tmp;
3941
Owen Taylor3473f882001-02-23 17:55:21 +00003942 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003943 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3944 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003945 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003946 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003947 ctxt->instate = state;
3948 return;
3949 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003950 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003951 }
3952 count++;
3953 if (count > 50) {
3954 GROW;
3955 count = 0;
3956 }
3957 COPY_BUF(l,buf,len,cur);
3958 NEXTL(l);
3959 cur = CUR_CHAR(l);
3960 if (cur == 0) {
3961 SHRINK;
3962 GROW;
3963 cur = CUR_CHAR(l);
3964 }
3965 }
3966 buf[len] = 0;
3967 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003968 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3969 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 } else {
3971 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003972 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3973 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003974 }
3975 SKIP(2);
3976
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003977#ifdef LIBXML_CATALOG_ENABLED
3978 if (((state == XML_PARSER_MISC) ||
3979 (state == XML_PARSER_START)) &&
3980 (xmlStrEqual(target, XML_CATALOG_PI))) {
3981 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3982 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3983 (allow == XML_CATA_ALLOW_ALL))
3984 xmlParseCatalogPI(ctxt, buf);
3985 }
3986#endif
3987
3988
Owen Taylor3473f882001-02-23 17:55:21 +00003989 /*
3990 * SAX: PI detected.
3991 */
3992 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3993 (ctxt->sax->processingInstruction != NULL))
3994 ctxt->sax->processingInstruction(ctxt->userData,
3995 target, buf);
3996 }
3997 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003998 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003999 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004000 }
4001 ctxt->instate = state;
4002 }
4003}
4004
4005/**
4006 * xmlParseNotationDecl:
4007 * @ctxt: an XML parser context
4008 *
4009 * parse a notation declaration
4010 *
4011 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4012 *
4013 * Hence there is actually 3 choices:
4014 * 'PUBLIC' S PubidLiteral
4015 * 'PUBLIC' S PubidLiteral S SystemLiteral
4016 * and 'SYSTEM' S SystemLiteral
4017 *
4018 * See the NOTE on xmlParseExternalID().
4019 */
4020
4021void
4022xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004023 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004024 xmlChar *Pubid;
4025 xmlChar *Systemid;
4026
Daniel Veillarda07050d2003-10-19 14:46:32 +00004027 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004028 xmlParserInputPtr input = ctxt->input;
4029 SHRINK;
4030 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004031 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004032 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4033 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004034 return;
4035 }
4036 SKIP_BLANKS;
4037
Daniel Veillard76d66f42001-05-16 21:05:17 +00004038 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004039 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004040 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 return;
4042 }
William M. Brack76e95df2003-10-18 16:20:14 +00004043 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004044 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004045 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004046 return;
4047 }
4048 SKIP_BLANKS;
4049
4050 /*
4051 * Parse the IDs.
4052 */
4053 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4054 SKIP_BLANKS;
4055
4056 if (RAW == '>') {
4057 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004058 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4059 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004060 }
4061 NEXT;
4062 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4063 (ctxt->sax->notationDecl != NULL))
4064 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4065 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004066 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004067 }
Owen Taylor3473f882001-02-23 17:55:21 +00004068 if (Systemid != NULL) xmlFree(Systemid);
4069 if (Pubid != NULL) xmlFree(Pubid);
4070 }
4071}
4072
4073/**
4074 * xmlParseEntityDecl:
4075 * @ctxt: an XML parser context
4076 *
4077 * parse <!ENTITY declarations
4078 *
4079 * [70] EntityDecl ::= GEDecl | PEDecl
4080 *
4081 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4082 *
4083 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4084 *
4085 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4086 *
4087 * [74] PEDef ::= EntityValue | ExternalID
4088 *
4089 * [76] NDataDecl ::= S 'NDATA' S Name
4090 *
4091 * [ VC: Notation Declared ]
4092 * The Name must match the declared name of a notation.
4093 */
4094
4095void
4096xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004097 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004098 xmlChar *value = NULL;
4099 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004100 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004101 int isParameter = 0;
4102 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004103 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004104
Daniel Veillard4c778d82005-01-23 17:37:44 +00004105 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004106 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004107 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004108 SHRINK;
4109 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004110 skipped = SKIP_BLANKS;
4111 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004112 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4113 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004114 }
Owen Taylor3473f882001-02-23 17:55:21 +00004115
4116 if (RAW == '%') {
4117 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004118 skipped = SKIP_BLANKS;
4119 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004120 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4121 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004122 }
Owen Taylor3473f882001-02-23 17:55:21 +00004123 isParameter = 1;
4124 }
4125
Daniel Veillard76d66f42001-05-16 21:05:17 +00004126 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004127 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004128 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4129 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004130 return;
4131 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004132 skipped = SKIP_BLANKS;
4133 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4135 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004136 }
Owen Taylor3473f882001-02-23 17:55:21 +00004137
Daniel Veillardf5582f12002-06-11 10:08:16 +00004138 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004139 /*
4140 * handle the various case of definitions...
4141 */
4142 if (isParameter) {
4143 if ((RAW == '"') || (RAW == '\'')) {
4144 value = xmlParseEntityValue(ctxt, &orig);
4145 if (value) {
4146 if ((ctxt->sax != NULL) &&
4147 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4148 ctxt->sax->entityDecl(ctxt->userData, name,
4149 XML_INTERNAL_PARAMETER_ENTITY,
4150 NULL, NULL, value);
4151 }
4152 } else {
4153 URI = xmlParseExternalID(ctxt, &literal, 1);
4154 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004155 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004156 }
4157 if (URI) {
4158 xmlURIPtr uri;
4159
4160 uri = xmlParseURI((const char *) URI);
4161 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004162 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4163 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004164 /*
4165 * This really ought to be a well formedness error
4166 * but the XML Core WG decided otherwise c.f. issue
4167 * E26 of the XML erratas.
4168 */
Owen Taylor3473f882001-02-23 17:55:21 +00004169 } else {
4170 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004171 /*
4172 * Okay this is foolish to block those but not
4173 * invalid URIs.
4174 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004175 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 } else {
4177 if ((ctxt->sax != NULL) &&
4178 (!ctxt->disableSAX) &&
4179 (ctxt->sax->entityDecl != NULL))
4180 ctxt->sax->entityDecl(ctxt->userData, name,
4181 XML_EXTERNAL_PARAMETER_ENTITY,
4182 literal, URI, NULL);
4183 }
4184 xmlFreeURI(uri);
4185 }
4186 }
4187 }
4188 } else {
4189 if ((RAW == '"') || (RAW == '\'')) {
4190 value = xmlParseEntityValue(ctxt, &orig);
4191 if ((ctxt->sax != NULL) &&
4192 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4193 ctxt->sax->entityDecl(ctxt->userData, name,
4194 XML_INTERNAL_GENERAL_ENTITY,
4195 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004196 /*
4197 * For expat compatibility in SAX mode.
4198 */
4199 if ((ctxt->myDoc == NULL) ||
4200 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4201 if (ctxt->myDoc == NULL) {
4202 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4203 }
4204 if (ctxt->myDoc->intSubset == NULL)
4205 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4206 BAD_CAST "fake", NULL, NULL);
4207
Daniel Veillard1af9a412003-08-20 22:54:39 +00004208 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4209 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004210 }
Owen Taylor3473f882001-02-23 17:55:21 +00004211 } else {
4212 URI = xmlParseExternalID(ctxt, &literal, 1);
4213 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004214 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004215 }
4216 if (URI) {
4217 xmlURIPtr uri;
4218
4219 uri = xmlParseURI((const char *)URI);
4220 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004221 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4222 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004223 /*
4224 * This really ought to be a well formedness error
4225 * but the XML Core WG decided otherwise c.f. issue
4226 * E26 of the XML erratas.
4227 */
Owen Taylor3473f882001-02-23 17:55:21 +00004228 } else {
4229 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004230 /*
4231 * Okay this is foolish to block those but not
4232 * invalid URIs.
4233 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004234 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004235 }
4236 xmlFreeURI(uri);
4237 }
4238 }
William M. Brack76e95df2003-10-18 16:20:14 +00004239 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004240 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4241 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004242 }
4243 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004244 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004245 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004246 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004247 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4248 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
4250 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004251 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4253 (ctxt->sax->unparsedEntityDecl != NULL))
4254 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4255 literal, URI, ndata);
4256 } else {
4257 if ((ctxt->sax != NULL) &&
4258 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4259 ctxt->sax->entityDecl(ctxt->userData, name,
4260 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4261 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004262 /*
4263 * For expat compatibility in SAX mode.
4264 * assuming the entity repalcement was asked for
4265 */
4266 if ((ctxt->replaceEntities != 0) &&
4267 ((ctxt->myDoc == NULL) ||
4268 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4269 if (ctxt->myDoc == NULL) {
4270 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4271 }
4272
4273 if (ctxt->myDoc->intSubset == NULL)
4274 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4275 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004276 xmlSAX2EntityDecl(ctxt, name,
4277 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4278 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004279 }
Owen Taylor3473f882001-02-23 17:55:21 +00004280 }
4281 }
4282 }
4283 SKIP_BLANKS;
4284 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004285 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004286 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004287 } else {
4288 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004289 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4290 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004291 }
4292 NEXT;
4293 }
4294 if (orig != NULL) {
4295 /*
4296 * Ugly mechanism to save the raw entity value.
4297 */
4298 xmlEntityPtr cur = NULL;
4299
4300 if (isParameter) {
4301 if ((ctxt->sax != NULL) &&
4302 (ctxt->sax->getParameterEntity != NULL))
4303 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4304 } else {
4305 if ((ctxt->sax != NULL) &&
4306 (ctxt->sax->getEntity != NULL))
4307 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004308 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004309 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004310 }
Owen Taylor3473f882001-02-23 17:55:21 +00004311 }
4312 if (cur != NULL) {
4313 if (cur->orig != NULL)
4314 xmlFree(orig);
4315 else
4316 cur->orig = orig;
4317 } else
4318 xmlFree(orig);
4319 }
Owen Taylor3473f882001-02-23 17:55:21 +00004320 if (value != NULL) xmlFree(value);
4321 if (URI != NULL) xmlFree(URI);
4322 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
4324}
4325
4326/**
4327 * xmlParseDefaultDecl:
4328 * @ctxt: an XML parser context
4329 * @value: Receive a possible fixed default value for the attribute
4330 *
4331 * Parse an attribute default declaration
4332 *
4333 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4334 *
4335 * [ VC: Required Attribute ]
4336 * if the default declaration is the keyword #REQUIRED, then the
4337 * attribute must be specified for all elements of the type in the
4338 * attribute-list declaration.
4339 *
4340 * [ VC: Attribute Default Legal ]
4341 * The declared default value must meet the lexical constraints of
4342 * the declared attribute type c.f. xmlValidateAttributeDecl()
4343 *
4344 * [ VC: Fixed Attribute Default ]
4345 * if an attribute has a default value declared with the #FIXED
4346 * keyword, instances of that attribute must match the default value.
4347 *
4348 * [ WFC: No < in Attribute Values ]
4349 * handled in xmlParseAttValue()
4350 *
4351 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4352 * or XML_ATTRIBUTE_FIXED.
4353 */
4354
4355int
4356xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4357 int val;
4358 xmlChar *ret;
4359
4360 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004361 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004362 SKIP(9);
4363 return(XML_ATTRIBUTE_REQUIRED);
4364 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004365 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004366 SKIP(8);
4367 return(XML_ATTRIBUTE_IMPLIED);
4368 }
4369 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004370 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004371 SKIP(6);
4372 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004373 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004374 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4375 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004376 }
4377 SKIP_BLANKS;
4378 }
4379 ret = xmlParseAttValue(ctxt);
4380 ctxt->instate = XML_PARSER_DTD;
4381 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004382 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004383 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004384 } else
4385 *value = ret;
4386 return(val);
4387}
4388
4389/**
4390 * xmlParseNotationType:
4391 * @ctxt: an XML parser context
4392 *
4393 * parse an Notation attribute type.
4394 *
4395 * Note: the leading 'NOTATION' S part has already being parsed...
4396 *
4397 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4398 *
4399 * [ VC: Notation Attributes ]
4400 * Values of this type must match one of the notation names included
4401 * in the declaration; all notation names in the declaration must be declared.
4402 *
4403 * Returns: the notation attribute tree built while parsing
4404 */
4405
4406xmlEnumerationPtr
4407xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004408 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004409 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4410
4411 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004412 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004413 return(NULL);
4414 }
4415 SHRINK;
4416 do {
4417 NEXT;
4418 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004419 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004420 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004421 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4422 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004423 return(ret);
4424 }
4425 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004426 if (cur == NULL) return(ret);
4427 if (last == NULL) ret = last = cur;
4428 else {
4429 last->next = cur;
4430 last = cur;
4431 }
4432 SKIP_BLANKS;
4433 } while (RAW == '|');
4434 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004435 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004436 if ((last != NULL) && (last != ret))
4437 xmlFreeEnumeration(last);
4438 return(ret);
4439 }
4440 NEXT;
4441 return(ret);
4442}
4443
4444/**
4445 * xmlParseEnumerationType:
4446 * @ctxt: an XML parser context
4447 *
4448 * parse an Enumeration attribute type.
4449 *
4450 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4451 *
4452 * [ VC: Enumeration ]
4453 * Values of this type must match one of the Nmtoken tokens in
4454 * the declaration
4455 *
4456 * Returns: the enumeration attribute tree built while parsing
4457 */
4458
4459xmlEnumerationPtr
4460xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4461 xmlChar *name;
4462 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4463
4464 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004465 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004466 return(NULL);
4467 }
4468 SHRINK;
4469 do {
4470 NEXT;
4471 SKIP_BLANKS;
4472 name = xmlParseNmtoken(ctxt);
4473 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004474 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004475 return(ret);
4476 }
4477 cur = xmlCreateEnumeration(name);
4478 xmlFree(name);
4479 if (cur == NULL) return(ret);
4480 if (last == NULL) ret = last = cur;
4481 else {
4482 last->next = cur;
4483 last = cur;
4484 }
4485 SKIP_BLANKS;
4486 } while (RAW == '|');
4487 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004488 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004489 return(ret);
4490 }
4491 NEXT;
4492 return(ret);
4493}
4494
4495/**
4496 * xmlParseEnumeratedType:
4497 * @ctxt: an XML parser context
4498 * @tree: the enumeration tree built while parsing
4499 *
4500 * parse an Enumerated attribute type.
4501 *
4502 * [57] EnumeratedType ::= NotationType | Enumeration
4503 *
4504 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4505 *
4506 *
4507 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4508 */
4509
4510int
4511xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004512 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004513 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004514 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004515 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4516 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004517 return(0);
4518 }
4519 SKIP_BLANKS;
4520 *tree = xmlParseNotationType(ctxt);
4521 if (*tree == NULL) return(0);
4522 return(XML_ATTRIBUTE_NOTATION);
4523 }
4524 *tree = xmlParseEnumerationType(ctxt);
4525 if (*tree == NULL) return(0);
4526 return(XML_ATTRIBUTE_ENUMERATION);
4527}
4528
4529/**
4530 * xmlParseAttributeType:
4531 * @ctxt: an XML parser context
4532 * @tree: the enumeration tree built while parsing
4533 *
4534 * parse the Attribute list def for an element
4535 *
4536 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4537 *
4538 * [55] StringType ::= 'CDATA'
4539 *
4540 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4541 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4542 *
4543 * Validity constraints for attribute values syntax are checked in
4544 * xmlValidateAttributeValue()
4545 *
4546 * [ VC: ID ]
4547 * Values of type ID must match the Name production. A name must not
4548 * appear more than once in an XML document as a value of this type;
4549 * i.e., ID values must uniquely identify the elements which bear them.
4550 *
4551 * [ VC: One ID per Element Type ]
4552 * No element type may have more than one ID attribute specified.
4553 *
4554 * [ VC: ID Attribute Default ]
4555 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4556 *
4557 * [ VC: IDREF ]
4558 * Values of type IDREF must match the Name production, and values
4559 * of type IDREFS must match Names; each IDREF Name must match the value
4560 * of an ID attribute on some element in the XML document; i.e. IDREF
4561 * values must match the value of some ID attribute.
4562 *
4563 * [ VC: Entity Name ]
4564 * Values of type ENTITY must match the Name production, values
4565 * of type ENTITIES must match Names; each Entity Name must match the
4566 * name of an unparsed entity declared in the DTD.
4567 *
4568 * [ VC: Name Token ]
4569 * Values of type NMTOKEN must match the Nmtoken production; values
4570 * of type NMTOKENS must match Nmtokens.
4571 *
4572 * Returns the attribute type
4573 */
4574int
4575xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4576 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004577 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004578 SKIP(5);
4579 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004580 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004581 SKIP(6);
4582 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004583 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004584 SKIP(5);
4585 return(XML_ATTRIBUTE_IDREF);
4586 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4587 SKIP(2);
4588 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004589 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004590 SKIP(6);
4591 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004592 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004593 SKIP(8);
4594 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004595 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004596 SKIP(8);
4597 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004598 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004599 SKIP(7);
4600 return(XML_ATTRIBUTE_NMTOKEN);
4601 }
4602 return(xmlParseEnumeratedType(ctxt, tree));
4603}
4604
4605/**
4606 * xmlParseAttributeListDecl:
4607 * @ctxt: an XML parser context
4608 *
4609 * : parse the Attribute list def for an element
4610 *
4611 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4612 *
4613 * [53] AttDef ::= S Name S AttType S DefaultDecl
4614 *
4615 */
4616void
4617xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004618 const xmlChar *elemName;
4619 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004620 xmlEnumerationPtr tree;
4621
Daniel Veillarda07050d2003-10-19 14:46:32 +00004622 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004623 xmlParserInputPtr input = ctxt->input;
4624
4625 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004626 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004627 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004628 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004629 }
4630 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004631 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004632 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004633 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4634 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004635 return;
4636 }
4637 SKIP_BLANKS;
4638 GROW;
4639 while (RAW != '>') {
4640 const xmlChar *check = CUR_PTR;
4641 int type;
4642 int def;
4643 xmlChar *defaultValue = NULL;
4644
4645 GROW;
4646 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004647 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004648 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004649 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4650 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004651 break;
4652 }
4653 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004654 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004655 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004656 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004657 if (defaultValue != NULL)
4658 xmlFree(defaultValue);
4659 break;
4660 }
4661 SKIP_BLANKS;
4662
4663 type = xmlParseAttributeType(ctxt, &tree);
4664 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004665 if (defaultValue != NULL)
4666 xmlFree(defaultValue);
4667 break;
4668 }
4669
4670 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004671 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4673 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (defaultValue != NULL)
4675 xmlFree(defaultValue);
4676 if (tree != NULL)
4677 xmlFreeEnumeration(tree);
4678 break;
4679 }
4680 SKIP_BLANKS;
4681
4682 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4683 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004684 if (defaultValue != NULL)
4685 xmlFree(defaultValue);
4686 if (tree != NULL)
4687 xmlFreeEnumeration(tree);
4688 break;
4689 }
4690
4691 GROW;
4692 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004693 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004695 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004696 if (defaultValue != NULL)
4697 xmlFree(defaultValue);
4698 if (tree != NULL)
4699 xmlFreeEnumeration(tree);
4700 break;
4701 }
4702 SKIP_BLANKS;
4703 }
4704 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004705 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4706 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004707 if (defaultValue != NULL)
4708 xmlFree(defaultValue);
4709 if (tree != NULL)
4710 xmlFreeEnumeration(tree);
4711 break;
4712 }
4713 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4714 (ctxt->sax->attributeDecl != NULL))
4715 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4716 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004717 else if (tree != NULL)
4718 xmlFreeEnumeration(tree);
4719
4720 if ((ctxt->sax2) && (defaultValue != NULL) &&
4721 (def != XML_ATTRIBUTE_IMPLIED) &&
4722 (def != XML_ATTRIBUTE_REQUIRED)) {
4723 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4724 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004725 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4726 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4727 }
Owen Taylor3473f882001-02-23 17:55:21 +00004728 if (defaultValue != NULL)
4729 xmlFree(defaultValue);
4730 GROW;
4731 }
4732 if (RAW == '>') {
4733 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004734 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4735 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004736 }
4737 NEXT;
4738 }
Owen Taylor3473f882001-02-23 17:55:21 +00004739 }
4740}
4741
4742/**
4743 * xmlParseElementMixedContentDecl:
4744 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004745 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004746 *
4747 * parse the declaration for a Mixed Element content
4748 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4749 *
4750 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4751 * '(' S? '#PCDATA' S? ')'
4752 *
4753 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4754 *
4755 * [ VC: No Duplicate Types ]
4756 * The same name must not appear more than once in a single
4757 * mixed-content declaration.
4758 *
4759 * returns: the list of the xmlElementContentPtr describing the element choices
4760 */
4761xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004762xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004763 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004764 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004765
4766 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004767 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004768 SKIP(7);
4769 SKIP_BLANKS;
4770 SHRINK;
4771 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004772 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004773 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4774"Element content declaration doesn't start and stop in the same entity\n",
4775 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004776 }
Owen Taylor3473f882001-02-23 17:55:21 +00004777 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004778 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004779 if (RAW == '*') {
4780 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4781 NEXT;
4782 }
4783 return(ret);
4784 }
4785 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004786 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004787 if (ret == NULL) return(NULL);
4788 }
4789 while (RAW == '|') {
4790 NEXT;
4791 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004792 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 if (ret == NULL) return(NULL);
4794 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004795 if (cur != NULL)
4796 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004797 cur = ret;
4798 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004799 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004800 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004801 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004802 if (n->c1 != NULL)
4803 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004804 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004805 if (n != NULL)
4806 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004807 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004808 }
4809 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004810 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004811 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004812 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004813 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004814 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004815 return(NULL);
4816 }
4817 SKIP_BLANKS;
4818 GROW;
4819 }
4820 if ((RAW == ')') && (NXT(1) == '*')) {
4821 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004822 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00004823 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004824 if (cur->c2 != NULL)
4825 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004826 }
4827 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004828 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004829 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4830"Element content declaration doesn't start and stop in the same entity\n",
4831 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004832 }
Owen Taylor3473f882001-02-23 17:55:21 +00004833 SKIP(2);
4834 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004835 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004836 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004837 return(NULL);
4838 }
4839
4840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004841 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004842 }
4843 return(ret);
4844}
4845
4846/**
4847 * xmlParseElementChildrenContentDecl:
4848 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004849 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004850 *
4851 * parse the declaration for a Mixed Element content
4852 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4853 *
4854 *
4855 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4856 *
4857 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4858 *
4859 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4860 *
4861 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4862 *
4863 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4864 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004865 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004866 * opening or closing parentheses in a choice, seq, or Mixed
4867 * construct is contained in the replacement text for a parameter
4868 * entity, both must be contained in the same replacement text. For
4869 * interoperability, if a parameter-entity reference appears in a
4870 * choice, seq, or Mixed construct, its replacement text should not
4871 * be empty, and neither the first nor last non-blank character of
4872 * the replacement text should be a connector (| or ,).
4873 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004874 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004875 * hierarchy.
4876 */
4877xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004878xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004879 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004880 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004881 xmlChar type = 0;
4882
4883 SKIP_BLANKS;
4884 GROW;
4885 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004886 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004887
Owen Taylor3473f882001-02-23 17:55:21 +00004888 /* Recurse on first child */
4889 NEXT;
4890 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004891 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004892 SKIP_BLANKS;
4893 GROW;
4894 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004895 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004896 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004897 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004898 return(NULL);
4899 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004900 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004901 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004902 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004903 return(NULL);
4904 }
Owen Taylor3473f882001-02-23 17:55:21 +00004905 GROW;
4906 if (RAW == '?') {
4907 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4908 NEXT;
4909 } else if (RAW == '*') {
4910 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4911 NEXT;
4912 } else if (RAW == '+') {
4913 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4914 NEXT;
4915 } else {
4916 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4917 }
Owen Taylor3473f882001-02-23 17:55:21 +00004918 GROW;
4919 }
4920 SKIP_BLANKS;
4921 SHRINK;
4922 while (RAW != ')') {
4923 /*
4924 * Each loop we parse one separator and one element.
4925 */
4926 if (RAW == ',') {
4927 if (type == 0) type = CUR;
4928
4929 /*
4930 * Detect "Name | Name , Name" error
4931 */
4932 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004933 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004934 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004935 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004936 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004937 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004938 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004939 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004940 return(NULL);
4941 }
4942 NEXT;
4943
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004944 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00004945 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004946 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004947 xmlFreeDocElementContent(ctxt->myDoc, last);
4948 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004949 return(NULL);
4950 }
4951 if (last == NULL) {
4952 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004953 if (ret != NULL)
4954 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004955 ret = cur = op;
4956 } else {
4957 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004958 if (op != NULL)
4959 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004960 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004961 if (last != NULL)
4962 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004963 cur =op;
4964 last = NULL;
4965 }
4966 } else if (RAW == '|') {
4967 if (type == 0) type = CUR;
4968
4969 /*
4970 * Detect "Name , Name | Name" error
4971 */
4972 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004973 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004974 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004975 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004976 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004977 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004978 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004979 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004980 return(NULL);
4981 }
4982 NEXT;
4983
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004984 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004985 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004986 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004987 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004988 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004989 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004990 return(NULL);
4991 }
4992 if (last == NULL) {
4993 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004994 if (ret != NULL)
4995 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004996 ret = cur = op;
4997 } else {
4998 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004999 if (op != NULL)
5000 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005001 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005002 if (last != NULL)
5003 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005004 cur =op;
5005 last = NULL;
5006 }
5007 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005008 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005009 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005010 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 return(NULL);
5012 }
5013 GROW;
5014 SKIP_BLANKS;
5015 GROW;
5016 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005017 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005018 /* Recurse on second child */
5019 NEXT;
5020 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005021 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005022 SKIP_BLANKS;
5023 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005024 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005025 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005026 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005028 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 return(NULL);
5030 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005031 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005032 if (RAW == '?') {
5033 last->ocur = XML_ELEMENT_CONTENT_OPT;
5034 NEXT;
5035 } else if (RAW == '*') {
5036 last->ocur = XML_ELEMENT_CONTENT_MULT;
5037 NEXT;
5038 } else if (RAW == '+') {
5039 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5040 NEXT;
5041 } else {
5042 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5043 }
5044 }
5045 SKIP_BLANKS;
5046 GROW;
5047 }
5048 if ((cur != NULL) && (last != NULL)) {
5049 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005050 if (last != NULL)
5051 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005052 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005053 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005054 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5055"Element content declaration doesn't start and stop in the same entity\n",
5056 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005057 }
Owen Taylor3473f882001-02-23 17:55:21 +00005058 NEXT;
5059 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005060 if (ret != NULL) {
5061 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5062 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5063 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5064 else
5065 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5066 }
Owen Taylor3473f882001-02-23 17:55:21 +00005067 NEXT;
5068 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005069 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005070 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005071 cur = ret;
5072 /*
5073 * Some normalization:
5074 * (a | b* | c?)* == (a | b | c)*
5075 */
5076 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5077 if ((cur->c1 != NULL) &&
5078 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5079 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5080 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5081 if ((cur->c2 != NULL) &&
5082 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5083 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5084 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5085 cur = cur->c2;
5086 }
5087 }
Owen Taylor3473f882001-02-23 17:55:21 +00005088 NEXT;
5089 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005090 if (ret != NULL) {
5091 int found = 0;
5092
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005093 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5094 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005096 else
5097 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005098 /*
5099 * Some normalization:
5100 * (a | b*)+ == (a | b)*
5101 * (a | b?)+ == (a | b)*
5102 */
5103 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5104 if ((cur->c1 != NULL) &&
5105 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5106 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5107 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5108 found = 1;
5109 }
5110 if ((cur->c2 != NULL) &&
5111 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5112 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5113 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5114 found = 1;
5115 }
5116 cur = cur->c2;
5117 }
5118 if (found)
5119 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5120 }
Owen Taylor3473f882001-02-23 17:55:21 +00005121 NEXT;
5122 }
5123 return(ret);
5124}
5125
5126/**
5127 * xmlParseElementContentDecl:
5128 * @ctxt: an XML parser context
5129 * @name: the name of the element being defined.
5130 * @result: the Element Content pointer will be stored here if any
5131 *
5132 * parse the declaration for an Element content either Mixed or Children,
5133 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5134 *
5135 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5136 *
5137 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5138 */
5139
5140int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005141xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005142 xmlElementContentPtr *result) {
5143
5144 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 int res;
5147
5148 *result = NULL;
5149
5150 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005151 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005152 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 return(-1);
5154 }
5155 NEXT;
5156 GROW;
5157 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005158 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005159 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005160 res = XML_ELEMENT_TYPE_MIXED;
5161 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005162 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 res = XML_ELEMENT_TYPE_ELEMENT;
5164 }
Owen Taylor3473f882001-02-23 17:55:21 +00005165 SKIP_BLANKS;
5166 *result = tree;
5167 return(res);
5168}
5169
5170/**
5171 * xmlParseElementDecl:
5172 * @ctxt: an XML parser context
5173 *
5174 * parse an Element declaration.
5175 *
5176 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5177 *
5178 * [ VC: Unique Element Type Declaration ]
5179 * No element type may be declared more than once
5180 *
5181 * Returns the type of the element, or -1 in case of error
5182 */
5183int
5184xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005185 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005186 int ret = -1;
5187 xmlElementContentPtr content = NULL;
5188
Daniel Veillard4c778d82005-01-23 17:37:44 +00005189 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005190 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005191 xmlParserInputPtr input = ctxt->input;
5192
5193 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005194 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005195 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5196 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005197 }
5198 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005199 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005201 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5202 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005203 return(-1);
5204 }
5205 while ((RAW == 0) && (ctxt->inputNr > 1))
5206 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005207 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005208 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5209 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005210 }
5211 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005212 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005213 SKIP(5);
5214 /*
5215 * Element must always be empty.
5216 */
5217 ret = XML_ELEMENT_TYPE_EMPTY;
5218 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5219 (NXT(2) == 'Y')) {
5220 SKIP(3);
5221 /*
5222 * Element is a generic container.
5223 */
5224 ret = XML_ELEMENT_TYPE_ANY;
5225 } else if (RAW == '(') {
5226 ret = xmlParseElementContentDecl(ctxt, name, &content);
5227 } else {
5228 /*
5229 * [ WFC: PEs in Internal Subset ] error handling.
5230 */
5231 if ((RAW == '%') && (ctxt->external == 0) &&
5232 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005233 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005234 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005235 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005236 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005237 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5238 }
Owen Taylor3473f882001-02-23 17:55:21 +00005239 return(-1);
5240 }
5241
5242 SKIP_BLANKS;
5243 /*
5244 * Pop-up of finished entities.
5245 */
5246 while ((RAW == 0) && (ctxt->inputNr > 1))
5247 xmlPopInput(ctxt);
5248 SKIP_BLANKS;
5249
5250 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005251 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005252 if (content != NULL) {
5253 xmlFreeDocElementContent(ctxt->myDoc, content);
5254 }
Owen Taylor3473f882001-02-23 17:55:21 +00005255 } else {
5256 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005257 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5258 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005259 }
5260
5261 NEXT;
5262 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005263 (ctxt->sax->elementDecl != NULL)) {
5264 if (content != NULL)
5265 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5267 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005268 if ((content != NULL) && (content->parent == NULL)) {
5269 /*
5270 * this is a trick: if xmlAddElementDecl is called,
5271 * instead of copying the full tree it is plugged directly
5272 * if called from the parser. Avoid duplicating the
5273 * interfaces or change the API/ABI
5274 */
5275 xmlFreeDocElementContent(ctxt->myDoc, content);
5276 }
5277 } else if (content != NULL) {
5278 xmlFreeDocElementContent(ctxt->myDoc, content);
5279 }
Owen Taylor3473f882001-02-23 17:55:21 +00005280 }
Owen Taylor3473f882001-02-23 17:55:21 +00005281 }
5282 return(ret);
5283}
5284
5285/**
Owen Taylor3473f882001-02-23 17:55:21 +00005286 * xmlParseConditionalSections
5287 * @ctxt: an XML parser context
5288 *
5289 * [61] conditionalSect ::= includeSect | ignoreSect
5290 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5291 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5292 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5293 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5294 */
5295
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005296static void
Owen Taylor3473f882001-02-23 17:55:21 +00005297xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5298 SKIP(3);
5299 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005300 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005301 SKIP(7);
5302 SKIP_BLANKS;
5303 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005304 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005305 } else {
5306 NEXT;
5307 }
5308 if (xmlParserDebugEntities) {
5309 if ((ctxt->input != NULL) && (ctxt->input->filename))
5310 xmlGenericError(xmlGenericErrorContext,
5311 "%s(%d): ", ctxt->input->filename,
5312 ctxt->input->line);
5313 xmlGenericError(xmlGenericErrorContext,
5314 "Entering INCLUDE Conditional Section\n");
5315 }
5316
5317 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5318 (NXT(2) != '>'))) {
5319 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005320 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005321
5322 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5323 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005324 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005325 NEXT;
5326 } else if (RAW == '%') {
5327 xmlParsePEReference(ctxt);
5328 } else
5329 xmlParseMarkupDecl(ctxt);
5330
5331 /*
5332 * Pop-up of finished entities.
5333 */
5334 while ((RAW == 0) && (ctxt->inputNr > 1))
5335 xmlPopInput(ctxt);
5336
Daniel Veillardfdc91562002-07-01 21:52:03 +00005337 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005338 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005339 break;
5340 }
5341 }
5342 if (xmlParserDebugEntities) {
5343 if ((ctxt->input != NULL) && (ctxt->input->filename))
5344 xmlGenericError(xmlGenericErrorContext,
5345 "%s(%d): ", ctxt->input->filename,
5346 ctxt->input->line);
5347 xmlGenericError(xmlGenericErrorContext,
5348 "Leaving INCLUDE Conditional Section\n");
5349 }
5350
Daniel Veillarda07050d2003-10-19 14:46:32 +00005351 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005352 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005353 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 int depth = 0;
5355
5356 SKIP(6);
5357 SKIP_BLANKS;
5358 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005359 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 } else {
5361 NEXT;
5362 }
5363 if (xmlParserDebugEntities) {
5364 if ((ctxt->input != NULL) && (ctxt->input->filename))
5365 xmlGenericError(xmlGenericErrorContext,
5366 "%s(%d): ", ctxt->input->filename,
5367 ctxt->input->line);
5368 xmlGenericError(xmlGenericErrorContext,
5369 "Entering IGNORE Conditional Section\n");
5370 }
5371
5372 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005373 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005374 * But disable SAX event generating DTD building in the meantime
5375 */
5376 state = ctxt->disableSAX;
5377 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005379 ctxt->instate = XML_PARSER_IGNORE;
5380
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005381 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005382 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5383 depth++;
5384 SKIP(3);
5385 continue;
5386 }
5387 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5388 if (--depth >= 0) SKIP(3);
5389 continue;
5390 }
5391 NEXT;
5392 continue;
5393 }
5394
5395 ctxt->disableSAX = state;
5396 ctxt->instate = instate;
5397
5398 if (xmlParserDebugEntities) {
5399 if ((ctxt->input != NULL) && (ctxt->input->filename))
5400 xmlGenericError(xmlGenericErrorContext,
5401 "%s(%d): ", ctxt->input->filename,
5402 ctxt->input->line);
5403 xmlGenericError(xmlGenericErrorContext,
5404 "Leaving IGNORE Conditional Section\n");
5405 }
5406
5407 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005408 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 }
5410
5411 if (RAW == 0)
5412 SHRINK;
5413
5414 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005415 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005416 } else {
5417 SKIP(3);
5418 }
5419}
5420
5421/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005422 * xmlParseMarkupDecl:
5423 * @ctxt: an XML parser context
5424 *
5425 * parse Markup declarations
5426 *
5427 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5428 * NotationDecl | PI | Comment
5429 *
5430 * [ VC: Proper Declaration/PE Nesting ]
5431 * Parameter-entity replacement text must be properly nested with
5432 * markup declarations. That is to say, if either the first character
5433 * or the last character of a markup declaration (markupdecl above) is
5434 * contained in the replacement text for a parameter-entity reference,
5435 * both must be contained in the same replacement text.
5436 *
5437 * [ WFC: PEs in Internal Subset ]
5438 * In the internal DTD subset, parameter-entity references can occur
5439 * only where markup declarations can occur, not within markup declarations.
5440 * (This does not apply to references that occur in external parameter
5441 * entities or to the external subset.)
5442 */
5443void
5444xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5445 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005446 if (CUR == '<') {
5447 if (NXT(1) == '!') {
5448 switch (NXT(2)) {
5449 case 'E':
5450 if (NXT(3) == 'L')
5451 xmlParseElementDecl(ctxt);
5452 else if (NXT(3) == 'N')
5453 xmlParseEntityDecl(ctxt);
5454 break;
5455 case 'A':
5456 xmlParseAttributeListDecl(ctxt);
5457 break;
5458 case 'N':
5459 xmlParseNotationDecl(ctxt);
5460 break;
5461 case '-':
5462 xmlParseComment(ctxt);
5463 break;
5464 default:
5465 /* there is an error but it will be detected later */
5466 break;
5467 }
5468 } else if (NXT(1) == '?') {
5469 xmlParsePI(ctxt);
5470 }
5471 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005472 /*
5473 * This is only for internal subset. On external entities,
5474 * the replacement is done before parsing stage
5475 */
5476 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5477 xmlParsePEReference(ctxt);
5478
5479 /*
5480 * Conditional sections are allowed from entities included
5481 * by PE References in the internal subset.
5482 */
5483 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5484 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5485 xmlParseConditionalSections(ctxt);
5486 }
5487 }
5488
5489 ctxt->instate = XML_PARSER_DTD;
5490}
5491
5492/**
5493 * xmlParseTextDecl:
5494 * @ctxt: an XML parser context
5495 *
5496 * parse an XML declaration header for external entities
5497 *
5498 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5499 *
5500 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5501 */
5502
5503void
5504xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5505 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005506 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005507
5508 /*
5509 * We know that '<?xml' is here.
5510 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005511 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005512 SKIP(5);
5513 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005514 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005515 return;
5516 }
5517
William M. Brack76e95df2003-10-18 16:20:14 +00005518 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005521 }
5522 SKIP_BLANKS;
5523
5524 /*
5525 * We may have the VersionInfo here.
5526 */
5527 version = xmlParseVersionInfo(ctxt);
5528 if (version == NULL)
5529 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005530 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005531 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005532 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5533 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005534 }
5535 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005536 ctxt->input->version = version;
5537
5538 /*
5539 * We must have the encoding declaration
5540 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005541 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005542 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5543 /*
5544 * The XML REC instructs us to stop parsing right here
5545 */
5546 return;
5547 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005548 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5549 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5550 "Missing encoding in text declaration\n");
5551 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005552
5553 SKIP_BLANKS;
5554 if ((RAW == '?') && (NXT(1) == '>')) {
5555 SKIP(2);
5556 } else if (RAW == '>') {
5557 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005558 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005559 NEXT;
5560 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005561 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005562 MOVETO_ENDTAG(CUR_PTR);
5563 NEXT;
5564 }
5565}
5566
5567/**
Owen Taylor3473f882001-02-23 17:55:21 +00005568 * xmlParseExternalSubset:
5569 * @ctxt: an XML parser context
5570 * @ExternalID: the external identifier
5571 * @SystemID: the system identifier (or URL)
5572 *
5573 * parse Markup declarations from an external subset
5574 *
5575 * [30] extSubset ::= textDecl? extSubsetDecl
5576 *
5577 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5578 */
5579void
5580xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5581 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005582 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005583 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005584 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005585 xmlParseTextDecl(ctxt);
5586 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5587 /*
5588 * The XML REC instructs us to stop parsing right here
5589 */
5590 ctxt->instate = XML_PARSER_EOF;
5591 return;
5592 }
5593 }
5594 if (ctxt->myDoc == NULL) {
5595 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5596 }
5597 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5598 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5599
5600 ctxt->instate = XML_PARSER_DTD;
5601 ctxt->external = 1;
5602 while (((RAW == '<') && (NXT(1) == '?')) ||
5603 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005604 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005605 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005606 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005607
5608 GROW;
5609 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5610 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005611 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005612 NEXT;
5613 } else if (RAW == '%') {
5614 xmlParsePEReference(ctxt);
5615 } else
5616 xmlParseMarkupDecl(ctxt);
5617
5618 /*
5619 * Pop-up of finished entities.
5620 */
5621 while ((RAW == 0) && (ctxt->inputNr > 1))
5622 xmlPopInput(ctxt);
5623
Daniel Veillardfdc91562002-07-01 21:52:03 +00005624 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005625 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005626 break;
5627 }
5628 }
5629
5630 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005631 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005632 }
5633
5634}
5635
5636/**
5637 * xmlParseReference:
5638 * @ctxt: an XML parser context
5639 *
5640 * parse and handle entity references in content, depending on the SAX
5641 * interface, this may end-up in a call to character() if this is a
5642 * CharRef, a predefined entity, if there is no reference() callback.
5643 * or if the parser was asked to switch to that mode.
5644 *
5645 * [67] Reference ::= EntityRef | CharRef
5646 */
5647void
5648xmlParseReference(xmlParserCtxtPtr ctxt) {
5649 xmlEntityPtr ent;
5650 xmlChar *val;
5651 if (RAW != '&') return;
5652
5653 if (NXT(1) == '#') {
5654 int i = 0;
5655 xmlChar out[10];
5656 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005657 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005658
5659 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5660 /*
5661 * So we are using non-UTF-8 buffers
5662 * Check that the char fit on 8bits, if not
5663 * generate a CharRef.
5664 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005665 if (value <= 0xFF) {
5666 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005667 out[1] = 0;
5668 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5669 (!ctxt->disableSAX))
5670 ctxt->sax->characters(ctxt->userData, out, 1);
5671 } else {
5672 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005673 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005674 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005675 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005676 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5677 (!ctxt->disableSAX))
5678 ctxt->sax->reference(ctxt->userData, out);
5679 }
5680 } else {
5681 /*
5682 * Just encode the value in UTF-8
5683 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005684 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005685 out[i] = 0;
5686 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5687 (!ctxt->disableSAX))
5688 ctxt->sax->characters(ctxt->userData, out, i);
5689 }
5690 } else {
5691 ent = xmlParseEntityRef(ctxt);
5692 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005693 if (!ctxt->wellFormed)
5694 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005695 if ((ent->name != NULL) &&
5696 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5697 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005698 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005699
5700
5701 /*
5702 * The first reference to the entity trigger a parsing phase
5703 * where the ent->children is filled with the result from
5704 * the parsing.
5705 */
5706 if (ent->children == NULL) {
5707 xmlChar *value;
5708 value = ent->content;
5709
5710 /*
5711 * Check that this entity is well formed
5712 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005713 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005714 (value[1] == 0) && (value[0] == '<') &&
5715 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5716 /*
5717 * DONE: get definite answer on this !!!
5718 * Lots of entity decls are used to declare a single
5719 * char
5720 * <!ENTITY lt "<">
5721 * Which seems to be valid since
5722 * 2.4: The ampersand character (&) and the left angle
5723 * bracket (<) may appear in their literal form only
5724 * when used ... They are also legal within the literal
5725 * entity value of an internal entity declaration;i
5726 * see "4.3.2 Well-Formed Parsed Entities".
5727 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5728 * Looking at the OASIS test suite and James Clark
5729 * tests, this is broken. However the XML REC uses
5730 * it. Is the XML REC not well-formed ????
5731 * This is a hack to avoid this problem
5732 *
5733 * ANSWER: since lt gt amp .. are already defined,
5734 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005735 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005736 * is lousy but acceptable.
5737 */
5738 list = xmlNewDocText(ctxt->myDoc, value);
5739 if (list != NULL) {
5740 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5741 (ent->children == NULL)) {
5742 ent->children = list;
5743 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005744 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005745 list->parent = (xmlNodePtr) ent;
5746 } else {
5747 xmlFreeNodeList(list);
5748 }
5749 } else if (list != NULL) {
5750 xmlFreeNodeList(list);
5751 }
5752 } else {
5753 /*
5754 * 4.3.2: An internal general parsed entity is well-formed
5755 * if its replacement text matches the production labeled
5756 * content.
5757 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005758
5759 void *user_data;
5760 /*
5761 * This is a bit hackish but this seems the best
5762 * way to make sure both SAX and DOM entity support
5763 * behaves okay.
5764 */
5765 if (ctxt->userData == ctxt)
5766 user_data = NULL;
5767 else
5768 user_data = ctxt->userData;
5769
Owen Taylor3473f882001-02-23 17:55:21 +00005770 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5771 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005772 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5773 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005774 ctxt->depth--;
5775 } else if (ent->etype ==
5776 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5777 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005778 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005779 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005780 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005781 ctxt->depth--;
5782 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005783 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005784 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5785 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005786 }
5787 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005788 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005789 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005790 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005791 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5792 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005793 (ent->children == NULL)) {
5794 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005795 if (ctxt->replaceEntities) {
5796 /*
5797 * Prune it directly in the generated document
5798 * except for single text nodes.
5799 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005800 if (((list->type == XML_TEXT_NODE) &&
5801 (list->next == NULL)) ||
5802 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005803 list->parent = (xmlNodePtr) ent;
5804 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005805 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005806 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005807 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005808 while (list != NULL) {
5809 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005810 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005811 if (list->next == NULL)
5812 ent->last = list;
5813 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005814 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005815 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005816#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005817 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5818 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005819#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005820 }
5821 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005822 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005823 while (list != NULL) {
5824 list->parent = (xmlNodePtr) ent;
5825 if (list->next == NULL)
5826 ent->last = list;
5827 list = list->next;
5828 }
Owen Taylor3473f882001-02-23 17:55:21 +00005829 }
5830 } else {
5831 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005832 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005833 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005834 } else if ((ret != XML_ERR_OK) &&
5835 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005836 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005837 } else if (list != NULL) {
5838 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005839 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005840 }
5841 }
5842 }
5843 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5844 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5845 /*
5846 * Create a node.
5847 */
5848 ctxt->sax->reference(ctxt->userData, ent->name);
5849 return;
5850 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005851 /*
5852 * There is a problem on the handling of _private for entities
5853 * (bug 155816): Should we copy the content of the field from
5854 * the entity (possibly overwriting some value set by the user
5855 * when a copy is created), should we leave it alone, or should
5856 * we try to take care of different situations? The problem
5857 * is exacerbated by the usage of this field by the xmlReader.
5858 * To fix this bug, we look at _private on the created node
5859 * and, if it's NULL, we copy in whatever was in the entity.
5860 * If it's not NULL we leave it alone. This is somewhat of a
5861 * hack - maybe we should have further tests to determine
5862 * what to do.
5863 */
Owen Taylor3473f882001-02-23 17:55:21 +00005864 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5865 /*
5866 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005867 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005868 * In the first occurrence list contains the replacement.
5869 * progressive == 2 means we are operating on the Reader
5870 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005871 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005872 if (((list == NULL) && (ent->owner == 0)) ||
5873 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005874 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005875
5876 /*
5877 * when operating on a reader, the entities definitions
5878 * are always owning the entities subtree.
5879 if (ctxt->parseMode == XML_PARSE_READER)
5880 ent->owner = 1;
5881 */
5882
Daniel Veillard62f313b2001-07-04 19:49:14 +00005883 cur = ent->children;
5884 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005885 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005886 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005887 if (nw->_private == NULL)
5888 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005889 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005890 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005891 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005892 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005893 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005894 if (cur == ent->last) {
5895 /*
5896 * needed to detect some strange empty
5897 * node cases in the reader tests
5898 */
5899 if ((ctxt->parseMode == XML_PARSE_READER) &&
5900 (nw->type == XML_ELEMENT_NODE) &&
5901 (nw->children == NULL))
5902 nw->extra = 1;
5903
Daniel Veillard62f313b2001-07-04 19:49:14 +00005904 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005905 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005906 cur = cur->next;
5907 }
Daniel Veillard81273902003-09-30 00:43:48 +00005908#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005909 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005910 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005911#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005912 } else if (list == NULL) {
5913 xmlNodePtr nw = NULL, cur, next, last,
5914 firstChild = NULL;
5915 /*
5916 * Copy the entity child list and make it the new
5917 * entity child list. The goal is to make sure any
5918 * ID or REF referenced will be the one from the
5919 * document content and not the entity copy.
5920 */
5921 cur = ent->children;
5922 ent->children = NULL;
5923 last = ent->last;
5924 ent->last = NULL;
5925 while (cur != NULL) {
5926 next = cur->next;
5927 cur->next = NULL;
5928 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005929 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005930 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005931 if (nw->_private == NULL)
5932 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005933 if (firstChild == NULL){
5934 firstChild = cur;
5935 }
5936 xmlAddChild((xmlNodePtr) ent, nw);
5937 xmlAddChild(ctxt->node, cur);
5938 }
5939 if (cur == last)
5940 break;
5941 cur = next;
5942 }
5943 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005944#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005945 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5946 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005947#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005948 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005949 const xmlChar *nbktext;
5950
Daniel Veillard62f313b2001-07-04 19:49:14 +00005951 /*
5952 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005953 * node with a possible previous text one which
5954 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005955 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005956 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5957 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005958 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005959 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005960 if ((ent->last != ent->children) &&
5961 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005962 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005963 xmlAddChildList(ctxt->node, ent->children);
5964 }
5965
Owen Taylor3473f882001-02-23 17:55:21 +00005966 /*
5967 * This is to avoid a nasty side effect, see
5968 * characters() in SAX.c
5969 */
5970 ctxt->nodemem = 0;
5971 ctxt->nodelen = 0;
5972 return;
5973 } else {
5974 /*
5975 * Probably running in SAX mode
5976 */
5977 xmlParserInputPtr input;
5978
5979 input = xmlNewEntityInputStream(ctxt, ent);
5980 xmlPushInput(ctxt, input);
5981 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005982 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5983 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005984 xmlParseTextDecl(ctxt);
5985 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5986 /*
5987 * The XML REC instructs us to stop parsing right here
5988 */
5989 ctxt->instate = XML_PARSER_EOF;
5990 return;
5991 }
5992 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005993 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5994 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005995 }
5996 }
5997 return;
5998 }
5999 }
6000 } else {
6001 val = ent->content;
6002 if (val == NULL) return;
6003 /*
6004 * inline the entity.
6005 */
6006 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6007 (!ctxt->disableSAX))
6008 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6009 }
6010 }
6011}
6012
6013/**
6014 * xmlParseEntityRef:
6015 * @ctxt: an XML parser context
6016 *
6017 * parse ENTITY references declarations
6018 *
6019 * [68] EntityRef ::= '&' Name ';'
6020 *
6021 * [ WFC: Entity Declared ]
6022 * In a document without any DTD, a document with only an internal DTD
6023 * subset which contains no parameter entity references, or a document
6024 * with "standalone='yes'", the Name given in the entity reference
6025 * must match that in an entity declaration, except that well-formed
6026 * documents need not declare any of the following entities: amp, lt,
6027 * gt, apos, quot. The declaration of a parameter entity must precede
6028 * any reference to it. Similarly, the declaration of a general entity
6029 * must precede any reference to it which appears in a default value in an
6030 * attribute-list declaration. Note that if entities are declared in the
6031 * external subset or in external parameter entities, a non-validating
6032 * processor is not obligated to read and process their declarations;
6033 * for such documents, the rule that an entity must be declared is a
6034 * well-formedness constraint only if standalone='yes'.
6035 *
6036 * [ WFC: Parsed Entity ]
6037 * An entity reference must not contain the name of an unparsed entity
6038 *
6039 * Returns the xmlEntityPtr if found, or NULL otherwise.
6040 */
6041xmlEntityPtr
6042xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006043 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006044 xmlEntityPtr ent = NULL;
6045
6046 GROW;
6047
6048 if (RAW == '&') {
6049 NEXT;
6050 name = xmlParseName(ctxt);
6051 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006052 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6053 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006054 } else {
6055 if (RAW == ';') {
6056 NEXT;
6057 /*
6058 * Ask first SAX for entity resolution, otherwise try the
6059 * predefined set.
6060 */
6061 if (ctxt->sax != NULL) {
6062 if (ctxt->sax->getEntity != NULL)
6063 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006064 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006065 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006066 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6067 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006068 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006069 }
Owen Taylor3473f882001-02-23 17:55:21 +00006070 }
6071 /*
6072 * [ WFC: Entity Declared ]
6073 * In a document without any DTD, a document with only an
6074 * internal DTD subset which contains no parameter entity
6075 * references, or a document with "standalone='yes'", the
6076 * Name given in the entity reference must match that in an
6077 * entity declaration, except that well-formed documents
6078 * need not declare any of the following entities: amp, lt,
6079 * gt, apos, quot.
6080 * The declaration of a parameter entity must precede any
6081 * reference to it.
6082 * Similarly, the declaration of a general entity must
6083 * precede any reference to it which appears in a default
6084 * value in an attribute-list declaration. Note that if
6085 * entities are declared in the external subset or in
6086 * external parameter entities, a non-validating processor
6087 * is not obligated to read and process their declarations;
6088 * for such documents, the rule that an entity must be
6089 * declared is a well-formedness constraint only if
6090 * standalone='yes'.
6091 */
6092 if (ent == NULL) {
6093 if ((ctxt->standalone == 1) ||
6094 ((ctxt->hasExternalSubset == 0) &&
6095 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006096 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006097 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006099 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006100 "Entity '%s' not defined\n", name);
6101 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006102 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104
6105 /*
6106 * [ WFC: Parsed Entity ]
6107 * An entity reference must not contain the name of an
6108 * unparsed entity
6109 */
6110 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006111 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006112 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 }
6114
6115 /*
6116 * [ WFC: No External Entity References ]
6117 * Attribute values cannot contain direct or indirect
6118 * entity references to external entities.
6119 */
6120 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6121 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006122 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6123 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006124 }
6125 /*
6126 * [ WFC: No < in Attribute Values ]
6127 * The replacement text of any entity referred to directly or
6128 * indirectly in an attribute value (other than "&lt;") must
6129 * not contain a <.
6130 */
6131 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6132 (ent != NULL) &&
6133 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6134 (ent->content != NULL) &&
6135 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006136 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006137 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006138 }
6139
6140 /*
6141 * Internal check, no parameter entities here ...
6142 */
6143 else {
6144 switch (ent->etype) {
6145 case XML_INTERNAL_PARAMETER_ENTITY:
6146 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006147 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6148 "Attempt to reference the parameter entity '%s'\n",
6149 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006150 break;
6151 default:
6152 break;
6153 }
6154 }
6155
6156 /*
6157 * [ WFC: No Recursion ]
6158 * A parsed entity must not contain a recursive reference
6159 * to itself, either directly or indirectly.
6160 * Done somewhere else
6161 */
6162
6163 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006164 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 }
Owen Taylor3473f882001-02-23 17:55:21 +00006166 }
6167 }
6168 return(ent);
6169}
6170
6171/**
6172 * xmlParseStringEntityRef:
6173 * @ctxt: an XML parser context
6174 * @str: a pointer to an index in the string
6175 *
6176 * parse ENTITY references declarations, but this version parses it from
6177 * a string value.
6178 *
6179 * [68] EntityRef ::= '&' Name ';'
6180 *
6181 * [ WFC: Entity Declared ]
6182 * In a document without any DTD, a document with only an internal DTD
6183 * subset which contains no parameter entity references, or a document
6184 * with "standalone='yes'", the Name given in the entity reference
6185 * must match that in an entity declaration, except that well-formed
6186 * documents need not declare any of the following entities: amp, lt,
6187 * gt, apos, quot. The declaration of a parameter entity must precede
6188 * any reference to it. Similarly, the declaration of a general entity
6189 * must precede any reference to it which appears in a default value in an
6190 * attribute-list declaration. Note that if entities are declared in the
6191 * external subset or in external parameter entities, a non-validating
6192 * processor is not obligated to read and process their declarations;
6193 * for such documents, the rule that an entity must be declared is a
6194 * well-formedness constraint only if standalone='yes'.
6195 *
6196 * [ WFC: Parsed Entity ]
6197 * An entity reference must not contain the name of an unparsed entity
6198 *
6199 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6200 * is updated to the current location in the string.
6201 */
6202xmlEntityPtr
6203xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6204 xmlChar *name;
6205 const xmlChar *ptr;
6206 xmlChar cur;
6207 xmlEntityPtr ent = NULL;
6208
6209 if ((str == NULL) || (*str == NULL))
6210 return(NULL);
6211 ptr = *str;
6212 cur = *ptr;
6213 if (cur == '&') {
6214 ptr++;
6215 cur = *ptr;
6216 name = xmlParseStringName(ctxt, &ptr);
6217 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6219 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006220 } else {
6221 if (*ptr == ';') {
6222 ptr++;
6223 /*
6224 * Ask first SAX for entity resolution, otherwise try the
6225 * predefined set.
6226 */
6227 if (ctxt->sax != NULL) {
6228 if (ctxt->sax->getEntity != NULL)
6229 ent = ctxt->sax->getEntity(ctxt->userData, name);
6230 if (ent == NULL)
6231 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006232 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006233 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006234 }
Owen Taylor3473f882001-02-23 17:55:21 +00006235 }
6236 /*
6237 * [ WFC: Entity Declared ]
6238 * In a document without any DTD, a document with only an
6239 * internal DTD subset which contains no parameter entity
6240 * references, or a document with "standalone='yes'", the
6241 * Name given in the entity reference must match that in an
6242 * entity declaration, except that well-formed documents
6243 * need not declare any of the following entities: amp, lt,
6244 * gt, apos, quot.
6245 * The declaration of a parameter entity must precede any
6246 * reference to it.
6247 * Similarly, the declaration of a general entity must
6248 * precede any reference to it which appears in a default
6249 * value in an attribute-list declaration. Note that if
6250 * entities are declared in the external subset or in
6251 * external parameter entities, a non-validating processor
6252 * is not obligated to read and process their declarations;
6253 * for such documents, the rule that an entity must be
6254 * declared is a well-formedness constraint only if
6255 * standalone='yes'.
6256 */
6257 if (ent == NULL) {
6258 if ((ctxt->standalone == 1) ||
6259 ((ctxt->hasExternalSubset == 0) &&
6260 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006261 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006262 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006264 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006265 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006266 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006267 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006268 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
6270
6271 /*
6272 * [ WFC: Parsed Entity ]
6273 * An entity reference must not contain the name of an
6274 * unparsed entity
6275 */
6276 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006277 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006278 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006279 }
6280
6281 /*
6282 * [ WFC: No External Entity References ]
6283 * Attribute values cannot contain direct or indirect
6284 * entity references to external entities.
6285 */
6286 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6287 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006288 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006289 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006290 }
6291 /*
6292 * [ WFC: No < in Attribute Values ]
6293 * The replacement text of any entity referred to directly or
6294 * indirectly in an attribute value (other than "&lt;") must
6295 * not contain a <.
6296 */
6297 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6298 (ent != NULL) &&
6299 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6300 (ent->content != NULL) &&
6301 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006302 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6303 "'<' in entity '%s' is not allowed in attributes values\n",
6304 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006305 }
6306
6307 /*
6308 * Internal check, no parameter entities here ...
6309 */
6310 else {
6311 switch (ent->etype) {
6312 case XML_INTERNAL_PARAMETER_ENTITY:
6313 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006314 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6315 "Attempt to reference the parameter entity '%s'\n",
6316 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006317 break;
6318 default:
6319 break;
6320 }
6321 }
6322
6323 /*
6324 * [ WFC: No Recursion ]
6325 * A parsed entity must not contain a recursive reference
6326 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006327 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006328 */
6329
6330 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006331 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006332 }
6333 xmlFree(name);
6334 }
6335 }
6336 *str = ptr;
6337 return(ent);
6338}
6339
6340/**
6341 * xmlParsePEReference:
6342 * @ctxt: an XML parser context
6343 *
6344 * parse PEReference declarations
6345 * The entity content is handled directly by pushing it's content as
6346 * a new input stream.
6347 *
6348 * [69] PEReference ::= '%' Name ';'
6349 *
6350 * [ WFC: No Recursion ]
6351 * A parsed entity must not contain a recursive
6352 * reference to itself, either directly or indirectly.
6353 *
6354 * [ WFC: Entity Declared ]
6355 * In a document without any DTD, a document with only an internal DTD
6356 * subset which contains no parameter entity references, or a document
6357 * with "standalone='yes'", ... ... The declaration of a parameter
6358 * entity must precede any reference to it...
6359 *
6360 * [ VC: Entity Declared ]
6361 * In a document with an external subset or external parameter entities
6362 * with "standalone='no'", ... ... The declaration of a parameter entity
6363 * must precede any reference to it...
6364 *
6365 * [ WFC: In DTD ]
6366 * Parameter-entity references may only appear in the DTD.
6367 * NOTE: misleading but this is handled.
6368 */
6369void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006370xmlParsePEReference(xmlParserCtxtPtr ctxt)
6371{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006372 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 xmlEntityPtr entity = NULL;
6374 xmlParserInputPtr input;
6375
6376 if (RAW == '%') {
6377 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006378 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006379 if (name == NULL) {
6380 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6381 "xmlParsePEReference: no name\n");
6382 } else {
6383 if (RAW == ';') {
6384 NEXT;
6385 if ((ctxt->sax != NULL) &&
6386 (ctxt->sax->getParameterEntity != NULL))
6387 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6388 name);
6389 if (entity == NULL) {
6390 /*
6391 * [ WFC: Entity Declared ]
6392 * In a document without any DTD, a document with only an
6393 * internal DTD subset which contains no parameter entity
6394 * references, or a document with "standalone='yes'", ...
6395 * ... The declaration of a parameter entity must precede
6396 * any reference to it...
6397 */
6398 if ((ctxt->standalone == 1) ||
6399 ((ctxt->hasExternalSubset == 0) &&
6400 (ctxt->hasPErefs == 0))) {
6401 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6402 "PEReference: %%%s; not found\n",
6403 name);
6404 } else {
6405 /*
6406 * [ VC: Entity Declared ]
6407 * In a document with an external subset or external
6408 * parameter entities with "standalone='no'", ...
6409 * ... The declaration of a parameter entity must
6410 * precede any reference to it...
6411 */
6412 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6413 "PEReference: %%%s; not found\n",
6414 name, NULL);
6415 ctxt->valid = 0;
6416 }
6417 } else {
6418 /*
6419 * Internal checking in case the entity quest barfed
6420 */
6421 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6422 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6423 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6424 "Internal: %%%s; is not a parameter entity\n",
6425 name, NULL);
6426 } else if (ctxt->input->free != deallocblankswrapper) {
6427 input =
6428 xmlNewBlanksWrapperInputStream(ctxt, entity);
6429 xmlPushInput(ctxt, input);
6430 } else {
6431 /*
6432 * TODO !!!
6433 * handle the extra spaces added before and after
6434 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6435 */
6436 input = xmlNewEntityInputStream(ctxt, entity);
6437 xmlPushInput(ctxt, input);
6438 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006439 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006440 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006441 xmlParseTextDecl(ctxt);
6442 if (ctxt->errNo ==
6443 XML_ERR_UNSUPPORTED_ENCODING) {
6444 /*
6445 * The XML REC instructs us to stop parsing
6446 * right here
6447 */
6448 ctxt->instate = XML_PARSER_EOF;
6449 return;
6450 }
6451 }
6452 }
6453 }
6454 ctxt->hasPErefs = 1;
6455 } else {
6456 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6457 }
6458 }
Owen Taylor3473f882001-02-23 17:55:21 +00006459 }
6460}
6461
6462/**
6463 * xmlParseStringPEReference:
6464 * @ctxt: an XML parser context
6465 * @str: a pointer to an index in the string
6466 *
6467 * parse PEReference declarations
6468 *
6469 * [69] PEReference ::= '%' Name ';'
6470 *
6471 * [ WFC: No Recursion ]
6472 * A parsed entity must not contain a recursive
6473 * reference to itself, either directly or indirectly.
6474 *
6475 * [ WFC: Entity Declared ]
6476 * In a document without any DTD, a document with only an internal DTD
6477 * subset which contains no parameter entity references, or a document
6478 * with "standalone='yes'", ... ... The declaration of a parameter
6479 * entity must precede any reference to it...
6480 *
6481 * [ VC: Entity Declared ]
6482 * In a document with an external subset or external parameter entities
6483 * with "standalone='no'", ... ... The declaration of a parameter entity
6484 * must precede any reference to it...
6485 *
6486 * [ WFC: In DTD ]
6487 * Parameter-entity references may only appear in the DTD.
6488 * NOTE: misleading but this is handled.
6489 *
6490 * Returns the string of the entity content.
6491 * str is updated to the current value of the index
6492 */
6493xmlEntityPtr
6494xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6495 const xmlChar *ptr;
6496 xmlChar cur;
6497 xmlChar *name;
6498 xmlEntityPtr entity = NULL;
6499
6500 if ((str == NULL) || (*str == NULL)) return(NULL);
6501 ptr = *str;
6502 cur = *ptr;
6503 if (cur == '%') {
6504 ptr++;
6505 cur = *ptr;
6506 name = xmlParseStringName(ctxt, &ptr);
6507 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006508 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6509 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006510 } else {
6511 cur = *ptr;
6512 if (cur == ';') {
6513 ptr++;
6514 cur = *ptr;
6515 if ((ctxt->sax != NULL) &&
6516 (ctxt->sax->getParameterEntity != NULL))
6517 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6518 name);
6519 if (entity == NULL) {
6520 /*
6521 * [ WFC: Entity Declared ]
6522 * In a document without any DTD, a document with only an
6523 * internal DTD subset which contains no parameter entity
6524 * references, or a document with "standalone='yes'", ...
6525 * ... The declaration of a parameter entity must precede
6526 * any reference to it...
6527 */
6528 if ((ctxt->standalone == 1) ||
6529 ((ctxt->hasExternalSubset == 0) &&
6530 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006531 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006532 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006533 } else {
6534 /*
6535 * [ VC: Entity Declared ]
6536 * In a document with an external subset or external
6537 * parameter entities with "standalone='no'", ...
6538 * ... The declaration of a parameter entity must
6539 * precede any reference to it...
6540 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006541 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6542 "PEReference: %%%s; not found\n",
6543 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006544 ctxt->valid = 0;
6545 }
6546 } else {
6547 /*
6548 * Internal checking in case the entity quest barfed
6549 */
6550 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6551 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006552 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6553 "%%%s; is not a parameter entity\n",
6554 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006555 }
6556 }
6557 ctxt->hasPErefs = 1;
6558 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006559 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006560 }
6561 xmlFree(name);
6562 }
6563 }
6564 *str = ptr;
6565 return(entity);
6566}
6567
6568/**
6569 * xmlParseDocTypeDecl:
6570 * @ctxt: an XML parser context
6571 *
6572 * parse a DOCTYPE declaration
6573 *
6574 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6575 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6576 *
6577 * [ VC: Root Element Type ]
6578 * The Name in the document type declaration must match the element
6579 * type of the root element.
6580 */
6581
6582void
6583xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006584 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006585 xmlChar *ExternalID = NULL;
6586 xmlChar *URI = NULL;
6587
6588 /*
6589 * We know that '<!DOCTYPE' has been detected.
6590 */
6591 SKIP(9);
6592
6593 SKIP_BLANKS;
6594
6595 /*
6596 * Parse the DOCTYPE name.
6597 */
6598 name = xmlParseName(ctxt);
6599 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006600 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6601 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
6603 ctxt->intSubName = name;
6604
6605 SKIP_BLANKS;
6606
6607 /*
6608 * Check for SystemID and ExternalID
6609 */
6610 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6611
6612 if ((URI != NULL) || (ExternalID != NULL)) {
6613 ctxt->hasExternalSubset = 1;
6614 }
6615 ctxt->extSubURI = URI;
6616 ctxt->extSubSystem = ExternalID;
6617
6618 SKIP_BLANKS;
6619
6620 /*
6621 * Create and update the internal subset.
6622 */
6623 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6624 (!ctxt->disableSAX))
6625 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6626
6627 /*
6628 * Is there any internal subset declarations ?
6629 * they are handled separately in xmlParseInternalSubset()
6630 */
6631 if (RAW == '[')
6632 return;
6633
6634 /*
6635 * We should be at the end of the DOCTYPE declaration.
6636 */
6637 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006638 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006639 }
6640 NEXT;
6641}
6642
6643/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006644 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006645 * @ctxt: an XML parser context
6646 *
6647 * parse the internal subset declaration
6648 *
6649 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6650 */
6651
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006652static void
Owen Taylor3473f882001-02-23 17:55:21 +00006653xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6654 /*
6655 * Is there any DTD definition ?
6656 */
6657 if (RAW == '[') {
6658 ctxt->instate = XML_PARSER_DTD;
6659 NEXT;
6660 /*
6661 * Parse the succession of Markup declarations and
6662 * PEReferences.
6663 * Subsequence (markupdecl | PEReference | S)*
6664 */
6665 while (RAW != ']') {
6666 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006667 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006668
6669 SKIP_BLANKS;
6670 xmlParseMarkupDecl(ctxt);
6671 xmlParsePEReference(ctxt);
6672
6673 /*
6674 * Pop-up of finished entities.
6675 */
6676 while ((RAW == 0) && (ctxt->inputNr > 1))
6677 xmlPopInput(ctxt);
6678
6679 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006680 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006681 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006682 break;
6683 }
6684 }
6685 if (RAW == ']') {
6686 NEXT;
6687 SKIP_BLANKS;
6688 }
6689 }
6690
6691 /*
6692 * We should be at the end of the DOCTYPE declaration.
6693 */
6694 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006695 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006696 }
6697 NEXT;
6698}
6699
Daniel Veillard81273902003-09-30 00:43:48 +00006700#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006701/**
6702 * xmlParseAttribute:
6703 * @ctxt: an XML parser context
6704 * @value: a xmlChar ** used to store the value of the attribute
6705 *
6706 * parse an attribute
6707 *
6708 * [41] Attribute ::= Name Eq AttValue
6709 *
6710 * [ WFC: No External Entity References ]
6711 * Attribute values cannot contain direct or indirect entity references
6712 * to external entities.
6713 *
6714 * [ WFC: No < in Attribute Values ]
6715 * The replacement text of any entity referred to directly or indirectly in
6716 * an attribute value (other than "&lt;") must not contain a <.
6717 *
6718 * [ VC: Attribute Value Type ]
6719 * The attribute must have been declared; the value must be of the type
6720 * declared for it.
6721 *
6722 * [25] Eq ::= S? '=' S?
6723 *
6724 * With namespace:
6725 *
6726 * [NS 11] Attribute ::= QName Eq AttValue
6727 *
6728 * Also the case QName == xmlns:??? is handled independently as a namespace
6729 * definition.
6730 *
6731 * Returns the attribute name, and the value in *value.
6732 */
6733
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006734const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006735xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006736 const xmlChar *name;
6737 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006738
6739 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006740 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006741 name = xmlParseName(ctxt);
6742 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006743 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006744 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006745 return(NULL);
6746 }
6747
6748 /*
6749 * read the value
6750 */
6751 SKIP_BLANKS;
6752 if (RAW == '=') {
6753 NEXT;
6754 SKIP_BLANKS;
6755 val = xmlParseAttValue(ctxt);
6756 ctxt->instate = XML_PARSER_CONTENT;
6757 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006758 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006759 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006760 return(NULL);
6761 }
6762
6763 /*
6764 * Check that xml:lang conforms to the specification
6765 * No more registered as an error, just generate a warning now
6766 * since this was deprecated in XML second edition
6767 */
6768 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6769 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006770 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6771 "Malformed value for xml:lang : %s\n",
6772 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006773 }
6774 }
6775
6776 /*
6777 * Check that xml:space conforms to the specification
6778 */
6779 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6780 if (xmlStrEqual(val, BAD_CAST "default"))
6781 *(ctxt->space) = 0;
6782 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6783 *(ctxt->space) = 1;
6784 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00006785 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006786"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00006787 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006788 }
6789 }
6790
6791 *value = val;
6792 return(name);
6793}
6794
6795/**
6796 * xmlParseStartTag:
6797 * @ctxt: an XML parser context
6798 *
6799 * parse a start of tag either for rule element or
6800 * EmptyElement. In both case we don't parse the tag closing chars.
6801 *
6802 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6803 *
6804 * [ WFC: Unique Att Spec ]
6805 * No attribute name may appear more than once in the same start-tag or
6806 * empty-element tag.
6807 *
6808 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6809 *
6810 * [ WFC: Unique Att Spec ]
6811 * No attribute name may appear more than once in the same start-tag or
6812 * empty-element tag.
6813 *
6814 * With namespace:
6815 *
6816 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6817 *
6818 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6819 *
6820 * Returns the element name parsed
6821 */
6822
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006823const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006824xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006825 const xmlChar *name;
6826 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006827 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006828 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006829 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006830 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006831 int i;
6832
6833 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006834 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006835
6836 name = xmlParseName(ctxt);
6837 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006838 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006839 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006840 return(NULL);
6841 }
6842
6843 /*
6844 * Now parse the attributes, it ends up with the ending
6845 *
6846 * (S Attribute)* S?
6847 */
6848 SKIP_BLANKS;
6849 GROW;
6850
Daniel Veillard21a0f912001-02-25 19:54:14 +00006851 while ((RAW != '>') &&
6852 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006853 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006854 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006855 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006856
6857 attname = xmlParseAttribute(ctxt, &attvalue);
6858 if ((attname != NULL) && (attvalue != NULL)) {
6859 /*
6860 * [ WFC: Unique Att Spec ]
6861 * No attribute name may appear more than once in the same
6862 * start-tag or empty-element tag.
6863 */
6864 for (i = 0; i < nbatts;i += 2) {
6865 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006866 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006867 xmlFree(attvalue);
6868 goto failed;
6869 }
6870 }
Owen Taylor3473f882001-02-23 17:55:21 +00006871 /*
6872 * Add the pair to atts
6873 */
6874 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006875 maxatts = 22; /* allow for 10 attrs by default */
6876 atts = (const xmlChar **)
6877 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006878 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006879 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006880 if (attvalue != NULL)
6881 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006882 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006884 ctxt->atts = atts;
6885 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006886 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006887 const xmlChar **n;
6888
Owen Taylor3473f882001-02-23 17:55:21 +00006889 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006890 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006891 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006892 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006893 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006894 if (attvalue != NULL)
6895 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006896 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006897 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006898 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006899 ctxt->atts = atts;
6900 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006901 }
6902 atts[nbatts++] = attname;
6903 atts[nbatts++] = attvalue;
6904 atts[nbatts] = NULL;
6905 atts[nbatts + 1] = NULL;
6906 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006907 if (attvalue != NULL)
6908 xmlFree(attvalue);
6909 }
6910
6911failed:
6912
Daniel Veillard3772de32002-12-17 10:31:45 +00006913 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006914 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6915 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006916 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6918 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006919 }
6920 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006921 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6922 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006923 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6924 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006925 break;
6926 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006927 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006928 GROW;
6929 }
6930
6931 /*
6932 * SAX: Start of Element !
6933 */
6934 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006935 (!ctxt->disableSAX)) {
6936 if (nbatts > 0)
6937 ctxt->sax->startElement(ctxt->userData, name, atts);
6938 else
6939 ctxt->sax->startElement(ctxt->userData, name, NULL);
6940 }
Owen Taylor3473f882001-02-23 17:55:21 +00006941
6942 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006943 /* Free only the content strings */
6944 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006945 if (atts[i] != NULL)
6946 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006947 }
6948 return(name);
6949}
6950
6951/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006952 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006953 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006954 * @line: line of the start tag
6955 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006956 *
6957 * parse an end of tag
6958 *
6959 * [42] ETag ::= '</' Name S? '>'
6960 *
6961 * With namespace
6962 *
6963 * [NS 9] ETag ::= '</' QName S? '>'
6964 */
6965
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006966static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006967xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006968 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006969
6970 GROW;
6971 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006972 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006973 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006974 return;
6975 }
6976 SKIP(2);
6977
Daniel Veillard46de64e2002-05-29 08:21:33 +00006978 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006979
6980 /*
6981 * We should definitely be at the ending "S? '>'" part
6982 */
6983 GROW;
6984 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006985 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006986 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006987 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006988 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006989
6990 /*
6991 * [ WFC: Element Type Match ]
6992 * The Name in an element's end-tag must match the element type in the
6993 * start-tag.
6994 *
6995 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006996 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006997 if (name == NULL) name = BAD_CAST "unparseable";
6998 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006999 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007000 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007001 }
7002
7003 /*
7004 * SAX: End of Tag
7005 */
7006 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7007 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007008 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007009
Daniel Veillarde57ec792003-09-10 10:50:59 +00007010 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007011 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007012 return;
7013}
7014
7015/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007016 * xmlParseEndTag:
7017 * @ctxt: an XML parser context
7018 *
7019 * parse an end of tag
7020 *
7021 * [42] ETag ::= '</' Name S? '>'
7022 *
7023 * With namespace
7024 *
7025 * [NS 9] ETag ::= '</' QName S? '>'
7026 */
7027
7028void
7029xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007030 xmlParseEndTag1(ctxt, 0);
7031}
Daniel Veillard81273902003-09-30 00:43:48 +00007032#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007033
7034/************************************************************************
7035 * *
7036 * SAX 2 specific operations *
7037 * *
7038 ************************************************************************/
7039
7040static const xmlChar *
7041xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7042 int len = 0, l;
7043 int c;
7044 int count = 0;
7045
7046 /*
7047 * Handler for more complex cases
7048 */
7049 GROW;
7050 c = CUR_CHAR(l);
7051 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007052 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007053 return(NULL);
7054 }
7055
7056 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007057 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007058 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007059 (IS_COMBINING(c)) ||
7060 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007061 if (count++ > 100) {
7062 count = 0;
7063 GROW;
7064 }
7065 len += l;
7066 NEXTL(l);
7067 c = CUR_CHAR(l);
7068 }
7069 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7070}
7071
7072/*
7073 * xmlGetNamespace:
7074 * @ctxt: an XML parser context
7075 * @prefix: the prefix to lookup
7076 *
7077 * Lookup the namespace name for the @prefix (which ca be NULL)
7078 * The prefix must come from the @ctxt->dict dictionnary
7079 *
7080 * Returns the namespace name or NULL if not bound
7081 */
7082static const xmlChar *
7083xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7084 int i;
7085
Daniel Veillarde57ec792003-09-10 10:50:59 +00007086 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007087 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007088 if (ctxt->nsTab[i] == prefix) {
7089 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7090 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007091 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007092 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007093 return(NULL);
7094}
7095
7096/**
7097 * xmlParseNCName:
7098 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007099 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007100 *
7101 * parse an XML name.
7102 *
7103 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7104 * CombiningChar | Extender
7105 *
7106 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7107 *
7108 * Returns the Name parsed or NULL
7109 */
7110
7111static const xmlChar *
7112xmlParseNCName(xmlParserCtxtPtr ctxt) {
7113 const xmlChar *in;
7114 const xmlChar *ret;
7115 int count = 0;
7116
7117 /*
7118 * Accelerator for simple ASCII names
7119 */
7120 in = ctxt->input->cur;
7121 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7122 ((*in >= 0x41) && (*in <= 0x5A)) ||
7123 (*in == '_')) {
7124 in++;
7125 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7126 ((*in >= 0x41) && (*in <= 0x5A)) ||
7127 ((*in >= 0x30) && (*in <= 0x39)) ||
7128 (*in == '_') || (*in == '-') ||
7129 (*in == '.'))
7130 in++;
7131 if ((*in > 0) && (*in < 0x80)) {
7132 count = in - ctxt->input->cur;
7133 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7134 ctxt->input->cur = in;
7135 ctxt->nbChars += count;
7136 ctxt->input->col += count;
7137 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007138 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007139 }
7140 return(ret);
7141 }
7142 }
7143 return(xmlParseNCNameComplex(ctxt));
7144}
7145
7146/**
7147 * xmlParseQName:
7148 * @ctxt: an XML parser context
7149 * @prefix: pointer to store the prefix part
7150 *
7151 * parse an XML Namespace QName
7152 *
7153 * [6] QName ::= (Prefix ':')? LocalPart
7154 * [7] Prefix ::= NCName
7155 * [8] LocalPart ::= NCName
7156 *
7157 * Returns the Name parsed or NULL
7158 */
7159
7160static const xmlChar *
7161xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7162 const xmlChar *l, *p;
7163
7164 GROW;
7165
7166 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007167 if (l == NULL) {
7168 if (CUR == ':') {
7169 l = xmlParseName(ctxt);
7170 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007171 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7172 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007173 *prefix = NULL;
7174 return(l);
7175 }
7176 }
7177 return(NULL);
7178 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007179 if (CUR == ':') {
7180 NEXT;
7181 p = l;
7182 l = xmlParseNCName(ctxt);
7183 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007184 xmlChar *tmp;
7185
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007186 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7187 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007188 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7189 p = xmlDictLookup(ctxt->dict, tmp, -1);
7190 if (tmp != NULL) xmlFree(tmp);
7191 *prefix = NULL;
7192 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007193 }
7194 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007195 xmlChar *tmp;
7196
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007197 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7198 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007199 NEXT;
7200 tmp = (xmlChar *) xmlParseName(ctxt);
7201 if (tmp != NULL) {
7202 tmp = xmlBuildQName(tmp, l, NULL, 0);
7203 l = xmlDictLookup(ctxt->dict, tmp, -1);
7204 if (tmp != NULL) xmlFree(tmp);
7205 *prefix = p;
7206 return(l);
7207 }
7208 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7209 l = xmlDictLookup(ctxt->dict, tmp, -1);
7210 if (tmp != NULL) xmlFree(tmp);
7211 *prefix = p;
7212 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007213 }
7214 *prefix = p;
7215 } else
7216 *prefix = NULL;
7217 return(l);
7218}
7219
7220/**
7221 * xmlParseQNameAndCompare:
7222 * @ctxt: an XML parser context
7223 * @name: the localname
7224 * @prefix: the prefix, if any.
7225 *
7226 * parse an XML name and compares for match
7227 * (specialized for endtag parsing)
7228 *
7229 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7230 * and the name for mismatch
7231 */
7232
7233static const xmlChar *
7234xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7235 xmlChar const *prefix) {
7236 const xmlChar *cmp = name;
7237 const xmlChar *in;
7238 const xmlChar *ret;
7239 const xmlChar *prefix2;
7240
7241 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7242
7243 GROW;
7244 in = ctxt->input->cur;
7245
7246 cmp = prefix;
7247 while (*in != 0 && *in == *cmp) {
7248 ++in;
7249 ++cmp;
7250 }
7251 if ((*cmp == 0) && (*in == ':')) {
7252 in++;
7253 cmp = name;
7254 while (*in != 0 && *in == *cmp) {
7255 ++in;
7256 ++cmp;
7257 }
William M. Brack76e95df2003-10-18 16:20:14 +00007258 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007259 /* success */
7260 ctxt->input->cur = in;
7261 return((const xmlChar*) 1);
7262 }
7263 }
7264 /*
7265 * all strings coms from the dictionary, equality can be done directly
7266 */
7267 ret = xmlParseQName (ctxt, &prefix2);
7268 if ((ret == name) && (prefix == prefix2))
7269 return((const xmlChar*) 1);
7270 return ret;
7271}
7272
7273/**
7274 * xmlParseAttValueInternal:
7275 * @ctxt: an XML parser context
7276 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007277 * @alloc: whether the attribute was reallocated as a new string
7278 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007279 *
7280 * parse a value for an attribute.
7281 * NOTE: if no normalization is needed, the routine will return pointers
7282 * directly from the data buffer.
7283 *
7284 * 3.3.3 Attribute-Value Normalization:
7285 * Before the value of an attribute is passed to the application or
7286 * checked for validity, the XML processor must normalize it as follows:
7287 * - a character reference is processed by appending the referenced
7288 * character to the attribute value
7289 * - an entity reference is processed by recursively processing the
7290 * replacement text of the entity
7291 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7292 * appending #x20 to the normalized value, except that only a single
7293 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7294 * parsed entity or the literal entity value of an internal parsed entity
7295 * - other characters are processed by appending them to the normalized value
7296 * If the declared value is not CDATA, then the XML processor must further
7297 * process the normalized attribute value by discarding any leading and
7298 * trailing space (#x20) characters, and by replacing sequences of space
7299 * (#x20) characters by a single space (#x20) character.
7300 * All attributes for which no declaration has been read should be treated
7301 * by a non-validating parser as if declared CDATA.
7302 *
7303 * Returns the AttValue parsed or NULL. The value has to be freed by the
7304 * caller if it was copied, this can be detected by val[*len] == 0.
7305 */
7306
7307static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007308xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7309 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007310{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007311 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007312 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007313 xmlChar *ret = NULL;
7314
7315 GROW;
7316 in = (xmlChar *) CUR_PTR;
7317 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007318 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007319 return (NULL);
7320 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007321 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007322
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007323 /*
7324 * try to handle in this routine the most common case where no
7325 * allocation of a new string is required and where content is
7326 * pure ASCII.
7327 */
7328 limit = *in++;
7329 end = ctxt->input->end;
7330 start = in;
7331 if (in >= end) {
7332 const xmlChar *oldbase = ctxt->input->base;
7333 GROW;
7334 if (oldbase != ctxt->input->base) {
7335 long delta = ctxt->input->base - oldbase;
7336 start = start + delta;
7337 in = in + delta;
7338 }
7339 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007340 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007341 if (normalize) {
7342 /*
7343 * Skip any leading spaces
7344 */
7345 while ((in < end) && (*in != limit) &&
7346 ((*in == 0x20) || (*in == 0x9) ||
7347 (*in == 0xA) || (*in == 0xD))) {
7348 in++;
7349 start = in;
7350 if (in >= end) {
7351 const xmlChar *oldbase = ctxt->input->base;
7352 GROW;
7353 if (oldbase != ctxt->input->base) {
7354 long delta = ctxt->input->base - oldbase;
7355 start = start + delta;
7356 in = in + delta;
7357 }
7358 end = ctxt->input->end;
7359 }
7360 }
7361 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7362 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7363 if ((*in++ == 0x20) && (*in == 0x20)) break;
7364 if (in >= end) {
7365 const xmlChar *oldbase = ctxt->input->base;
7366 GROW;
7367 if (oldbase != ctxt->input->base) {
7368 long delta = ctxt->input->base - oldbase;
7369 start = start + delta;
7370 in = in + delta;
7371 }
7372 end = ctxt->input->end;
7373 }
7374 }
7375 last = in;
7376 /*
7377 * skip the trailing blanks
7378 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007379 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007380 while ((in < end) && (*in != limit) &&
7381 ((*in == 0x20) || (*in == 0x9) ||
7382 (*in == 0xA) || (*in == 0xD))) {
7383 in++;
7384 if (in >= end) {
7385 const xmlChar *oldbase = ctxt->input->base;
7386 GROW;
7387 if (oldbase != ctxt->input->base) {
7388 long delta = ctxt->input->base - oldbase;
7389 start = start + delta;
7390 in = in + delta;
7391 last = last + delta;
7392 }
7393 end = ctxt->input->end;
7394 }
7395 }
7396 if (*in != limit) goto need_complex;
7397 } else {
7398 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7399 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7400 in++;
7401 if (in >= end) {
7402 const xmlChar *oldbase = ctxt->input->base;
7403 GROW;
7404 if (oldbase != ctxt->input->base) {
7405 long delta = ctxt->input->base - oldbase;
7406 start = start + delta;
7407 in = in + delta;
7408 }
7409 end = ctxt->input->end;
7410 }
7411 }
7412 last = in;
7413 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007414 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007415 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007416 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007417 *len = last - start;
7418 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007419 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007420 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007421 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007422 }
7423 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007424 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007425 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007426need_complex:
7427 if (alloc) *alloc = 1;
7428 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007429}
7430
7431/**
7432 * xmlParseAttribute2:
7433 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007434 * @pref: the element prefix
7435 * @elem: the element name
7436 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007437 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007438 * @len: an int * to save the length of the attribute
7439 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007440 *
7441 * parse an attribute in the new SAX2 framework.
7442 *
7443 * Returns the attribute name, and the value in *value, .
7444 */
7445
7446static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007447xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7448 const xmlChar *pref, const xmlChar *elem,
7449 const xmlChar **prefix, xmlChar **value,
7450 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007451 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007452 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007453 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007454
7455 *value = NULL;
7456 GROW;
7457 name = xmlParseQName(ctxt, prefix);
7458 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007459 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7460 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 return(NULL);
7462 }
7463
7464 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007465 * get the type if needed
7466 */
7467 if (ctxt->attsSpecial != NULL) {
7468 int type;
7469
7470 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7471 pref, elem, *prefix, name);
7472 if (type != 0) normalize = 1;
7473 }
7474
7475 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476 * read the value
7477 */
7478 SKIP_BLANKS;
7479 if (RAW == '=') {
7480 NEXT;
7481 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007482 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007483 ctxt->instate = XML_PARSER_CONTENT;
7484 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007485 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007486 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007487 return(NULL);
7488 }
7489
Daniel Veillardd8925572005-06-08 22:34:55 +00007490 if (*prefix == ctxt->str_xml) {
7491 /*
7492 * Check that xml:lang conforms to the specification
7493 * No more registered as an error, just generate a warning now
7494 * since this was deprecated in XML second edition
7495 */
7496 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7497 internal_val = xmlStrndup(val, *len);
7498 if (!xmlCheckLanguageID(internal_val)) {
7499 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7500 "Malformed value for xml:lang : %s\n",
7501 internal_val, NULL);
7502 }
7503 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007504
Daniel Veillardd8925572005-06-08 22:34:55 +00007505 /*
7506 * Check that xml:space conforms to the specification
7507 */
7508 if (xmlStrEqual(name, BAD_CAST "space")) {
7509 internal_val = xmlStrndup(val, *len);
7510 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7511 *(ctxt->space) = 0;
7512 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7513 *(ctxt->space) = 1;
7514 else {
7515 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007517 internal_val, NULL);
7518 }
7519 }
7520 if (internal_val) {
7521 xmlFree(internal_val);
7522 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007523 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524
7525 *value = val;
7526 return(name);
7527}
7528
7529/**
7530 * xmlParseStartTag2:
7531 * @ctxt: an XML parser context
7532 *
7533 * parse a start of tag either for rule element or
7534 * EmptyElement. In both case we don't parse the tag closing chars.
7535 * This routine is called when running SAX2 parsing
7536 *
7537 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7538 *
7539 * [ WFC: Unique Att Spec ]
7540 * No attribute name may appear more than once in the same start-tag or
7541 * empty-element tag.
7542 *
7543 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7544 *
7545 * [ WFC: Unique Att Spec ]
7546 * No attribute name may appear more than once in the same start-tag or
7547 * empty-element tag.
7548 *
7549 * With namespace:
7550 *
7551 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7552 *
7553 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7554 *
7555 * Returns the element name parsed
7556 */
7557
7558static const xmlChar *
7559xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007560 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561 const xmlChar *localname;
7562 const xmlChar *prefix;
7563 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007564 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007565 const xmlChar *nsname;
7566 xmlChar *attvalue;
7567 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007568 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007569 int nratts, nbatts, nbdef;
7570 int i, j, nbNs, attval;
7571 const xmlChar *base;
7572 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007573 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007574
7575 if (RAW != '<') return(NULL);
7576 NEXT1;
7577
7578 /*
7579 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7580 * point since the attribute values may be stored as pointers to
7581 * the buffer and calling SHRINK would destroy them !
7582 * The Shrinking is only possible once the full set of attribute
7583 * callbacks have been done.
7584 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007585reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007586 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007587 base = ctxt->input->base;
7588 cur = ctxt->input->cur - ctxt->input->base;
7589 nbatts = 0;
7590 nratts = 0;
7591 nbdef = 0;
7592 nbNs = 0;
7593 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007594 /* Forget any namespaces added during an earlier parse of this element. */
7595 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007596
7597 localname = xmlParseQName(ctxt, &prefix);
7598 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007599 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7600 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007601 return(NULL);
7602 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007603 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007604
7605 /*
7606 * Now parse the attributes, it ends up with the ending
7607 *
7608 * (S Attribute)* S?
7609 */
7610 SKIP_BLANKS;
7611 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007612 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007613
7614 while ((RAW != '>') &&
7615 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007616 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007617 const xmlChar *q = CUR_PTR;
7618 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007619 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007620
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007621 attname = xmlParseAttribute2(ctxt, prefix, localname,
7622 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007623 if ((attname != NULL) && (attvalue != NULL)) {
7624 if (len < 0) len = xmlStrlen(attvalue);
7625 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007626 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7627 xmlURIPtr uri;
7628
7629 if (*URL != 0) {
7630 uri = xmlParseURI((const char *) URL);
7631 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007632 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7633 "xmlns: %s not a valid URI\n",
7634 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007635 } else {
7636 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007637 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7638 "xmlns: URI %s is not absolute\n",
7639 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007640 }
7641 xmlFreeURI(uri);
7642 }
7643 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007644 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007645 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007646 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007647 for (j = 1;j <= nbNs;j++)
7648 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7649 break;
7650 if (j <= nbNs)
7651 xmlErrAttributeDup(ctxt, NULL, attname);
7652 else
7653 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007654 if (alloc != 0) xmlFree(attvalue);
7655 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007656 continue;
7657 }
7658 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007659 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7660 xmlURIPtr uri;
7661
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007662 if (attname == ctxt->str_xml) {
7663 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007664 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7665 "xml namespace prefix mapped to wrong URI\n",
7666 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007667 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007668 /*
7669 * Do not keep a namespace definition node
7670 */
7671 if (alloc != 0) xmlFree(attvalue);
7672 SKIP_BLANKS;
7673 continue;
7674 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007675 uri = xmlParseURI((const char *) URL);
7676 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007677 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7678 "xmlns:%s: '%s' is not a valid URI\n",
7679 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007680 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007681 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007682 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7683 "xmlns:%s: URI %s is not absolute\n",
7684 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007685 }
7686 xmlFreeURI(uri);
7687 }
7688
Daniel Veillard0fb18932003-09-07 09:14:37 +00007689 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007690 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007691 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007692 for (j = 1;j <= nbNs;j++)
7693 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7694 break;
7695 if (j <= nbNs)
7696 xmlErrAttributeDup(ctxt, aprefix, attname);
7697 else
7698 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007699 if (alloc != 0) xmlFree(attvalue);
7700 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007701 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 continue;
7703 }
7704
7705 /*
7706 * Add the pair to atts
7707 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007708 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7709 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007710 if (attvalue[len] == 0)
7711 xmlFree(attvalue);
7712 goto failed;
7713 }
7714 maxatts = ctxt->maxatts;
7715 atts = ctxt->atts;
7716 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007717 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007718 atts[nbatts++] = attname;
7719 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007720 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721 atts[nbatts++] = attvalue;
7722 attvalue += len;
7723 atts[nbatts++] = attvalue;
7724 /*
7725 * tag if some deallocation is needed
7726 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007727 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007728 } else {
7729 if ((attvalue != NULL) && (attvalue[len] == 0))
7730 xmlFree(attvalue);
7731 }
7732
7733failed:
7734
7735 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007736 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7738 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007739 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7741 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007742 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007743 }
7744 SKIP_BLANKS;
7745 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7746 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007747 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007748 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749 break;
7750 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007751 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007752 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007753 }
7754
Daniel Veillard0fb18932003-09-07 09:14:37 +00007755 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007756 * The attributes defaulting
7757 */
7758 if (ctxt->attsDefault != NULL) {
7759 xmlDefAttrsPtr defaults;
7760
7761 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7762 if (defaults != NULL) {
7763 for (i = 0;i < defaults->nbAttrs;i++) {
7764 attname = defaults->values[4 * i];
7765 aprefix = defaults->values[4 * i + 1];
7766
7767 /*
7768 * special work for namespaces defaulted defs
7769 */
7770 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7771 /*
7772 * check that it's not a defined namespace
7773 */
7774 for (j = 1;j <= nbNs;j++)
7775 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7776 break;
7777 if (j <= nbNs) continue;
7778
7779 nsname = xmlGetNamespace(ctxt, NULL);
7780 if (nsname != defaults->values[4 * i + 2]) {
7781 if (nsPush(ctxt, NULL,
7782 defaults->values[4 * i + 2]) > 0)
7783 nbNs++;
7784 }
7785 } else if (aprefix == ctxt->str_xmlns) {
7786 /*
7787 * check that it's not a defined namespace
7788 */
7789 for (j = 1;j <= nbNs;j++)
7790 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7791 break;
7792 if (j <= nbNs) continue;
7793
7794 nsname = xmlGetNamespace(ctxt, attname);
7795 if (nsname != defaults->values[2]) {
7796 if (nsPush(ctxt, attname,
7797 defaults->values[4 * i + 2]) > 0)
7798 nbNs++;
7799 }
7800 } else {
7801 /*
7802 * check that it's not a defined attribute
7803 */
7804 for (j = 0;j < nbatts;j+=5) {
7805 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7806 break;
7807 }
7808 if (j < nbatts) continue;
7809
7810 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7811 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007812 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007813 }
7814 maxatts = ctxt->maxatts;
7815 atts = ctxt->atts;
7816 }
7817 atts[nbatts++] = attname;
7818 atts[nbatts++] = aprefix;
7819 if (aprefix == NULL)
7820 atts[nbatts++] = NULL;
7821 else
7822 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7823 atts[nbatts++] = defaults->values[4 * i + 2];
7824 atts[nbatts++] = defaults->values[4 * i + 3];
7825 nbdef++;
7826 }
7827 }
7828 }
7829 }
7830
Daniel Veillarde70c8772003-11-25 07:21:18 +00007831 /*
7832 * The attributes checkings
7833 */
7834 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00007835 /*
7836 * The default namespace does not apply to attribute names.
7837 */
7838 if (atts[i + 1] != NULL) {
7839 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7840 if (nsname == NULL) {
7841 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7842 "Namespace prefix %s for %s on %s is not defined\n",
7843 atts[i + 1], atts[i], localname);
7844 }
7845 atts[i + 2] = nsname;
7846 } else
7847 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00007848 /*
7849 * [ WFC: Unique Att Spec ]
7850 * No attribute name may appear more than once in the same
7851 * start-tag or empty-element tag.
7852 * As extended by the Namespace in XML REC.
7853 */
7854 for (j = 0; j < i;j += 5) {
7855 if (atts[i] == atts[j]) {
7856 if (atts[i+1] == atts[j+1]) {
7857 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7858 break;
7859 }
7860 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7861 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7862 "Namespaced Attribute %s in '%s' redefined\n",
7863 atts[i], nsname, NULL);
7864 break;
7865 }
7866 }
7867 }
7868 }
7869
Daniel Veillarde57ec792003-09-10 10:50:59 +00007870 nsname = xmlGetNamespace(ctxt, prefix);
7871 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007872 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7873 "Namespace prefix %s on %s is not defined\n",
7874 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007875 }
7876 *pref = prefix;
7877 *URI = nsname;
7878
7879 /*
7880 * SAX: Start of Element !
7881 */
7882 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7883 (!ctxt->disableSAX)) {
7884 if (nbNs > 0)
7885 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7886 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7887 nbatts / 5, nbdef, atts);
7888 else
7889 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7890 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7891 }
7892
7893 /*
7894 * Free up attribute allocated strings if needed
7895 */
7896 if (attval != 0) {
7897 for (i = 3,j = 0; j < nratts;i += 5,j++)
7898 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7899 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007900 }
7901
7902 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007903
7904base_changed:
7905 /*
7906 * the attribute strings are valid iif the base didn't changed
7907 */
7908 if (attval != 0) {
7909 for (i = 3,j = 0; j < nratts;i += 5,j++)
7910 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7911 xmlFree((xmlChar *) atts[i]);
7912 }
7913 ctxt->input->cur = ctxt->input->base + cur;
7914 if (ctxt->wellFormed == 1) {
7915 goto reparse;
7916 }
7917 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007918}
7919
7920/**
7921 * xmlParseEndTag2:
7922 * @ctxt: an XML parser context
7923 * @line: line of the start tag
7924 * @nsNr: number of namespaces on the start tag
7925 *
7926 * parse an end of tag
7927 *
7928 * [42] ETag ::= '</' Name S? '>'
7929 *
7930 * With namespace
7931 *
7932 * [NS 9] ETag ::= '</' QName S? '>'
7933 */
7934
7935static void
7936xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007937 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007938 const xmlChar *name;
7939
7940 GROW;
7941 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007942 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007943 return;
7944 }
7945 SKIP(2);
7946
William M. Brack13dfa872004-09-18 04:52:08 +00007947 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007948 if (ctxt->input->cur[tlen] == '>') {
7949 ctxt->input->cur += tlen + 1;
7950 goto done;
7951 }
7952 ctxt->input->cur += tlen;
7953 name = (xmlChar*)1;
7954 } else {
7955 if (prefix == NULL)
7956 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7957 else
7958 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7959 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007960
7961 /*
7962 * We should definitely be at the ending "S? '>'" part
7963 */
7964 GROW;
7965 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007966 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007967 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 } else
7969 NEXT1;
7970
7971 /*
7972 * [ WFC: Element Type Match ]
7973 * The Name in an element's end-tag must match the element type in the
7974 * start-tag.
7975 *
7976 */
7977 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007978 if (name == NULL) name = BAD_CAST "unparseable";
7979 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007980 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007981 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007982 }
7983
7984 /*
7985 * SAX: End of Tag
7986 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007987done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007988 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7989 (!ctxt->disableSAX))
7990 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7991
Daniel Veillard0fb18932003-09-07 09:14:37 +00007992 spacePop(ctxt);
7993 if (nsNr != 0)
7994 nsPop(ctxt, nsNr);
7995 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007996}
7997
7998/**
Owen Taylor3473f882001-02-23 17:55:21 +00007999 * xmlParseCDSect:
8000 * @ctxt: an XML parser context
8001 *
8002 * Parse escaped pure raw content.
8003 *
8004 * [18] CDSect ::= CDStart CData CDEnd
8005 *
8006 * [19] CDStart ::= '<![CDATA['
8007 *
8008 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8009 *
8010 * [21] CDEnd ::= ']]>'
8011 */
8012void
8013xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8014 xmlChar *buf = NULL;
8015 int len = 0;
8016 int size = XML_PARSER_BUFFER_SIZE;
8017 int r, rl;
8018 int s, sl;
8019 int cur, l;
8020 int count = 0;
8021
Daniel Veillard8f597c32003-10-06 08:19:27 +00008022 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008023 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008024 SKIP(9);
8025 } else
8026 return;
8027
8028 ctxt->instate = XML_PARSER_CDATA_SECTION;
8029 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008030 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008031 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008032 ctxt->instate = XML_PARSER_CONTENT;
8033 return;
8034 }
8035 NEXTL(rl);
8036 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008037 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008038 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008039 ctxt->instate = XML_PARSER_CONTENT;
8040 return;
8041 }
8042 NEXTL(sl);
8043 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008044 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008045 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008046 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008047 return;
8048 }
William M. Brack871611b2003-10-18 04:53:14 +00008049 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008050 ((r != ']') || (s != ']') || (cur != '>'))) {
8051 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008052 xmlChar *tmp;
8053
Owen Taylor3473f882001-02-23 17:55:21 +00008054 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008055 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8056 if (tmp == NULL) {
8057 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008058 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008059 return;
8060 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008061 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008062 }
8063 COPY_BUF(rl,buf,len,r);
8064 r = s;
8065 rl = sl;
8066 s = cur;
8067 sl = l;
8068 count++;
8069 if (count > 50) {
8070 GROW;
8071 count = 0;
8072 }
8073 NEXTL(l);
8074 cur = CUR_CHAR(l);
8075 }
8076 buf[len] = 0;
8077 ctxt->instate = XML_PARSER_CONTENT;
8078 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008079 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008080 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008081 xmlFree(buf);
8082 return;
8083 }
8084 NEXTL(l);
8085
8086 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008087 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008088 */
8089 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8090 if (ctxt->sax->cdataBlock != NULL)
8091 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008092 else if (ctxt->sax->characters != NULL)
8093 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008094 }
8095 xmlFree(buf);
8096}
8097
8098/**
8099 * xmlParseContent:
8100 * @ctxt: an XML parser context
8101 *
8102 * Parse a content:
8103 *
8104 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8105 */
8106
8107void
8108xmlParseContent(xmlParserCtxtPtr ctxt) {
8109 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008110 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008111 ((RAW != '<') || (NXT(1) != '/'))) {
8112 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008113 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008114 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008115
8116 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008117 * First case : a Processing Instruction.
8118 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008119 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008120 xmlParsePI(ctxt);
8121 }
8122
8123 /*
8124 * Second case : a CDSection
8125 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008126 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008127 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008128 xmlParseCDSect(ctxt);
8129 }
8130
8131 /*
8132 * Third case : a comment
8133 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008134 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008135 (NXT(2) == '-') && (NXT(3) == '-')) {
8136 xmlParseComment(ctxt);
8137 ctxt->instate = XML_PARSER_CONTENT;
8138 }
8139
8140 /*
8141 * Fourth case : a sub-element.
8142 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008143 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008144 xmlParseElement(ctxt);
8145 }
8146
8147 /*
8148 * Fifth case : a reference. If if has not been resolved,
8149 * parsing returns it's Name, create the node
8150 */
8151
Daniel Veillard21a0f912001-02-25 19:54:14 +00008152 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008153 xmlParseReference(ctxt);
8154 }
8155
8156 /*
8157 * Last case, text. Note that References are handled directly.
8158 */
8159 else {
8160 xmlParseCharData(ctxt, 0);
8161 }
8162
8163 GROW;
8164 /*
8165 * Pop-up of finished entities.
8166 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008167 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008168 xmlPopInput(ctxt);
8169 SHRINK;
8170
Daniel Veillardfdc91562002-07-01 21:52:03 +00008171 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008172 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8173 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008174 ctxt->instate = XML_PARSER_EOF;
8175 break;
8176 }
8177 }
8178}
8179
8180/**
8181 * xmlParseElement:
8182 * @ctxt: an XML parser context
8183 *
8184 * parse an XML element, this is highly recursive
8185 *
8186 * [39] element ::= EmptyElemTag | STag content ETag
8187 *
8188 * [ WFC: Element Type Match ]
8189 * The Name in an element's end-tag must match the element type in the
8190 * start-tag.
8191 *
Owen Taylor3473f882001-02-23 17:55:21 +00008192 */
8193
8194void
8195xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008196 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008197 const xmlChar *prefix;
8198 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008199 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008200 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008201 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008202 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008203
8204 /* Capture start position */
8205 if (ctxt->record_info) {
8206 node_info.begin_pos = ctxt->input->consumed +
8207 (CUR_PTR - ctxt->input->base);
8208 node_info.begin_line = ctxt->input->line;
8209 }
8210
8211 if (ctxt->spaceNr == 0)
8212 spacePush(ctxt, -1);
8213 else
8214 spacePush(ctxt, *ctxt->space);
8215
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008216 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008217#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008218 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008219#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008220 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008221#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008222 else
8223 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008224#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008225 if (name == NULL) {
8226 spacePop(ctxt);
8227 return;
8228 }
8229 namePush(ctxt, name);
8230 ret = ctxt->node;
8231
Daniel Veillard4432df22003-09-28 18:58:27 +00008232#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008233 /*
8234 * [ VC: Root Element Type ]
8235 * The Name in the document type declaration must match the element
8236 * type of the root element.
8237 */
8238 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8239 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8240 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008241#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008242
8243 /*
8244 * Check for an Empty Element.
8245 */
8246 if ((RAW == '/') && (NXT(1) == '>')) {
8247 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008248 if (ctxt->sax2) {
8249 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8250 (!ctxt->disableSAX))
8251 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008252#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008253 } else {
8254 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8255 (!ctxt->disableSAX))
8256 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008257#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008258 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008259 namePop(ctxt);
8260 spacePop(ctxt);
8261 if (nsNr != ctxt->nsNr)
8262 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008263 if ( ret != NULL && ctxt->record_info ) {
8264 node_info.end_pos = ctxt->input->consumed +
8265 (CUR_PTR - ctxt->input->base);
8266 node_info.end_line = ctxt->input->line;
8267 node_info.node = ret;
8268 xmlParserAddNodeInfo(ctxt, &node_info);
8269 }
8270 return;
8271 }
8272 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008273 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008274 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008275 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8276 "Couldn't find end of Start Tag %s line %d\n",
8277 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008278
8279 /*
8280 * end of parsing of this node.
8281 */
8282 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008283 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008284 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008285 if (nsNr != ctxt->nsNr)
8286 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008287
8288 /*
8289 * Capture end position and add node
8290 */
8291 if ( ret != NULL && ctxt->record_info ) {
8292 node_info.end_pos = ctxt->input->consumed +
8293 (CUR_PTR - ctxt->input->base);
8294 node_info.end_line = ctxt->input->line;
8295 node_info.node = ret;
8296 xmlParserAddNodeInfo(ctxt, &node_info);
8297 }
8298 return;
8299 }
8300
8301 /*
8302 * Parse the content of the element:
8303 */
8304 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008305 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008306 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008307 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008308 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008309
8310 /*
8311 * end of parsing of this node.
8312 */
8313 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008314 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008315 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008316 if (nsNr != ctxt->nsNr)
8317 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008318 return;
8319 }
8320
8321 /*
8322 * parse the end of tag: '</' should be here.
8323 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008324 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008325 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008326 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008327 }
8328#ifdef LIBXML_SAX1_ENABLED
8329 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008330 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008331#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008332
8333 /*
8334 * Capture end position and add node
8335 */
8336 if ( ret != NULL && ctxt->record_info ) {
8337 node_info.end_pos = ctxt->input->consumed +
8338 (CUR_PTR - ctxt->input->base);
8339 node_info.end_line = ctxt->input->line;
8340 node_info.node = ret;
8341 xmlParserAddNodeInfo(ctxt, &node_info);
8342 }
8343}
8344
8345/**
8346 * xmlParseVersionNum:
8347 * @ctxt: an XML parser context
8348 *
8349 * parse the XML version value.
8350 *
8351 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8352 *
8353 * Returns the string giving the XML version number, or NULL
8354 */
8355xmlChar *
8356xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8357 xmlChar *buf = NULL;
8358 int len = 0;
8359 int size = 10;
8360 xmlChar cur;
8361
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008362 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008363 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008364 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008365 return(NULL);
8366 }
8367 cur = CUR;
8368 while (((cur >= 'a') && (cur <= 'z')) ||
8369 ((cur >= 'A') && (cur <= 'Z')) ||
8370 ((cur >= '0') && (cur <= '9')) ||
8371 (cur == '_') || (cur == '.') ||
8372 (cur == ':') || (cur == '-')) {
8373 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008374 xmlChar *tmp;
8375
Owen Taylor3473f882001-02-23 17:55:21 +00008376 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008377 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8378 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008379 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008380 return(NULL);
8381 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008382 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008383 }
8384 buf[len++] = cur;
8385 NEXT;
8386 cur=CUR;
8387 }
8388 buf[len] = 0;
8389 return(buf);
8390}
8391
8392/**
8393 * xmlParseVersionInfo:
8394 * @ctxt: an XML parser context
8395 *
8396 * parse the XML version.
8397 *
8398 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8399 *
8400 * [25] Eq ::= S? '=' S?
8401 *
8402 * Returns the version string, e.g. "1.0"
8403 */
8404
8405xmlChar *
8406xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8407 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008408
Daniel Veillarda07050d2003-10-19 14:46:32 +00008409 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008410 SKIP(7);
8411 SKIP_BLANKS;
8412 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008413 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008414 return(NULL);
8415 }
8416 NEXT;
8417 SKIP_BLANKS;
8418 if (RAW == '"') {
8419 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008420 version = xmlParseVersionNum(ctxt);
8421 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008422 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008423 } else
8424 NEXT;
8425 } else if (RAW == '\''){
8426 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008427 version = xmlParseVersionNum(ctxt);
8428 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008429 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008430 } else
8431 NEXT;
8432 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008433 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008434 }
8435 }
8436 return(version);
8437}
8438
8439/**
8440 * xmlParseEncName:
8441 * @ctxt: an XML parser context
8442 *
8443 * parse the XML encoding name
8444 *
8445 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8446 *
8447 * Returns the encoding name value or NULL
8448 */
8449xmlChar *
8450xmlParseEncName(xmlParserCtxtPtr ctxt) {
8451 xmlChar *buf = NULL;
8452 int len = 0;
8453 int size = 10;
8454 xmlChar cur;
8455
8456 cur = CUR;
8457 if (((cur >= 'a') && (cur <= 'z')) ||
8458 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008459 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008460 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008461 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008462 return(NULL);
8463 }
8464
8465 buf[len++] = cur;
8466 NEXT;
8467 cur = CUR;
8468 while (((cur >= 'a') && (cur <= 'z')) ||
8469 ((cur >= 'A') && (cur <= 'Z')) ||
8470 ((cur >= '0') && (cur <= '9')) ||
8471 (cur == '.') || (cur == '_') ||
8472 (cur == '-')) {
8473 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008474 xmlChar *tmp;
8475
Owen Taylor3473f882001-02-23 17:55:21 +00008476 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008477 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8478 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008479 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008480 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008481 return(NULL);
8482 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008483 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008484 }
8485 buf[len++] = cur;
8486 NEXT;
8487 cur = CUR;
8488 if (cur == 0) {
8489 SHRINK;
8490 GROW;
8491 cur = CUR;
8492 }
8493 }
8494 buf[len] = 0;
8495 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008496 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008497 }
8498 return(buf);
8499}
8500
8501/**
8502 * xmlParseEncodingDecl:
8503 * @ctxt: an XML parser context
8504 *
8505 * parse the XML encoding declaration
8506 *
8507 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8508 *
8509 * this setups the conversion filters.
8510 *
8511 * Returns the encoding value or NULL
8512 */
8513
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008514const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008515xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8516 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008517
8518 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008519 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008520 SKIP(8);
8521 SKIP_BLANKS;
8522 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008523 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008524 return(NULL);
8525 }
8526 NEXT;
8527 SKIP_BLANKS;
8528 if (RAW == '"') {
8529 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008530 encoding = xmlParseEncName(ctxt);
8531 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008532 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008533 } else
8534 NEXT;
8535 } else if (RAW == '\''){
8536 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008537 encoding = xmlParseEncName(ctxt);
8538 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008539 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008540 } else
8541 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008542 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008543 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008544 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008545 /*
8546 * UTF-16 encoding stwich has already taken place at this stage,
8547 * more over the little-endian/big-endian selection is already done
8548 */
8549 if ((encoding != NULL) &&
8550 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8551 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008552 if (ctxt->encoding != NULL)
8553 xmlFree((xmlChar *) ctxt->encoding);
8554 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008555 }
8556 /*
8557 * UTF-8 encoding is handled natively
8558 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008559 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008560 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8561 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008562 if (ctxt->encoding != NULL)
8563 xmlFree((xmlChar *) ctxt->encoding);
8564 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008565 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008566 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008567 xmlCharEncodingHandlerPtr handler;
8568
8569 if (ctxt->input->encoding != NULL)
8570 xmlFree((xmlChar *) ctxt->input->encoding);
8571 ctxt->input->encoding = encoding;
8572
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008573 handler = xmlFindCharEncodingHandler((const char *) encoding);
8574 if (handler != NULL) {
8575 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008576 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008577 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008578 "Unsupported encoding %s\n", encoding);
8579 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008580 }
8581 }
8582 }
8583 return(encoding);
8584}
8585
8586/**
8587 * xmlParseSDDecl:
8588 * @ctxt: an XML parser context
8589 *
8590 * parse the XML standalone declaration
8591 *
8592 * [32] SDDecl ::= S 'standalone' Eq
8593 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8594 *
8595 * [ VC: Standalone Document Declaration ]
8596 * TODO The standalone document declaration must have the value "no"
8597 * if any external markup declarations contain declarations of:
8598 * - attributes with default values, if elements to which these
8599 * attributes apply appear in the document without specifications
8600 * of values for these attributes, or
8601 * - entities (other than amp, lt, gt, apos, quot), if references
8602 * to those entities appear in the document, or
8603 * - attributes with values subject to normalization, where the
8604 * attribute appears in the document with a value which will change
8605 * as a result of normalization, or
8606 * - element types with element content, if white space occurs directly
8607 * within any instance of those types.
8608 *
8609 * Returns 1 if standalone, 0 otherwise
8610 */
8611
8612int
8613xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8614 int standalone = -1;
8615
8616 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008617 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008618 SKIP(10);
8619 SKIP_BLANKS;
8620 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008621 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008622 return(standalone);
8623 }
8624 NEXT;
8625 SKIP_BLANKS;
8626 if (RAW == '\''){
8627 NEXT;
8628 if ((RAW == 'n') && (NXT(1) == 'o')) {
8629 standalone = 0;
8630 SKIP(2);
8631 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8632 (NXT(2) == 's')) {
8633 standalone = 1;
8634 SKIP(3);
8635 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008636 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008637 }
8638 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008639 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008640 } else
8641 NEXT;
8642 } else if (RAW == '"'){
8643 NEXT;
8644 if ((RAW == 'n') && (NXT(1) == 'o')) {
8645 standalone = 0;
8646 SKIP(2);
8647 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8648 (NXT(2) == 's')) {
8649 standalone = 1;
8650 SKIP(3);
8651 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008652 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008653 }
8654 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008655 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008656 } else
8657 NEXT;
8658 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008659 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008660 }
8661 }
8662 return(standalone);
8663}
8664
8665/**
8666 * xmlParseXMLDecl:
8667 * @ctxt: an XML parser context
8668 *
8669 * parse an XML declaration header
8670 *
8671 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8672 */
8673
8674void
8675xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8676 xmlChar *version;
8677
8678 /*
8679 * We know that '<?xml' is here.
8680 */
8681 SKIP(5);
8682
William M. Brack76e95df2003-10-18 16:20:14 +00008683 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008684 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8685 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008686 }
8687 SKIP_BLANKS;
8688
8689 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008690 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008691 */
8692 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008693 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008694 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008695 } else {
8696 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8697 /*
8698 * TODO: Blueberry should be detected here
8699 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008700 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8701 "Unsupported version '%s'\n",
8702 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008703 }
8704 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008705 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008706 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008707 }
Owen Taylor3473f882001-02-23 17:55:21 +00008708
8709 /*
8710 * We may have the encoding declaration
8711 */
William M. Brack76e95df2003-10-18 16:20:14 +00008712 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008713 if ((RAW == '?') && (NXT(1) == '>')) {
8714 SKIP(2);
8715 return;
8716 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008717 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008718 }
8719 xmlParseEncodingDecl(ctxt);
8720 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8721 /*
8722 * The XML REC instructs us to stop parsing right here
8723 */
8724 return;
8725 }
8726
8727 /*
8728 * We may have the standalone status.
8729 */
William M. Brack76e95df2003-10-18 16:20:14 +00008730 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008731 if ((RAW == '?') && (NXT(1) == '>')) {
8732 SKIP(2);
8733 return;
8734 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008736 }
8737 SKIP_BLANKS;
8738 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8739
8740 SKIP_BLANKS;
8741 if ((RAW == '?') && (NXT(1) == '>')) {
8742 SKIP(2);
8743 } else if (RAW == '>') {
8744 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008745 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008746 NEXT;
8747 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008748 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008749 MOVETO_ENDTAG(CUR_PTR);
8750 NEXT;
8751 }
8752}
8753
8754/**
8755 * xmlParseMisc:
8756 * @ctxt: an XML parser context
8757 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008758 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008759 *
8760 * [27] Misc ::= Comment | PI | S
8761 */
8762
8763void
8764xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008765 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008766 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008767 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008768 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008769 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008770 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008771 NEXT;
8772 } else
8773 xmlParseComment(ctxt);
8774 }
8775}
8776
8777/**
8778 * xmlParseDocument:
8779 * @ctxt: an XML parser context
8780 *
8781 * parse an XML document (and build a tree if using the standard SAX
8782 * interface).
8783 *
8784 * [1] document ::= prolog element Misc*
8785 *
8786 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8787 *
8788 * Returns 0, -1 in case of error. the parser context is augmented
8789 * as a result of the parsing.
8790 */
8791
8792int
8793xmlParseDocument(xmlParserCtxtPtr ctxt) {
8794 xmlChar start[4];
8795 xmlCharEncoding enc;
8796
8797 xmlInitParser();
8798
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008799 if ((ctxt == NULL) || (ctxt->input == NULL))
8800 return(-1);
8801
Owen Taylor3473f882001-02-23 17:55:21 +00008802 GROW;
8803
8804 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008805 * SAX: detecting the level.
8806 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008807 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008808
8809 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008810 * SAX: beginning of the document processing.
8811 */
8812 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8813 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8814
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008815 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8816 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008817 /*
8818 * Get the 4 first bytes and decode the charset
8819 * if enc != XML_CHAR_ENCODING_NONE
8820 * plug some encoding conversion routines.
8821 */
8822 start[0] = RAW;
8823 start[1] = NXT(1);
8824 start[2] = NXT(2);
8825 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008826 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008827 if (enc != XML_CHAR_ENCODING_NONE) {
8828 xmlSwitchEncoding(ctxt, enc);
8829 }
Owen Taylor3473f882001-02-23 17:55:21 +00008830 }
8831
8832
8833 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008834 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008835 }
8836
8837 /*
8838 * Check for the XMLDecl in the Prolog.
8839 */
8840 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008841 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008842
8843 /*
8844 * Note that we will switch encoding on the fly.
8845 */
8846 xmlParseXMLDecl(ctxt);
8847 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8848 /*
8849 * The XML REC instructs us to stop parsing right here
8850 */
8851 return(-1);
8852 }
8853 ctxt->standalone = ctxt->input->standalone;
8854 SKIP_BLANKS;
8855 } else {
8856 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8857 }
8858 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8859 ctxt->sax->startDocument(ctxt->userData);
8860
8861 /*
8862 * The Misc part of the Prolog
8863 */
8864 GROW;
8865 xmlParseMisc(ctxt);
8866
8867 /*
8868 * Then possibly doc type declaration(s) and more Misc
8869 * (doctypedecl Misc*)?
8870 */
8871 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008872 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008873
8874 ctxt->inSubset = 1;
8875 xmlParseDocTypeDecl(ctxt);
8876 if (RAW == '[') {
8877 ctxt->instate = XML_PARSER_DTD;
8878 xmlParseInternalSubset(ctxt);
8879 }
8880
8881 /*
8882 * Create and update the external subset.
8883 */
8884 ctxt->inSubset = 2;
8885 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8886 (!ctxt->disableSAX))
8887 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8888 ctxt->extSubSystem, ctxt->extSubURI);
8889 ctxt->inSubset = 0;
8890
8891
8892 ctxt->instate = XML_PARSER_PROLOG;
8893 xmlParseMisc(ctxt);
8894 }
8895
8896 /*
8897 * Time to start parsing the tree itself
8898 */
8899 GROW;
8900 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008901 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8902 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008903 } else {
8904 ctxt->instate = XML_PARSER_CONTENT;
8905 xmlParseElement(ctxt);
8906 ctxt->instate = XML_PARSER_EPILOG;
8907
8908
8909 /*
8910 * The Misc part at the end
8911 */
8912 xmlParseMisc(ctxt);
8913
Daniel Veillard561b7f82002-03-20 21:55:57 +00008914 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008915 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008916 }
8917 ctxt->instate = XML_PARSER_EOF;
8918 }
8919
8920 /*
8921 * SAX: end of the document processing.
8922 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008923 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008924 ctxt->sax->endDocument(ctxt->userData);
8925
Daniel Veillard5997aca2002-03-18 18:36:20 +00008926 /*
8927 * Remove locally kept entity definitions if the tree was not built
8928 */
8929 if ((ctxt->myDoc != NULL) &&
8930 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8931 xmlFreeDoc(ctxt->myDoc);
8932 ctxt->myDoc = NULL;
8933 }
8934
Daniel Veillardc7612992002-02-17 22:47:37 +00008935 if (! ctxt->wellFormed) {
8936 ctxt->valid = 0;
8937 return(-1);
8938 }
Owen Taylor3473f882001-02-23 17:55:21 +00008939 return(0);
8940}
8941
8942/**
8943 * xmlParseExtParsedEnt:
8944 * @ctxt: an XML parser context
8945 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008946 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008947 * An external general parsed entity is well-formed if it matches the
8948 * production labeled extParsedEnt.
8949 *
8950 * [78] extParsedEnt ::= TextDecl? content
8951 *
8952 * Returns 0, -1 in case of error. the parser context is augmented
8953 * as a result of the parsing.
8954 */
8955
8956int
8957xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8958 xmlChar start[4];
8959 xmlCharEncoding enc;
8960
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008961 if ((ctxt == NULL) || (ctxt->input == NULL))
8962 return(-1);
8963
Owen Taylor3473f882001-02-23 17:55:21 +00008964 xmlDefaultSAXHandlerInit();
8965
Daniel Veillard309f81d2003-09-23 09:02:53 +00008966 xmlDetectSAX2(ctxt);
8967
Owen Taylor3473f882001-02-23 17:55:21 +00008968 GROW;
8969
8970 /*
8971 * SAX: beginning of the document processing.
8972 */
8973 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8974 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8975
8976 /*
8977 * Get the 4 first bytes and decode the charset
8978 * if enc != XML_CHAR_ENCODING_NONE
8979 * plug some encoding conversion routines.
8980 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008981 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8982 start[0] = RAW;
8983 start[1] = NXT(1);
8984 start[2] = NXT(2);
8985 start[3] = NXT(3);
8986 enc = xmlDetectCharEncoding(start, 4);
8987 if (enc != XML_CHAR_ENCODING_NONE) {
8988 xmlSwitchEncoding(ctxt, enc);
8989 }
Owen Taylor3473f882001-02-23 17:55:21 +00008990 }
8991
8992
8993 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008994 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008995 }
8996
8997 /*
8998 * Check for the XMLDecl in the Prolog.
8999 */
9000 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009001 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009002
9003 /*
9004 * Note that we will switch encoding on the fly.
9005 */
9006 xmlParseXMLDecl(ctxt);
9007 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9008 /*
9009 * The XML REC instructs us to stop parsing right here
9010 */
9011 return(-1);
9012 }
9013 SKIP_BLANKS;
9014 } else {
9015 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9016 }
9017 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9018 ctxt->sax->startDocument(ctxt->userData);
9019
9020 /*
9021 * Doing validity checking on chunk doesn't make sense
9022 */
9023 ctxt->instate = XML_PARSER_CONTENT;
9024 ctxt->validate = 0;
9025 ctxt->loadsubset = 0;
9026 ctxt->depth = 0;
9027
9028 xmlParseContent(ctxt);
9029
9030 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009031 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009032 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009033 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009034 }
9035
9036 /*
9037 * SAX: end of the document processing.
9038 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009039 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009040 ctxt->sax->endDocument(ctxt->userData);
9041
9042 if (! ctxt->wellFormed) return(-1);
9043 return(0);
9044}
9045
Daniel Veillard73b013f2003-09-30 12:36:01 +00009046#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009047/************************************************************************
9048 * *
9049 * Progressive parsing interfaces *
9050 * *
9051 ************************************************************************/
9052
9053/**
9054 * xmlParseLookupSequence:
9055 * @ctxt: an XML parser context
9056 * @first: the first char to lookup
9057 * @next: the next char to lookup or zero
9058 * @third: the next char to lookup or zero
9059 *
9060 * Try to find if a sequence (first, next, third) or just (first next) or
9061 * (first) is available in the input stream.
9062 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9063 * to avoid rescanning sequences of bytes, it DOES change the state of the
9064 * parser, do not use liberally.
9065 *
9066 * Returns the index to the current parsing point if the full sequence
9067 * is available, -1 otherwise.
9068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009069static int
Owen Taylor3473f882001-02-23 17:55:21 +00009070xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9071 xmlChar next, xmlChar third) {
9072 int base, len;
9073 xmlParserInputPtr in;
9074 const xmlChar *buf;
9075
9076 in = ctxt->input;
9077 if (in == NULL) return(-1);
9078 base = in->cur - in->base;
9079 if (base < 0) return(-1);
9080 if (ctxt->checkIndex > base)
9081 base = ctxt->checkIndex;
9082 if (in->buf == NULL) {
9083 buf = in->base;
9084 len = in->length;
9085 } else {
9086 buf = in->buf->buffer->content;
9087 len = in->buf->buffer->use;
9088 }
9089 /* take into account the sequence length */
9090 if (third) len -= 2;
9091 else if (next) len --;
9092 for (;base < len;base++) {
9093 if (buf[base] == first) {
9094 if (third != 0) {
9095 if ((buf[base + 1] != next) ||
9096 (buf[base + 2] != third)) continue;
9097 } else if (next != 0) {
9098 if (buf[base + 1] != next) continue;
9099 }
9100 ctxt->checkIndex = 0;
9101#ifdef DEBUG_PUSH
9102 if (next == 0)
9103 xmlGenericError(xmlGenericErrorContext,
9104 "PP: lookup '%c' found at %d\n",
9105 first, base);
9106 else if (third == 0)
9107 xmlGenericError(xmlGenericErrorContext,
9108 "PP: lookup '%c%c' found at %d\n",
9109 first, next, base);
9110 else
9111 xmlGenericError(xmlGenericErrorContext,
9112 "PP: lookup '%c%c%c' found at %d\n",
9113 first, next, third, base);
9114#endif
9115 return(base - (in->cur - in->base));
9116 }
9117 }
9118 ctxt->checkIndex = base;
9119#ifdef DEBUG_PUSH
9120 if (next == 0)
9121 xmlGenericError(xmlGenericErrorContext,
9122 "PP: lookup '%c' failed\n", first);
9123 else if (third == 0)
9124 xmlGenericError(xmlGenericErrorContext,
9125 "PP: lookup '%c%c' failed\n", first, next);
9126 else
9127 xmlGenericError(xmlGenericErrorContext,
9128 "PP: lookup '%c%c%c' failed\n", first, next, third);
9129#endif
9130 return(-1);
9131}
9132
9133/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009134 * xmlParseGetLasts:
9135 * @ctxt: an XML parser context
9136 * @lastlt: pointer to store the last '<' from the input
9137 * @lastgt: pointer to store the last '>' from the input
9138 *
9139 * Lookup the last < and > in the current chunk
9140 */
9141static void
9142xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9143 const xmlChar **lastgt) {
9144 const xmlChar *tmp;
9145
9146 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9147 xmlGenericError(xmlGenericErrorContext,
9148 "Internal error: xmlParseGetLasts\n");
9149 return;
9150 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009151 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009152 tmp = ctxt->input->end;
9153 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009154 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009155 if (tmp < ctxt->input->base) {
9156 *lastlt = NULL;
9157 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009158 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009159 *lastlt = tmp;
9160 tmp++;
9161 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9162 if (*tmp == '\'') {
9163 tmp++;
9164 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9165 if (tmp < ctxt->input->end) tmp++;
9166 } else if (*tmp == '"') {
9167 tmp++;
9168 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9169 if (tmp < ctxt->input->end) tmp++;
9170 } else
9171 tmp++;
9172 }
9173 if (tmp < ctxt->input->end)
9174 *lastgt = tmp;
9175 else {
9176 tmp = *lastlt;
9177 tmp--;
9178 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9179 if (tmp >= ctxt->input->base)
9180 *lastgt = tmp;
9181 else
9182 *lastgt = NULL;
9183 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009184 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009185 } else {
9186 *lastlt = NULL;
9187 *lastgt = NULL;
9188 }
9189}
9190/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009191 * xmlCheckCdataPush:
9192 * @cur: pointer to the bock of characters
9193 * @len: length of the block in bytes
9194 *
9195 * Check that the block of characters is okay as SCdata content [20]
9196 *
9197 * Returns the number of bytes to pass if okay, a negative index where an
9198 * UTF-8 error occured otherwise
9199 */
9200static int
9201xmlCheckCdataPush(const xmlChar *utf, int len) {
9202 int ix;
9203 unsigned char c;
9204 int codepoint;
9205
9206 if ((utf == NULL) || (len <= 0))
9207 return(0);
9208
9209 for (ix = 0; ix < len;) { /* string is 0-terminated */
9210 c = utf[ix];
9211 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9212 if (c >= 0x20)
9213 ix++;
9214 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9215 ix++;
9216 else
9217 return(-ix);
9218 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9219 if (ix + 2 > len) return(ix);
9220 if ((utf[ix+1] & 0xc0 ) != 0x80)
9221 return(-ix);
9222 codepoint = (utf[0] & 0x1f) << 6;
9223 codepoint |= utf[1] & 0x3f;
9224 if (!xmlIsCharQ(codepoint))
9225 return(-ix);
9226 ix += 2;
9227 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9228 if (ix + 3 > len) return(ix);
9229 if (((utf[ix+1] & 0xc0) != 0x80) ||
9230 ((utf[ix+2] & 0xc0) != 0x80))
9231 return(-ix);
9232 codepoint = (utf[0] & 0xf) << 12;
9233 codepoint |= (utf[1] & 0x3f) << 6;
9234 codepoint |= utf[2] & 0x3f;
9235 if (!xmlIsCharQ(codepoint))
9236 return(-ix);
9237 ix += 3;
9238 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9239 if (ix + 4 > len) return(ix);
9240 if (((utf[ix+1] & 0xc0) != 0x80) ||
9241 ((utf[ix+2] & 0xc0) != 0x80) ||
9242 ((utf[ix+3] & 0xc0) != 0x80))
9243 return(-ix);
9244 codepoint = (utf[0] & 0x7) << 18;
9245 codepoint |= (utf[1] & 0x3f) << 12;
9246 codepoint |= (utf[2] & 0x3f) << 6;
9247 codepoint |= utf[3] & 0x3f;
9248 if (!xmlIsCharQ(codepoint))
9249 return(-ix);
9250 ix += 4;
9251 } else /* unknown encoding */
9252 return(-ix);
9253 }
9254 return(ix);
9255}
9256
9257/**
Owen Taylor3473f882001-02-23 17:55:21 +00009258 * xmlParseTryOrFinish:
9259 * @ctxt: an XML parser context
9260 * @terminate: last chunk indicator
9261 *
9262 * Try to progress on parsing
9263 *
9264 * Returns zero if no parsing was possible
9265 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009266static int
Owen Taylor3473f882001-02-23 17:55:21 +00009267xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9268 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009269 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009270 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009271 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009272
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009273 if (ctxt->input == NULL)
9274 return(0);
9275
Owen Taylor3473f882001-02-23 17:55:21 +00009276#ifdef DEBUG_PUSH
9277 switch (ctxt->instate) {
9278 case XML_PARSER_EOF:
9279 xmlGenericError(xmlGenericErrorContext,
9280 "PP: try EOF\n"); break;
9281 case XML_PARSER_START:
9282 xmlGenericError(xmlGenericErrorContext,
9283 "PP: try START\n"); break;
9284 case XML_PARSER_MISC:
9285 xmlGenericError(xmlGenericErrorContext,
9286 "PP: try MISC\n");break;
9287 case XML_PARSER_COMMENT:
9288 xmlGenericError(xmlGenericErrorContext,
9289 "PP: try COMMENT\n");break;
9290 case XML_PARSER_PROLOG:
9291 xmlGenericError(xmlGenericErrorContext,
9292 "PP: try PROLOG\n");break;
9293 case XML_PARSER_START_TAG:
9294 xmlGenericError(xmlGenericErrorContext,
9295 "PP: try START_TAG\n");break;
9296 case XML_PARSER_CONTENT:
9297 xmlGenericError(xmlGenericErrorContext,
9298 "PP: try CONTENT\n");break;
9299 case XML_PARSER_CDATA_SECTION:
9300 xmlGenericError(xmlGenericErrorContext,
9301 "PP: try CDATA_SECTION\n");break;
9302 case XML_PARSER_END_TAG:
9303 xmlGenericError(xmlGenericErrorContext,
9304 "PP: try END_TAG\n");break;
9305 case XML_PARSER_ENTITY_DECL:
9306 xmlGenericError(xmlGenericErrorContext,
9307 "PP: try ENTITY_DECL\n");break;
9308 case XML_PARSER_ENTITY_VALUE:
9309 xmlGenericError(xmlGenericErrorContext,
9310 "PP: try ENTITY_VALUE\n");break;
9311 case XML_PARSER_ATTRIBUTE_VALUE:
9312 xmlGenericError(xmlGenericErrorContext,
9313 "PP: try ATTRIBUTE_VALUE\n");break;
9314 case XML_PARSER_DTD:
9315 xmlGenericError(xmlGenericErrorContext,
9316 "PP: try DTD\n");break;
9317 case XML_PARSER_EPILOG:
9318 xmlGenericError(xmlGenericErrorContext,
9319 "PP: try EPILOG\n");break;
9320 case XML_PARSER_PI:
9321 xmlGenericError(xmlGenericErrorContext,
9322 "PP: try PI\n");break;
9323 case XML_PARSER_IGNORE:
9324 xmlGenericError(xmlGenericErrorContext,
9325 "PP: try IGNORE\n");break;
9326 }
9327#endif
9328
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009329 if ((ctxt->input != NULL) &&
9330 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009331 xmlSHRINK(ctxt);
9332 ctxt->checkIndex = 0;
9333 }
9334 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009335
Daniel Veillarda880b122003-04-21 21:36:41 +00009336 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009337 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009338 return(0);
9339
9340
Owen Taylor3473f882001-02-23 17:55:21 +00009341 /*
9342 * Pop-up of finished entities.
9343 */
9344 while ((RAW == 0) && (ctxt->inputNr > 1))
9345 xmlPopInput(ctxt);
9346
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009347 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009348 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009349 avail = ctxt->input->length -
9350 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009351 else {
9352 /*
9353 * If we are operating on converted input, try to flush
9354 * remainng chars to avoid them stalling in the non-converted
9355 * buffer.
9356 */
9357 if ((ctxt->input->buf->raw != NULL) &&
9358 (ctxt->input->buf->raw->use > 0)) {
9359 int base = ctxt->input->base -
9360 ctxt->input->buf->buffer->content;
9361 int current = ctxt->input->cur - ctxt->input->base;
9362
9363 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9364 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9365 ctxt->input->cur = ctxt->input->base + current;
9366 ctxt->input->end =
9367 &ctxt->input->buf->buffer->content[
9368 ctxt->input->buf->buffer->use];
9369 }
9370 avail = ctxt->input->buf->buffer->use -
9371 (ctxt->input->cur - ctxt->input->base);
9372 }
Owen Taylor3473f882001-02-23 17:55:21 +00009373 if (avail < 1)
9374 goto done;
9375 switch (ctxt->instate) {
9376 case XML_PARSER_EOF:
9377 /*
9378 * Document parsing is done !
9379 */
9380 goto done;
9381 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009382 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9383 xmlChar start[4];
9384 xmlCharEncoding enc;
9385
9386 /*
9387 * Very first chars read from the document flow.
9388 */
9389 if (avail < 4)
9390 goto done;
9391
9392 /*
9393 * Get the 4 first bytes and decode the charset
9394 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009395 * plug some encoding conversion routines,
9396 * else xmlSwitchEncoding will set to (default)
9397 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009398 */
9399 start[0] = RAW;
9400 start[1] = NXT(1);
9401 start[2] = NXT(2);
9402 start[3] = NXT(3);
9403 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009404 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009405 break;
9406 }
Owen Taylor3473f882001-02-23 17:55:21 +00009407
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009408 if (avail < 2)
9409 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009410 cur = ctxt->input->cur[0];
9411 next = ctxt->input->cur[1];
9412 if (cur == 0) {
9413 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9414 ctxt->sax->setDocumentLocator(ctxt->userData,
9415 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009416 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009417 ctxt->instate = XML_PARSER_EOF;
9418#ifdef DEBUG_PUSH
9419 xmlGenericError(xmlGenericErrorContext,
9420 "PP: entering EOF\n");
9421#endif
9422 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9423 ctxt->sax->endDocument(ctxt->userData);
9424 goto done;
9425 }
9426 if ((cur == '<') && (next == '?')) {
9427 /* PI or XML decl */
9428 if (avail < 5) return(ret);
9429 if ((!terminate) &&
9430 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9431 return(ret);
9432 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9433 ctxt->sax->setDocumentLocator(ctxt->userData,
9434 &xmlDefaultSAXLocator);
9435 if ((ctxt->input->cur[2] == 'x') &&
9436 (ctxt->input->cur[3] == 'm') &&
9437 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009438 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009439 ret += 5;
9440#ifdef DEBUG_PUSH
9441 xmlGenericError(xmlGenericErrorContext,
9442 "PP: Parsing XML Decl\n");
9443#endif
9444 xmlParseXMLDecl(ctxt);
9445 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9446 /*
9447 * The XML REC instructs us to stop parsing right
9448 * here
9449 */
9450 ctxt->instate = XML_PARSER_EOF;
9451 return(0);
9452 }
9453 ctxt->standalone = ctxt->input->standalone;
9454 if ((ctxt->encoding == NULL) &&
9455 (ctxt->input->encoding != NULL))
9456 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9457 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9458 (!ctxt->disableSAX))
9459 ctxt->sax->startDocument(ctxt->userData);
9460 ctxt->instate = XML_PARSER_MISC;
9461#ifdef DEBUG_PUSH
9462 xmlGenericError(xmlGenericErrorContext,
9463 "PP: entering MISC\n");
9464#endif
9465 } else {
9466 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9467 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9468 (!ctxt->disableSAX))
9469 ctxt->sax->startDocument(ctxt->userData);
9470 ctxt->instate = XML_PARSER_MISC;
9471#ifdef DEBUG_PUSH
9472 xmlGenericError(xmlGenericErrorContext,
9473 "PP: entering MISC\n");
9474#endif
9475 }
9476 } else {
9477 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9478 ctxt->sax->setDocumentLocator(ctxt->userData,
9479 &xmlDefaultSAXLocator);
9480 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009481 if (ctxt->version == NULL) {
9482 xmlErrMemory(ctxt, NULL);
9483 break;
9484 }
Owen Taylor3473f882001-02-23 17:55:21 +00009485 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9486 (!ctxt->disableSAX))
9487 ctxt->sax->startDocument(ctxt->userData);
9488 ctxt->instate = XML_PARSER_MISC;
9489#ifdef DEBUG_PUSH
9490 xmlGenericError(xmlGenericErrorContext,
9491 "PP: entering MISC\n");
9492#endif
9493 }
9494 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009495 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009496 const xmlChar *name;
9497 const xmlChar *prefix;
9498 const xmlChar *URI;
9499 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009500
9501 if ((avail < 2) && (ctxt->inputNr == 1))
9502 goto done;
9503 cur = ctxt->input->cur[0];
9504 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009505 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009506 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009507 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9508 ctxt->sax->endDocument(ctxt->userData);
9509 goto done;
9510 }
9511 if (!terminate) {
9512 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009513 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009514 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009515 goto done;
9516 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9517 goto done;
9518 }
9519 }
9520 if (ctxt->spaceNr == 0)
9521 spacePush(ctxt, -1);
9522 else
9523 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009524#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009525 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009526#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009527 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009528#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009529 else
9530 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009531#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009532 if (name == NULL) {
9533 spacePop(ctxt);
9534 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009535 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9536 ctxt->sax->endDocument(ctxt->userData);
9537 goto done;
9538 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009539#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009540 /*
9541 * [ VC: Root Element Type ]
9542 * The Name in the document type declaration must match
9543 * the element type of the root element.
9544 */
9545 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9546 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9547 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009548#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009549
9550 /*
9551 * Check for an Empty Element.
9552 */
9553 if ((RAW == '/') && (NXT(1) == '>')) {
9554 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009555
9556 if (ctxt->sax2) {
9557 if ((ctxt->sax != NULL) &&
9558 (ctxt->sax->endElementNs != NULL) &&
9559 (!ctxt->disableSAX))
9560 ctxt->sax->endElementNs(ctxt->userData, name,
9561 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009562 if (ctxt->nsNr - nsNr > 0)
9563 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009564#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009565 } else {
9566 if ((ctxt->sax != NULL) &&
9567 (ctxt->sax->endElement != NULL) &&
9568 (!ctxt->disableSAX))
9569 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009570#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009571 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009572 spacePop(ctxt);
9573 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009574 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009575 } else {
9576 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009577 }
9578 break;
9579 }
9580 if (RAW == '>') {
9581 NEXT;
9582 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009583 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 "Couldn't find end of Start Tag %s\n",
9585 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009586 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009588 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009589 if (ctxt->sax2)
9590 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009591#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009592 else
9593 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009594#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009595
Daniel Veillarda880b122003-04-21 21:36:41 +00009596 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009597 break;
9598 }
9599 case XML_PARSER_CONTENT: {
9600 const xmlChar *test;
9601 unsigned int cons;
9602 if ((avail < 2) && (ctxt->inputNr == 1))
9603 goto done;
9604 cur = ctxt->input->cur[0];
9605 next = ctxt->input->cur[1];
9606
9607 test = CUR_PTR;
9608 cons = ctxt->input->consumed;
9609 if ((cur == '<') && (next == '/')) {
9610 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009611 break;
9612 } else if ((cur == '<') && (next == '?')) {
9613 if ((!terminate) &&
9614 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9615 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009616 xmlParsePI(ctxt);
9617 } else if ((cur == '<') && (next != '!')) {
9618 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009619 break;
9620 } else if ((cur == '<') && (next == '!') &&
9621 (ctxt->input->cur[2] == '-') &&
9622 (ctxt->input->cur[3] == '-')) {
9623 if ((!terminate) &&
9624 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9625 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009626 xmlParseComment(ctxt);
9627 ctxt->instate = XML_PARSER_CONTENT;
9628 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9629 (ctxt->input->cur[2] == '[') &&
9630 (ctxt->input->cur[3] == 'C') &&
9631 (ctxt->input->cur[4] == 'D') &&
9632 (ctxt->input->cur[5] == 'A') &&
9633 (ctxt->input->cur[6] == 'T') &&
9634 (ctxt->input->cur[7] == 'A') &&
9635 (ctxt->input->cur[8] == '[')) {
9636 SKIP(9);
9637 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009638 break;
9639 } else if ((cur == '<') && (next == '!') &&
9640 (avail < 9)) {
9641 goto done;
9642 } else if (cur == '&') {
9643 if ((!terminate) &&
9644 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9645 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009646 xmlParseReference(ctxt);
9647 } else {
9648 /* TODO Avoid the extra copy, handle directly !!! */
9649 /*
9650 * Goal of the following test is:
9651 * - minimize calls to the SAX 'character' callback
9652 * when they are mergeable
9653 * - handle an problem for isBlank when we only parse
9654 * a sequence of blank chars and the next one is
9655 * not available to check against '<' presence.
9656 * - tries to homogenize the differences in SAX
9657 * callbacks between the push and pull versions
9658 * of the parser.
9659 */
9660 if ((ctxt->inputNr == 1) &&
9661 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9662 if (!terminate) {
9663 if (ctxt->progressive) {
9664 if ((lastlt == NULL) ||
9665 (ctxt->input->cur > lastlt))
9666 goto done;
9667 } else if (xmlParseLookupSequence(ctxt,
9668 '<', 0, 0) < 0) {
9669 goto done;
9670 }
9671 }
9672 }
9673 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009674 xmlParseCharData(ctxt, 0);
9675 }
9676 /*
9677 * Pop-up of finished entities.
9678 */
9679 while ((RAW == 0) && (ctxt->inputNr > 1))
9680 xmlPopInput(ctxt);
9681 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009682 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9683 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009684 ctxt->instate = XML_PARSER_EOF;
9685 break;
9686 }
9687 break;
9688 }
9689 case XML_PARSER_END_TAG:
9690 if (avail < 2)
9691 goto done;
9692 if (!terminate) {
9693 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009694 /* > can be found unescaped in attribute values */
9695 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009696 goto done;
9697 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9698 goto done;
9699 }
9700 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009701 if (ctxt->sax2) {
9702 xmlParseEndTag2(ctxt,
9703 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9704 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009705 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009706 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009707 }
9708#ifdef LIBXML_SAX1_ENABLED
9709 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009710 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009711#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009712 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009713 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009714 } else {
9715 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009716 }
9717 break;
9718 case XML_PARSER_CDATA_SECTION: {
9719 /*
9720 * The Push mode need to have the SAX callback for
9721 * cdataBlock merge back contiguous callbacks.
9722 */
9723 int base;
9724
9725 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9726 if (base < 0) {
9727 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009728 int tmp;
9729
9730 tmp = xmlCheckCdataPush(ctxt->input->cur,
9731 XML_PARSER_BIG_BUFFER_SIZE);
9732 if (tmp < 0) {
9733 tmp = -tmp;
9734 ctxt->input->cur += tmp;
9735 goto encoding_error;
9736 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009737 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9738 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009739 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009740 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009741 else if (ctxt->sax->characters != NULL)
9742 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009743 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009744 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009745 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009746 ctxt->checkIndex = 0;
9747 }
9748 goto done;
9749 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009750 int tmp;
9751
9752 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9753 if ((tmp < 0) || (tmp != base)) {
9754 tmp = -tmp;
9755 ctxt->input->cur += tmp;
9756 goto encoding_error;
9757 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009758 if ((ctxt->sax != NULL) && (base > 0) &&
9759 (!ctxt->disableSAX)) {
9760 if (ctxt->sax->cdataBlock != NULL)
9761 ctxt->sax->cdataBlock(ctxt->userData,
9762 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009763 else if (ctxt->sax->characters != NULL)
9764 ctxt->sax->characters(ctxt->userData,
9765 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009766 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009767 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009768 ctxt->checkIndex = 0;
9769 ctxt->instate = XML_PARSER_CONTENT;
9770#ifdef DEBUG_PUSH
9771 xmlGenericError(xmlGenericErrorContext,
9772 "PP: entering CONTENT\n");
9773#endif
9774 }
9775 break;
9776 }
Owen Taylor3473f882001-02-23 17:55:21 +00009777 case XML_PARSER_MISC:
9778 SKIP_BLANKS;
9779 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009780 avail = ctxt->input->length -
9781 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009782 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009783 avail = ctxt->input->buf->buffer->use -
9784 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009785 if (avail < 2)
9786 goto done;
9787 cur = ctxt->input->cur[0];
9788 next = ctxt->input->cur[1];
9789 if ((cur == '<') && (next == '?')) {
9790 if ((!terminate) &&
9791 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9792 goto done;
9793#ifdef DEBUG_PUSH
9794 xmlGenericError(xmlGenericErrorContext,
9795 "PP: Parsing PI\n");
9796#endif
9797 xmlParsePI(ctxt);
9798 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009799 (ctxt->input->cur[2] == '-') &&
9800 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009801 if ((!terminate) &&
9802 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9803 goto done;
9804#ifdef DEBUG_PUSH
9805 xmlGenericError(xmlGenericErrorContext,
9806 "PP: Parsing Comment\n");
9807#endif
9808 xmlParseComment(ctxt);
9809 ctxt->instate = XML_PARSER_MISC;
9810 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009811 (ctxt->input->cur[2] == 'D') &&
9812 (ctxt->input->cur[3] == 'O') &&
9813 (ctxt->input->cur[4] == 'C') &&
9814 (ctxt->input->cur[5] == 'T') &&
9815 (ctxt->input->cur[6] == 'Y') &&
9816 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009817 (ctxt->input->cur[8] == 'E')) {
9818 if ((!terminate) &&
9819 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9820 goto done;
9821#ifdef DEBUG_PUSH
9822 xmlGenericError(xmlGenericErrorContext,
9823 "PP: Parsing internal subset\n");
9824#endif
9825 ctxt->inSubset = 1;
9826 xmlParseDocTypeDecl(ctxt);
9827 if (RAW == '[') {
9828 ctxt->instate = XML_PARSER_DTD;
9829#ifdef DEBUG_PUSH
9830 xmlGenericError(xmlGenericErrorContext,
9831 "PP: entering DTD\n");
9832#endif
9833 } else {
9834 /*
9835 * Create and update the external subset.
9836 */
9837 ctxt->inSubset = 2;
9838 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9839 (ctxt->sax->externalSubset != NULL))
9840 ctxt->sax->externalSubset(ctxt->userData,
9841 ctxt->intSubName, ctxt->extSubSystem,
9842 ctxt->extSubURI);
9843 ctxt->inSubset = 0;
9844 ctxt->instate = XML_PARSER_PROLOG;
9845#ifdef DEBUG_PUSH
9846 xmlGenericError(xmlGenericErrorContext,
9847 "PP: entering PROLOG\n");
9848#endif
9849 }
9850 } else if ((cur == '<') && (next == '!') &&
9851 (avail < 9)) {
9852 goto done;
9853 } else {
9854 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009855 ctxt->progressive = 1;
9856 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009857#ifdef DEBUG_PUSH
9858 xmlGenericError(xmlGenericErrorContext,
9859 "PP: entering START_TAG\n");
9860#endif
9861 }
9862 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009863 case XML_PARSER_PROLOG:
9864 SKIP_BLANKS;
9865 if (ctxt->input->buf == NULL)
9866 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9867 else
9868 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9869 if (avail < 2)
9870 goto done;
9871 cur = ctxt->input->cur[0];
9872 next = ctxt->input->cur[1];
9873 if ((cur == '<') && (next == '?')) {
9874 if ((!terminate) &&
9875 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9876 goto done;
9877#ifdef DEBUG_PUSH
9878 xmlGenericError(xmlGenericErrorContext,
9879 "PP: Parsing PI\n");
9880#endif
9881 xmlParsePI(ctxt);
9882 } else if ((cur == '<') && (next == '!') &&
9883 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9884 if ((!terminate) &&
9885 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9886 goto done;
9887#ifdef DEBUG_PUSH
9888 xmlGenericError(xmlGenericErrorContext,
9889 "PP: Parsing Comment\n");
9890#endif
9891 xmlParseComment(ctxt);
9892 ctxt->instate = XML_PARSER_PROLOG;
9893 } else if ((cur == '<') && (next == '!') &&
9894 (avail < 4)) {
9895 goto done;
9896 } else {
9897 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009898 if (ctxt->progressive == 0)
9899 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009900 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009901#ifdef DEBUG_PUSH
9902 xmlGenericError(xmlGenericErrorContext,
9903 "PP: entering START_TAG\n");
9904#endif
9905 }
9906 break;
9907 case XML_PARSER_EPILOG:
9908 SKIP_BLANKS;
9909 if (ctxt->input->buf == NULL)
9910 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9911 else
9912 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9913 if (avail < 2)
9914 goto done;
9915 cur = ctxt->input->cur[0];
9916 next = ctxt->input->cur[1];
9917 if ((cur == '<') && (next == '?')) {
9918 if ((!terminate) &&
9919 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9920 goto done;
9921#ifdef DEBUG_PUSH
9922 xmlGenericError(xmlGenericErrorContext,
9923 "PP: Parsing PI\n");
9924#endif
9925 xmlParsePI(ctxt);
9926 ctxt->instate = XML_PARSER_EPILOG;
9927 } else if ((cur == '<') && (next == '!') &&
9928 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9929 if ((!terminate) &&
9930 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9931 goto done;
9932#ifdef DEBUG_PUSH
9933 xmlGenericError(xmlGenericErrorContext,
9934 "PP: Parsing Comment\n");
9935#endif
9936 xmlParseComment(ctxt);
9937 ctxt->instate = XML_PARSER_EPILOG;
9938 } else if ((cur == '<') && (next == '!') &&
9939 (avail < 4)) {
9940 goto done;
9941 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009942 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009943 ctxt->instate = XML_PARSER_EOF;
9944#ifdef DEBUG_PUSH
9945 xmlGenericError(xmlGenericErrorContext,
9946 "PP: entering EOF\n");
9947#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009948 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009949 ctxt->sax->endDocument(ctxt->userData);
9950 goto done;
9951 }
9952 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009953 case XML_PARSER_DTD: {
9954 /*
9955 * Sorry but progressive parsing of the internal subset
9956 * is not expected to be supported. We first check that
9957 * the full content of the internal subset is available and
9958 * the parsing is launched only at that point.
9959 * Internal subset ends up with "']' S? '>'" in an unescaped
9960 * section and not in a ']]>' sequence which are conditional
9961 * sections (whoever argued to keep that crap in XML deserve
9962 * a place in hell !).
9963 */
9964 int base, i;
9965 xmlChar *buf;
9966 xmlChar quote = 0;
9967
9968 base = ctxt->input->cur - ctxt->input->base;
9969 if (base < 0) return(0);
9970 if (ctxt->checkIndex > base)
9971 base = ctxt->checkIndex;
9972 buf = ctxt->input->buf->buffer->content;
9973 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9974 base++) {
9975 if (quote != 0) {
9976 if (buf[base] == quote)
9977 quote = 0;
9978 continue;
9979 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009980 if ((quote == 0) && (buf[base] == '<')) {
9981 int found = 0;
9982 /* special handling of comments */
9983 if (((unsigned int) base + 4 <
9984 ctxt->input->buf->buffer->use) &&
9985 (buf[base + 1] == '!') &&
9986 (buf[base + 2] == '-') &&
9987 (buf[base + 3] == '-')) {
9988 for (;(unsigned int) base + 3 <
9989 ctxt->input->buf->buffer->use; base++) {
9990 if ((buf[base] == '-') &&
9991 (buf[base + 1] == '-') &&
9992 (buf[base + 2] == '>')) {
9993 found = 1;
9994 base += 2;
9995 break;
9996 }
9997 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009998 if (!found) {
9999#if 0
10000 fprintf(stderr, "unfinished comment\n");
10001#endif
10002 break; /* for */
10003 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010004 continue;
10005 }
10006 }
Owen Taylor3473f882001-02-23 17:55:21 +000010007 if (buf[base] == '"') {
10008 quote = '"';
10009 continue;
10010 }
10011 if (buf[base] == '\'') {
10012 quote = '\'';
10013 continue;
10014 }
10015 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010016#if 0
10017 fprintf(stderr, "%c%c%c%c: ", buf[base],
10018 buf[base + 1], buf[base + 2], buf[base + 3]);
10019#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010020 if ((unsigned int) base +1 >=
10021 ctxt->input->buf->buffer->use)
10022 break;
10023 if (buf[base + 1] == ']') {
10024 /* conditional crap, skip both ']' ! */
10025 base++;
10026 continue;
10027 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010028 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010029 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10030 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010031 if (buf[base + i] == '>') {
10032#if 0
10033 fprintf(stderr, "found\n");
10034#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010035 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010036 }
10037 if (!IS_BLANK_CH(buf[base + i])) {
10038#if 0
10039 fprintf(stderr, "not found\n");
10040#endif
10041 goto not_end_of_int_subset;
10042 }
Owen Taylor3473f882001-02-23 17:55:21 +000010043 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010044#if 0
10045 fprintf(stderr, "end of stream\n");
10046#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010047 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010048
Owen Taylor3473f882001-02-23 17:55:21 +000010049 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010050not_end_of_int_subset:
10051 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010052 }
10053 /*
10054 * We didn't found the end of the Internal subset
10055 */
Owen Taylor3473f882001-02-23 17:55:21 +000010056#ifdef DEBUG_PUSH
10057 if (next == 0)
10058 xmlGenericError(xmlGenericErrorContext,
10059 "PP: lookup of int subset end filed\n");
10060#endif
10061 goto done;
10062
10063found_end_int_subset:
10064 xmlParseInternalSubset(ctxt);
10065 ctxt->inSubset = 2;
10066 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10067 (ctxt->sax->externalSubset != NULL))
10068 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10069 ctxt->extSubSystem, ctxt->extSubURI);
10070 ctxt->inSubset = 0;
10071 ctxt->instate = XML_PARSER_PROLOG;
10072 ctxt->checkIndex = 0;
10073#ifdef DEBUG_PUSH
10074 xmlGenericError(xmlGenericErrorContext,
10075 "PP: entering PROLOG\n");
10076#endif
10077 break;
10078 }
10079 case XML_PARSER_COMMENT:
10080 xmlGenericError(xmlGenericErrorContext,
10081 "PP: internal error, state == COMMENT\n");
10082 ctxt->instate = XML_PARSER_CONTENT;
10083#ifdef DEBUG_PUSH
10084 xmlGenericError(xmlGenericErrorContext,
10085 "PP: entering CONTENT\n");
10086#endif
10087 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010088 case XML_PARSER_IGNORE:
10089 xmlGenericError(xmlGenericErrorContext,
10090 "PP: internal error, state == IGNORE");
10091 ctxt->instate = XML_PARSER_DTD;
10092#ifdef DEBUG_PUSH
10093 xmlGenericError(xmlGenericErrorContext,
10094 "PP: entering DTD\n");
10095#endif
10096 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010097 case XML_PARSER_PI:
10098 xmlGenericError(xmlGenericErrorContext,
10099 "PP: internal error, state == PI\n");
10100 ctxt->instate = XML_PARSER_CONTENT;
10101#ifdef DEBUG_PUSH
10102 xmlGenericError(xmlGenericErrorContext,
10103 "PP: entering CONTENT\n");
10104#endif
10105 break;
10106 case XML_PARSER_ENTITY_DECL:
10107 xmlGenericError(xmlGenericErrorContext,
10108 "PP: internal error, state == ENTITY_DECL\n");
10109 ctxt->instate = XML_PARSER_DTD;
10110#ifdef DEBUG_PUSH
10111 xmlGenericError(xmlGenericErrorContext,
10112 "PP: entering DTD\n");
10113#endif
10114 break;
10115 case XML_PARSER_ENTITY_VALUE:
10116 xmlGenericError(xmlGenericErrorContext,
10117 "PP: internal error, state == ENTITY_VALUE\n");
10118 ctxt->instate = XML_PARSER_CONTENT;
10119#ifdef DEBUG_PUSH
10120 xmlGenericError(xmlGenericErrorContext,
10121 "PP: entering DTD\n");
10122#endif
10123 break;
10124 case XML_PARSER_ATTRIBUTE_VALUE:
10125 xmlGenericError(xmlGenericErrorContext,
10126 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10127 ctxt->instate = XML_PARSER_START_TAG;
10128#ifdef DEBUG_PUSH
10129 xmlGenericError(xmlGenericErrorContext,
10130 "PP: entering START_TAG\n");
10131#endif
10132 break;
10133 case XML_PARSER_SYSTEM_LITERAL:
10134 xmlGenericError(xmlGenericErrorContext,
10135 "PP: internal error, state == SYSTEM_LITERAL\n");
10136 ctxt->instate = XML_PARSER_START_TAG;
10137#ifdef DEBUG_PUSH
10138 xmlGenericError(xmlGenericErrorContext,
10139 "PP: entering START_TAG\n");
10140#endif
10141 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010142 case XML_PARSER_PUBLIC_LITERAL:
10143 xmlGenericError(xmlGenericErrorContext,
10144 "PP: internal error, state == PUBLIC_LITERAL\n");
10145 ctxt->instate = XML_PARSER_START_TAG;
10146#ifdef DEBUG_PUSH
10147 xmlGenericError(xmlGenericErrorContext,
10148 "PP: entering START_TAG\n");
10149#endif
10150 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010151 }
10152 }
10153done:
10154#ifdef DEBUG_PUSH
10155 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10156#endif
10157 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010158encoding_error:
10159 {
10160 char buffer[150];
10161
10162 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10163 ctxt->input->cur[0], ctxt->input->cur[1],
10164 ctxt->input->cur[2], ctxt->input->cur[3]);
10165 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10166 "Input is not proper UTF-8, indicate encoding !\n%s",
10167 BAD_CAST buffer, NULL);
10168 }
10169 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010170}
10171
10172/**
Owen Taylor3473f882001-02-23 17:55:21 +000010173 * xmlParseChunk:
10174 * @ctxt: an XML parser context
10175 * @chunk: an char array
10176 * @size: the size in byte of the chunk
10177 * @terminate: last chunk indicator
10178 *
10179 * Parse a Chunk of memory
10180 *
10181 * Returns zero if no error, the xmlParserErrors otherwise.
10182 */
10183int
10184xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10185 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010186 if (ctxt == NULL)
10187 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010188 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010189 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010190 if (ctxt->instate == XML_PARSER_START)
10191 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010192 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10193 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10194 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10195 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010196 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010197
William M. Bracka3215c72004-07-31 16:24:01 +000010198 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10199 if (res < 0) {
10200 ctxt->errNo = XML_PARSER_EOF;
10201 ctxt->disableSAX = 1;
10202 return (XML_PARSER_EOF);
10203 }
Owen Taylor3473f882001-02-23 17:55:21 +000010204 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10205 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010206 ctxt->input->end =
10207 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010208#ifdef DEBUG_PUSH
10209 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10210#endif
10211
Owen Taylor3473f882001-02-23 17:55:21 +000010212 } else if (ctxt->instate != XML_PARSER_EOF) {
10213 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10214 xmlParserInputBufferPtr in = ctxt->input->buf;
10215 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10216 (in->raw != NULL)) {
10217 int nbchars;
10218
10219 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10220 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010221 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010222 xmlGenericError(xmlGenericErrorContext,
10223 "xmlParseChunk: encoder error\n");
10224 return(XML_ERR_INVALID_ENCODING);
10225 }
10226 }
10227 }
10228 }
10229 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010230 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010231 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010232 if (terminate) {
10233 /*
10234 * Check for termination
10235 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010236 int avail = 0;
10237
10238 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010239 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010240 avail = ctxt->input->length -
10241 (ctxt->input->cur - ctxt->input->base);
10242 else
10243 avail = ctxt->input->buf->buffer->use -
10244 (ctxt->input->cur - ctxt->input->base);
10245 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010246
Owen Taylor3473f882001-02-23 17:55:21 +000010247 if ((ctxt->instate != XML_PARSER_EOF) &&
10248 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010249 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010250 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010251 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010252 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010253 }
Owen Taylor3473f882001-02-23 17:55:21 +000010254 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010255 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010256 ctxt->sax->endDocument(ctxt->userData);
10257 }
10258 ctxt->instate = XML_PARSER_EOF;
10259 }
10260 return((xmlParserErrors) ctxt->errNo);
10261}
10262
10263/************************************************************************
10264 * *
10265 * I/O front end functions to the parser *
10266 * *
10267 ************************************************************************/
10268
10269/**
Owen Taylor3473f882001-02-23 17:55:21 +000010270 * xmlCreatePushParserCtxt:
10271 * @sax: a SAX handler
10272 * @user_data: The user data returned on SAX callbacks
10273 * @chunk: a pointer to an array of chars
10274 * @size: number of chars in the array
10275 * @filename: an optional file name or URI
10276 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010277 * Create a parser context for using the XML parser in push mode.
10278 * If @buffer and @size are non-NULL, the data is used to detect
10279 * the encoding. The remaining characters will be parsed so they
10280 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010281 * To allow content encoding detection, @size should be >= 4
10282 * The value of @filename is used for fetching external entities
10283 * and error/warning reports.
10284 *
10285 * Returns the new parser context or NULL
10286 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010287
Owen Taylor3473f882001-02-23 17:55:21 +000010288xmlParserCtxtPtr
10289xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10290 const char *chunk, int size, const char *filename) {
10291 xmlParserCtxtPtr ctxt;
10292 xmlParserInputPtr inputStream;
10293 xmlParserInputBufferPtr buf;
10294 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10295
10296 /*
10297 * plug some encoding conversion routines
10298 */
10299 if ((chunk != NULL) && (size >= 4))
10300 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10301
10302 buf = xmlAllocParserInputBuffer(enc);
10303 if (buf == NULL) return(NULL);
10304
10305 ctxt = xmlNewParserCtxt();
10306 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010307 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010308 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010309 return(NULL);
10310 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010311 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010312 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10313 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010314 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010315 xmlFreeParserInputBuffer(buf);
10316 xmlFreeParserCtxt(ctxt);
10317 return(NULL);
10318 }
Owen Taylor3473f882001-02-23 17:55:21 +000010319 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010320#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010321 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010322#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010323 xmlFree(ctxt->sax);
10324 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10325 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010326 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010327 xmlFreeParserInputBuffer(buf);
10328 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010329 return(NULL);
10330 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010331 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10332 if (sax->initialized == XML_SAX2_MAGIC)
10333 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10334 else
10335 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010336 if (user_data != NULL)
10337 ctxt->userData = user_data;
10338 }
10339 if (filename == NULL) {
10340 ctxt->directory = NULL;
10341 } else {
10342 ctxt->directory = xmlParserGetDirectory(filename);
10343 }
10344
10345 inputStream = xmlNewInputStream(ctxt);
10346 if (inputStream == NULL) {
10347 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010348 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010349 return(NULL);
10350 }
10351
10352 if (filename == NULL)
10353 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010354 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010355 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010356 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010357 if (inputStream->filename == NULL) {
10358 xmlFreeParserCtxt(ctxt);
10359 xmlFreeParserInputBuffer(buf);
10360 return(NULL);
10361 }
10362 }
Owen Taylor3473f882001-02-23 17:55:21 +000010363 inputStream->buf = buf;
10364 inputStream->base = inputStream->buf->buffer->content;
10365 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010366 inputStream->end =
10367 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010368
10369 inputPush(ctxt, inputStream);
10370
William M. Brack3a1cd212005-02-11 14:35:54 +000010371 /*
10372 * If the caller didn't provide an initial 'chunk' for determining
10373 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10374 * that it can be automatically determined later
10375 */
10376 if ((size == 0) || (chunk == NULL)) {
10377 ctxt->charset = XML_CHAR_ENCODING_NONE;
10378 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010379 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10380 int cur = ctxt->input->cur - ctxt->input->base;
10381
Owen Taylor3473f882001-02-23 17:55:21 +000010382 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010383
10384 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10385 ctxt->input->cur = ctxt->input->base + cur;
10386 ctxt->input->end =
10387 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010388#ifdef DEBUG_PUSH
10389 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10390#endif
10391 }
10392
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010393 if (enc != XML_CHAR_ENCODING_NONE) {
10394 xmlSwitchEncoding(ctxt, enc);
10395 }
10396
Owen Taylor3473f882001-02-23 17:55:21 +000010397 return(ctxt);
10398}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010399#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010400
10401/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010402 * xmlStopParser:
10403 * @ctxt: an XML parser context
10404 *
10405 * Blocks further parser processing
10406 */
10407void
10408xmlStopParser(xmlParserCtxtPtr ctxt) {
10409 if (ctxt == NULL)
10410 return;
10411 ctxt->instate = XML_PARSER_EOF;
10412 ctxt->disableSAX = 1;
10413 if (ctxt->input != NULL) {
10414 ctxt->input->cur = BAD_CAST"";
10415 ctxt->input->base = ctxt->input->cur;
10416 }
10417}
10418
10419/**
Owen Taylor3473f882001-02-23 17:55:21 +000010420 * xmlCreateIOParserCtxt:
10421 * @sax: a SAX handler
10422 * @user_data: The user data returned on SAX callbacks
10423 * @ioread: an I/O read function
10424 * @ioclose: an I/O close function
10425 * @ioctx: an I/O handler
10426 * @enc: the charset encoding if known
10427 *
10428 * Create a parser context for using the XML parser with an existing
10429 * I/O stream
10430 *
10431 * Returns the new parser context or NULL
10432 */
10433xmlParserCtxtPtr
10434xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10435 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10436 void *ioctx, xmlCharEncoding enc) {
10437 xmlParserCtxtPtr ctxt;
10438 xmlParserInputPtr inputStream;
10439 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010440
10441 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010442
10443 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10444 if (buf == NULL) return(NULL);
10445
10446 ctxt = xmlNewParserCtxt();
10447 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010448 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010449 return(NULL);
10450 }
10451 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010452#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010453 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010454#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010455 xmlFree(ctxt->sax);
10456 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10457 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010458 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010459 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010460 return(NULL);
10461 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010462 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10463 if (sax->initialized == XML_SAX2_MAGIC)
10464 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10465 else
10466 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010467 if (user_data != NULL)
10468 ctxt->userData = user_data;
10469 }
10470
10471 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10472 if (inputStream == NULL) {
10473 xmlFreeParserCtxt(ctxt);
10474 return(NULL);
10475 }
10476 inputPush(ctxt, inputStream);
10477
10478 return(ctxt);
10479}
10480
Daniel Veillard4432df22003-09-28 18:58:27 +000010481#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010482/************************************************************************
10483 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010484 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010485 * *
10486 ************************************************************************/
10487
10488/**
10489 * xmlIOParseDTD:
10490 * @sax: the SAX handler block or NULL
10491 * @input: an Input Buffer
10492 * @enc: the charset encoding if known
10493 *
10494 * Load and parse a DTD
10495 *
10496 * Returns the resulting xmlDtdPtr or NULL in case of error.
10497 * @input will be freed at parsing end.
10498 */
10499
10500xmlDtdPtr
10501xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10502 xmlCharEncoding enc) {
10503 xmlDtdPtr ret = NULL;
10504 xmlParserCtxtPtr ctxt;
10505 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010506 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010507
10508 if (input == NULL)
10509 return(NULL);
10510
10511 ctxt = xmlNewParserCtxt();
10512 if (ctxt == NULL) {
10513 return(NULL);
10514 }
10515
10516 /*
10517 * Set-up the SAX context
10518 */
10519 if (sax != NULL) {
10520 if (ctxt->sax != NULL)
10521 xmlFree(ctxt->sax);
10522 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010523 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010524 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010525 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010526
10527 /*
10528 * generate a parser input from the I/O handler
10529 */
10530
Daniel Veillard43caefb2003-12-07 19:32:22 +000010531 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010532 if (pinput == NULL) {
10533 if (sax != NULL) ctxt->sax = NULL;
10534 xmlFreeParserCtxt(ctxt);
10535 return(NULL);
10536 }
10537
10538 /*
10539 * plug some encoding conversion routines here.
10540 */
10541 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010542 if (enc != XML_CHAR_ENCODING_NONE) {
10543 xmlSwitchEncoding(ctxt, enc);
10544 }
Owen Taylor3473f882001-02-23 17:55:21 +000010545
10546 pinput->filename = NULL;
10547 pinput->line = 1;
10548 pinput->col = 1;
10549 pinput->base = ctxt->input->cur;
10550 pinput->cur = ctxt->input->cur;
10551 pinput->free = NULL;
10552
10553 /*
10554 * let's parse that entity knowing it's an external subset.
10555 */
10556 ctxt->inSubset = 2;
10557 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10558 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10559 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010560
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010561 if ((enc == XML_CHAR_ENCODING_NONE) &&
10562 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010563 /*
10564 * Get the 4 first bytes and decode the charset
10565 * if enc != XML_CHAR_ENCODING_NONE
10566 * plug some encoding conversion routines.
10567 */
10568 start[0] = RAW;
10569 start[1] = NXT(1);
10570 start[2] = NXT(2);
10571 start[3] = NXT(3);
10572 enc = xmlDetectCharEncoding(start, 4);
10573 if (enc != XML_CHAR_ENCODING_NONE) {
10574 xmlSwitchEncoding(ctxt, enc);
10575 }
10576 }
10577
Owen Taylor3473f882001-02-23 17:55:21 +000010578 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10579
10580 if (ctxt->myDoc != NULL) {
10581 if (ctxt->wellFormed) {
10582 ret = ctxt->myDoc->extSubset;
10583 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010584 if (ret != NULL) {
10585 xmlNodePtr tmp;
10586
10587 ret->doc = NULL;
10588 tmp = ret->children;
10589 while (tmp != NULL) {
10590 tmp->doc = NULL;
10591 tmp = tmp->next;
10592 }
10593 }
Owen Taylor3473f882001-02-23 17:55:21 +000010594 } else {
10595 ret = NULL;
10596 }
10597 xmlFreeDoc(ctxt->myDoc);
10598 ctxt->myDoc = NULL;
10599 }
10600 if (sax != NULL) ctxt->sax = NULL;
10601 xmlFreeParserCtxt(ctxt);
10602
10603 return(ret);
10604}
10605
10606/**
10607 * xmlSAXParseDTD:
10608 * @sax: the SAX handler block
10609 * @ExternalID: a NAME* containing the External ID of the DTD
10610 * @SystemID: a NAME* containing the URL to the DTD
10611 *
10612 * Load and parse an external subset.
10613 *
10614 * Returns the resulting xmlDtdPtr or NULL in case of error.
10615 */
10616
10617xmlDtdPtr
10618xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10619 const xmlChar *SystemID) {
10620 xmlDtdPtr ret = NULL;
10621 xmlParserCtxtPtr ctxt;
10622 xmlParserInputPtr input = NULL;
10623 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010624 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010625
10626 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10627
10628 ctxt = xmlNewParserCtxt();
10629 if (ctxt == NULL) {
10630 return(NULL);
10631 }
10632
10633 /*
10634 * Set-up the SAX context
10635 */
10636 if (sax != NULL) {
10637 if (ctxt->sax != NULL)
10638 xmlFree(ctxt->sax);
10639 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010640 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010641 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010642
10643 /*
10644 * Canonicalise the system ID
10645 */
10646 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010647 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010648 xmlFreeParserCtxt(ctxt);
10649 return(NULL);
10650 }
Owen Taylor3473f882001-02-23 17:55:21 +000010651
10652 /*
10653 * Ask the Entity resolver to load the damn thing
10654 */
10655
10656 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010657 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010658 if (input == NULL) {
10659 if (sax != NULL) ctxt->sax = NULL;
10660 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010661 if (systemIdCanonic != NULL)
10662 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010663 return(NULL);
10664 }
10665
10666 /*
10667 * plug some encoding conversion routines here.
10668 */
10669 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010670 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10671 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10672 xmlSwitchEncoding(ctxt, enc);
10673 }
Owen Taylor3473f882001-02-23 17:55:21 +000010674
10675 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010676 input->filename = (char *) systemIdCanonic;
10677 else
10678 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010679 input->line = 1;
10680 input->col = 1;
10681 input->base = ctxt->input->cur;
10682 input->cur = ctxt->input->cur;
10683 input->free = NULL;
10684
10685 /*
10686 * let's parse that entity knowing it's an external subset.
10687 */
10688 ctxt->inSubset = 2;
10689 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10690 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10691 ExternalID, SystemID);
10692 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10693
10694 if (ctxt->myDoc != NULL) {
10695 if (ctxt->wellFormed) {
10696 ret = ctxt->myDoc->extSubset;
10697 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010698 if (ret != NULL) {
10699 xmlNodePtr tmp;
10700
10701 ret->doc = NULL;
10702 tmp = ret->children;
10703 while (tmp != NULL) {
10704 tmp->doc = NULL;
10705 tmp = tmp->next;
10706 }
10707 }
Owen Taylor3473f882001-02-23 17:55:21 +000010708 } else {
10709 ret = NULL;
10710 }
10711 xmlFreeDoc(ctxt->myDoc);
10712 ctxt->myDoc = NULL;
10713 }
10714 if (sax != NULL) ctxt->sax = NULL;
10715 xmlFreeParserCtxt(ctxt);
10716
10717 return(ret);
10718}
10719
Daniel Veillard4432df22003-09-28 18:58:27 +000010720
Owen Taylor3473f882001-02-23 17:55:21 +000010721/**
10722 * xmlParseDTD:
10723 * @ExternalID: a NAME* containing the External ID of the DTD
10724 * @SystemID: a NAME* containing the URL to the DTD
10725 *
10726 * Load and parse an external subset.
10727 *
10728 * Returns the resulting xmlDtdPtr or NULL in case of error.
10729 */
10730
10731xmlDtdPtr
10732xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10733 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10734}
Daniel Veillard4432df22003-09-28 18:58:27 +000010735#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010736
10737/************************************************************************
10738 * *
10739 * Front ends when parsing an Entity *
10740 * *
10741 ************************************************************************/
10742
10743/**
Owen Taylor3473f882001-02-23 17:55:21 +000010744 * xmlParseCtxtExternalEntity:
10745 * @ctx: the existing parsing context
10746 * @URL: the URL for the entity to load
10747 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010748 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010749 *
10750 * Parse an external general entity within an existing parsing context
10751 * An external general parsed entity is well-formed if it matches the
10752 * production labeled extParsedEnt.
10753 *
10754 * [78] extParsedEnt ::= TextDecl? content
10755 *
10756 * Returns 0 if the entity is well formed, -1 in case of args problem and
10757 * the parser error code otherwise
10758 */
10759
10760int
10761xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010762 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010763 xmlParserCtxtPtr ctxt;
10764 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010765 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010766 xmlSAXHandlerPtr oldsax = NULL;
10767 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010768 xmlChar start[4];
10769 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010770
Daniel Veillardce682bc2004-11-05 17:22:25 +000010771 if (ctx == NULL) return(-1);
10772
Owen Taylor3473f882001-02-23 17:55:21 +000010773 if (ctx->depth > 40) {
10774 return(XML_ERR_ENTITY_LOOP);
10775 }
10776
Daniel Veillardcda96922001-08-21 10:56:31 +000010777 if (lst != NULL)
10778 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010779 if ((URL == NULL) && (ID == NULL))
10780 return(-1);
10781 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10782 return(-1);
10783
10784
10785 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10786 if (ctxt == NULL) return(-1);
10787 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010788 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010789 oldsax = ctxt->sax;
10790 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010791 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010792 newDoc = xmlNewDoc(BAD_CAST "1.0");
10793 if (newDoc == NULL) {
10794 xmlFreeParserCtxt(ctxt);
10795 return(-1);
10796 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010797 if (ctx->myDoc->dict) {
10798 newDoc->dict = ctx->myDoc->dict;
10799 xmlDictReference(newDoc->dict);
10800 }
Owen Taylor3473f882001-02-23 17:55:21 +000010801 if (ctx->myDoc != NULL) {
10802 newDoc->intSubset = ctx->myDoc->intSubset;
10803 newDoc->extSubset = ctx->myDoc->extSubset;
10804 }
10805 if (ctx->myDoc->URL != NULL) {
10806 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10807 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010808 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10809 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010810 ctxt->sax = oldsax;
10811 xmlFreeParserCtxt(ctxt);
10812 newDoc->intSubset = NULL;
10813 newDoc->extSubset = NULL;
10814 xmlFreeDoc(newDoc);
10815 return(-1);
10816 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010817 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010818 nodePush(ctxt, newDoc->children);
10819 if (ctx->myDoc == NULL) {
10820 ctxt->myDoc = newDoc;
10821 } else {
10822 ctxt->myDoc = ctx->myDoc;
10823 newDoc->children->doc = ctx->myDoc;
10824 }
10825
Daniel Veillard87a764e2001-06-20 17:41:10 +000010826 /*
10827 * Get the 4 first bytes and decode the charset
10828 * if enc != XML_CHAR_ENCODING_NONE
10829 * plug some encoding conversion routines.
10830 */
10831 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010832 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10833 start[0] = RAW;
10834 start[1] = NXT(1);
10835 start[2] = NXT(2);
10836 start[3] = NXT(3);
10837 enc = xmlDetectCharEncoding(start, 4);
10838 if (enc != XML_CHAR_ENCODING_NONE) {
10839 xmlSwitchEncoding(ctxt, enc);
10840 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010841 }
10842
Owen Taylor3473f882001-02-23 17:55:21 +000010843 /*
10844 * Parse a possible text declaration first
10845 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010846 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010847 xmlParseTextDecl(ctxt);
10848 }
10849
10850 /*
10851 * Doing validity checking on chunk doesn't make sense
10852 */
10853 ctxt->instate = XML_PARSER_CONTENT;
10854 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010855 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010856 ctxt->loadsubset = ctx->loadsubset;
10857 ctxt->depth = ctx->depth + 1;
10858 ctxt->replaceEntities = ctx->replaceEntities;
10859 if (ctxt->validate) {
10860 ctxt->vctxt.error = ctx->vctxt.error;
10861 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010862 } else {
10863 ctxt->vctxt.error = NULL;
10864 ctxt->vctxt.warning = NULL;
10865 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010866 ctxt->vctxt.nodeTab = NULL;
10867 ctxt->vctxt.nodeNr = 0;
10868 ctxt->vctxt.nodeMax = 0;
10869 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010870 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10871 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010872 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10873 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10874 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010875 ctxt->dictNames = ctx->dictNames;
10876 ctxt->attsDefault = ctx->attsDefault;
10877 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010878 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010879
10880 xmlParseContent(ctxt);
10881
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010882 ctx->validate = ctxt->validate;
10883 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010884 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010885 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010886 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010887 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010888 }
10889 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010890 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010891 }
10892
10893 if (!ctxt->wellFormed) {
10894 if (ctxt->errNo == 0)
10895 ret = 1;
10896 else
10897 ret = ctxt->errNo;
10898 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010899 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010900 xmlNodePtr cur;
10901
10902 /*
10903 * Return the newly created nodeset after unlinking it from
10904 * they pseudo parent.
10905 */
10906 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010907 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010908 while (cur != NULL) {
10909 cur->parent = NULL;
10910 cur = cur->next;
10911 }
10912 newDoc->children->children = NULL;
10913 }
10914 ret = 0;
10915 }
10916 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010917 ctxt->dict = NULL;
10918 ctxt->attsDefault = NULL;
10919 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010920 xmlFreeParserCtxt(ctxt);
10921 newDoc->intSubset = NULL;
10922 newDoc->extSubset = NULL;
10923 xmlFreeDoc(newDoc);
10924
10925 return(ret);
10926}
10927
10928/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010929 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010930 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010931 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010932 * @sax: the SAX handler bloc (possibly NULL)
10933 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10934 * @depth: Used for loop detection, use 0
10935 * @URL: the URL for the entity to load
10936 * @ID: the System ID for the entity to load
10937 * @list: the return value for the set of parsed nodes
10938 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010939 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010940 *
10941 * Returns 0 if the entity is well formed, -1 in case of args problem and
10942 * the parser error code otherwise
10943 */
10944
Daniel Veillard7d515752003-09-26 19:12:37 +000010945static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010946xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10947 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010948 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010949 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010950 xmlParserCtxtPtr ctxt;
10951 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010952 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010953 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010954 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010955 xmlChar start[4];
10956 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010957
10958 if (depth > 40) {
10959 return(XML_ERR_ENTITY_LOOP);
10960 }
10961
10962
10963
10964 if (list != NULL)
10965 *list = NULL;
10966 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010967 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010968 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010969 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010970
10971
10972 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010973 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010974 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010975 if (oldctxt != NULL) {
10976 ctxt->_private = oldctxt->_private;
10977 ctxt->loadsubset = oldctxt->loadsubset;
10978 ctxt->validate = oldctxt->validate;
10979 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010980 ctxt->record_info = oldctxt->record_info;
10981 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10982 ctxt->node_seq.length = oldctxt->node_seq.length;
10983 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010984 } else {
10985 /*
10986 * Doing validity checking on chunk without context
10987 * doesn't make sense
10988 */
10989 ctxt->_private = NULL;
10990 ctxt->validate = 0;
10991 ctxt->external = 2;
10992 ctxt->loadsubset = 0;
10993 }
Owen Taylor3473f882001-02-23 17:55:21 +000010994 if (sax != NULL) {
10995 oldsax = ctxt->sax;
10996 ctxt->sax = sax;
10997 if (user_data != NULL)
10998 ctxt->userData = user_data;
10999 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011000 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011001 newDoc = xmlNewDoc(BAD_CAST "1.0");
11002 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011003 ctxt->node_seq.maximum = 0;
11004 ctxt->node_seq.length = 0;
11005 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011006 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011007 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011008 }
11009 if (doc != NULL) {
11010 newDoc->intSubset = doc->intSubset;
11011 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011012 newDoc->dict = doc->dict;
11013 } else if (oldctxt != NULL) {
11014 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011015 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011016 xmlDictReference(newDoc->dict);
11017
Owen Taylor3473f882001-02-23 17:55:21 +000011018 if (doc->URL != NULL) {
11019 newDoc->URL = xmlStrdup(doc->URL);
11020 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011021 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11022 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011023 if (sax != NULL)
11024 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011025 ctxt->node_seq.maximum = 0;
11026 ctxt->node_seq.length = 0;
11027 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011028 xmlFreeParserCtxt(ctxt);
11029 newDoc->intSubset = NULL;
11030 newDoc->extSubset = NULL;
11031 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011032 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011033 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011034 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011035 nodePush(ctxt, newDoc->children);
11036 if (doc == NULL) {
11037 ctxt->myDoc = newDoc;
11038 } else {
11039 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011040 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011041 }
11042
Daniel Veillard87a764e2001-06-20 17:41:10 +000011043 /*
11044 * Get the 4 first bytes and decode the charset
11045 * if enc != XML_CHAR_ENCODING_NONE
11046 * plug some encoding conversion routines.
11047 */
11048 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011049 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11050 start[0] = RAW;
11051 start[1] = NXT(1);
11052 start[2] = NXT(2);
11053 start[3] = NXT(3);
11054 enc = xmlDetectCharEncoding(start, 4);
11055 if (enc != XML_CHAR_ENCODING_NONE) {
11056 xmlSwitchEncoding(ctxt, enc);
11057 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011058 }
11059
Owen Taylor3473f882001-02-23 17:55:21 +000011060 /*
11061 * Parse a possible text declaration first
11062 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011063 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011064 xmlParseTextDecl(ctxt);
11065 }
11066
Owen Taylor3473f882001-02-23 17:55:21 +000011067 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011068 ctxt->depth = depth;
11069
11070 xmlParseContent(ctxt);
11071
Daniel Veillard561b7f82002-03-20 21:55:57 +000011072 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011073 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011074 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011075 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011076 }
11077 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011078 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011079 }
11080
11081 if (!ctxt->wellFormed) {
11082 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011083 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011084 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011085 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011086 } else {
11087 if (list != NULL) {
11088 xmlNodePtr cur;
11089
11090 /*
11091 * Return the newly created nodeset after unlinking it from
11092 * they pseudo parent.
11093 */
11094 cur = newDoc->children->children;
11095 *list = cur;
11096 while (cur != NULL) {
11097 cur->parent = NULL;
11098 cur = cur->next;
11099 }
11100 newDoc->children->children = NULL;
11101 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011102 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011103 }
11104 if (sax != NULL)
11105 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011106 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11107 oldctxt->node_seq.length = ctxt->node_seq.length;
11108 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011109 ctxt->node_seq.maximum = 0;
11110 ctxt->node_seq.length = 0;
11111 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011112 xmlFreeParserCtxt(ctxt);
11113 newDoc->intSubset = NULL;
11114 newDoc->extSubset = NULL;
11115 xmlFreeDoc(newDoc);
11116
11117 return(ret);
11118}
11119
Daniel Veillard81273902003-09-30 00:43:48 +000011120#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011121/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011122 * xmlParseExternalEntity:
11123 * @doc: the document the chunk pertains to
11124 * @sax: the SAX handler bloc (possibly NULL)
11125 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11126 * @depth: Used for loop detection, use 0
11127 * @URL: the URL for the entity to load
11128 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011129 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011130 *
11131 * Parse an external general entity
11132 * An external general parsed entity is well-formed if it matches the
11133 * production labeled extParsedEnt.
11134 *
11135 * [78] extParsedEnt ::= TextDecl? content
11136 *
11137 * Returns 0 if the entity is well formed, -1 in case of args problem and
11138 * the parser error code otherwise
11139 */
11140
11141int
11142xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011143 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011144 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011145 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011146}
11147
11148/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011149 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011150 * @doc: the document the chunk pertains to
11151 * @sax: the SAX handler bloc (possibly NULL)
11152 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11153 * @depth: Used for loop detection, use 0
11154 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011155 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011156 *
11157 * Parse a well-balanced chunk of an XML document
11158 * called by the parser
11159 * The allowed sequence for the Well Balanced Chunk is the one defined by
11160 * the content production in the XML grammar:
11161 *
11162 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11163 *
11164 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11165 * the parser error code otherwise
11166 */
11167
11168int
11169xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011170 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011171 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11172 depth, string, lst, 0 );
11173}
Daniel Veillard81273902003-09-30 00:43:48 +000011174#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011175
11176/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011177 * xmlParseBalancedChunkMemoryInternal:
11178 * @oldctxt: the existing parsing context
11179 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11180 * @user_data: the user data field for the parser context
11181 * @lst: the return value for the set of parsed nodes
11182 *
11183 *
11184 * Parse a well-balanced chunk of an XML document
11185 * called by the parser
11186 * The allowed sequence for the Well Balanced Chunk is the one defined by
11187 * the content production in the XML grammar:
11188 *
11189 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11190 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011191 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11192 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011193 *
11194 * In case recover is set to 1, the nodelist will not be empty even if
11195 * the parsed chunk is not well balanced.
11196 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011197static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011198xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11199 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11200 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011201 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011202 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011203 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011204 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011205 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011206 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011207 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011208
11209 if (oldctxt->depth > 40) {
11210 return(XML_ERR_ENTITY_LOOP);
11211 }
11212
11213
11214 if (lst != NULL)
11215 *lst = NULL;
11216 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011217 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011218
11219 size = xmlStrlen(string);
11220
11221 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011222 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011223 if (user_data != NULL)
11224 ctxt->userData = user_data;
11225 else
11226 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011227 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11228 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011229 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11230 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11231 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011232
11233 oldsax = ctxt->sax;
11234 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011235 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011236 ctxt->replaceEntities = oldctxt->replaceEntities;
11237 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011238
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011239 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011240 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011241 newDoc = xmlNewDoc(BAD_CAST "1.0");
11242 if (newDoc == NULL) {
11243 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011244 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011245 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011246 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011247 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011248 newDoc->dict = ctxt->dict;
11249 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011250 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011251 } else {
11252 ctxt->myDoc = oldctxt->myDoc;
11253 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011254 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011255 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011256 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11257 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011258 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011259 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011260 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011261 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011262 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011263 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011264 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011265 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011266 ctxt->myDoc->children = NULL;
11267 ctxt->myDoc->last = NULL;
11268 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011269 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011270 ctxt->instate = XML_PARSER_CONTENT;
11271 ctxt->depth = oldctxt->depth + 1;
11272
Daniel Veillard328f48c2002-11-15 15:24:34 +000011273 ctxt->validate = 0;
11274 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011275 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11276 /*
11277 * ID/IDREF registration will be done in xmlValidateElement below
11278 */
11279 ctxt->loadsubset |= XML_SKIP_IDS;
11280 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011281 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011282 ctxt->attsDefault = oldctxt->attsDefault;
11283 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011284
Daniel Veillard68e9e742002-11-16 15:35:11 +000011285 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011286 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011287 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011288 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011289 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011290 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011291 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011292 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011293 }
11294
11295 if (!ctxt->wellFormed) {
11296 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011297 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011298 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011299 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011300 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011301 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011302 }
11303
William M. Brack7b9154b2003-09-27 19:23:50 +000011304 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011305 xmlNodePtr cur;
11306
11307 /*
11308 * Return the newly created nodeset after unlinking it from
11309 * they pseudo parent.
11310 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011311 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011312 *lst = cur;
11313 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011314#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011315 if (oldctxt->validate && oldctxt->wellFormed &&
11316 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11317 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11318 oldctxt->myDoc, cur);
11319 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011320#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011321 cur->parent = NULL;
11322 cur = cur->next;
11323 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011324 ctxt->myDoc->children->children = NULL;
11325 }
11326 if (ctxt->myDoc != NULL) {
11327 xmlFreeNode(ctxt->myDoc->children);
11328 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011329 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011330 }
11331
11332 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011333 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011334 ctxt->attsDefault = NULL;
11335 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011336 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011337 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011338 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011339 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011340
11341 return(ret);
11342}
11343
Daniel Veillard29b17482004-08-16 00:39:03 +000011344/**
11345 * xmlParseInNodeContext:
11346 * @node: the context node
11347 * @data: the input string
11348 * @datalen: the input string length in bytes
11349 * @options: a combination of xmlParserOption
11350 * @lst: the return value for the set of parsed nodes
11351 *
11352 * Parse a well-balanced chunk of an XML document
11353 * within the context (DTD, namespaces, etc ...) of the given node.
11354 *
11355 * The allowed sequence for the data is a Well Balanced Chunk defined by
11356 * the content production in the XML grammar:
11357 *
11358 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11359 *
11360 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11361 * error code otherwise
11362 */
11363xmlParserErrors
11364xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11365 int options, xmlNodePtr *lst) {
11366#ifdef SAX2
11367 xmlParserCtxtPtr ctxt;
11368 xmlDocPtr doc = NULL;
11369 xmlNodePtr fake, cur;
11370 int nsnr = 0;
11371
11372 xmlParserErrors ret = XML_ERR_OK;
11373
11374 /*
11375 * check all input parameters, grab the document
11376 */
11377 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11378 return(XML_ERR_INTERNAL_ERROR);
11379 switch (node->type) {
11380 case XML_ELEMENT_NODE:
11381 case XML_ATTRIBUTE_NODE:
11382 case XML_TEXT_NODE:
11383 case XML_CDATA_SECTION_NODE:
11384 case XML_ENTITY_REF_NODE:
11385 case XML_PI_NODE:
11386 case XML_COMMENT_NODE:
11387 case XML_DOCUMENT_NODE:
11388 case XML_HTML_DOCUMENT_NODE:
11389 break;
11390 default:
11391 return(XML_ERR_INTERNAL_ERROR);
11392
11393 }
11394 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11395 (node->type != XML_DOCUMENT_NODE) &&
11396 (node->type != XML_HTML_DOCUMENT_NODE))
11397 node = node->parent;
11398 if (node == NULL)
11399 return(XML_ERR_INTERNAL_ERROR);
11400 if (node->type == XML_ELEMENT_NODE)
11401 doc = node->doc;
11402 else
11403 doc = (xmlDocPtr) node;
11404 if (doc == NULL)
11405 return(XML_ERR_INTERNAL_ERROR);
11406
11407 /*
11408 * allocate a context and set-up everything not related to the
11409 * node position in the tree
11410 */
11411 if (doc->type == XML_DOCUMENT_NODE)
11412 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11413#ifdef LIBXML_HTML_ENABLED
11414 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11415 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11416#endif
11417 else
11418 return(XML_ERR_INTERNAL_ERROR);
11419
11420 if (ctxt == NULL)
11421 return(XML_ERR_NO_MEMORY);
11422 fake = xmlNewComment(NULL);
11423 if (fake == NULL) {
11424 xmlFreeParserCtxt(ctxt);
11425 return(XML_ERR_NO_MEMORY);
11426 }
11427 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011428
11429 /*
11430 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11431 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11432 * we must wait until the last moment to free the original one.
11433 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011434 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011435 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011436 xmlDictFree(ctxt->dict);
11437 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011438 } else
11439 options |= XML_PARSE_NODICT;
11440
11441 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011442 xmlDetectSAX2(ctxt);
11443 ctxt->myDoc = doc;
11444
11445 if (node->type == XML_ELEMENT_NODE) {
11446 nodePush(ctxt, node);
11447 /*
11448 * initialize the SAX2 namespaces stack
11449 */
11450 cur = node;
11451 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11452 xmlNsPtr ns = cur->nsDef;
11453 const xmlChar *iprefix, *ihref;
11454
11455 while (ns != NULL) {
11456 if (ctxt->dict) {
11457 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11458 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11459 } else {
11460 iprefix = ns->prefix;
11461 ihref = ns->href;
11462 }
11463
11464 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11465 nsPush(ctxt, iprefix, ihref);
11466 nsnr++;
11467 }
11468 ns = ns->next;
11469 }
11470 cur = cur->parent;
11471 }
11472 ctxt->instate = XML_PARSER_CONTENT;
11473 }
11474
11475 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11476 /*
11477 * ID/IDREF registration will be done in xmlValidateElement below
11478 */
11479 ctxt->loadsubset |= XML_SKIP_IDS;
11480 }
11481
11482 xmlParseContent(ctxt);
11483 nsPop(ctxt, nsnr);
11484 if ((RAW == '<') && (NXT(1) == '/')) {
11485 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11486 } else if (RAW != 0) {
11487 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11488 }
11489 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11490 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11491 ctxt->wellFormed = 0;
11492 }
11493
11494 if (!ctxt->wellFormed) {
11495 if (ctxt->errNo == 0)
11496 ret = XML_ERR_INTERNAL_ERROR;
11497 else
11498 ret = (xmlParserErrors)ctxt->errNo;
11499 } else {
11500 ret = XML_ERR_OK;
11501 }
11502
11503 /*
11504 * Return the newly created nodeset after unlinking it from
11505 * the pseudo sibling.
11506 */
11507
11508 cur = fake->next;
11509 fake->next = NULL;
11510 node->last = fake;
11511
11512 if (cur != NULL) {
11513 cur->prev = NULL;
11514 }
11515
11516 *lst = cur;
11517
11518 while (cur != NULL) {
11519 cur->parent = NULL;
11520 cur = cur->next;
11521 }
11522
11523 xmlUnlinkNode(fake);
11524 xmlFreeNode(fake);
11525
11526
11527 if (ret != XML_ERR_OK) {
11528 xmlFreeNodeList(*lst);
11529 *lst = NULL;
11530 }
William M. Brackc3f81342004-10-03 01:22:44 +000011531
William M. Brackb7b54de2004-10-06 16:38:01 +000011532 if (doc->dict != NULL)
11533 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011534 xmlFreeParserCtxt(ctxt);
11535
11536 return(ret);
11537#else /* !SAX2 */
11538 return(XML_ERR_INTERNAL_ERROR);
11539#endif
11540}
11541
Daniel Veillard81273902003-09-30 00:43:48 +000011542#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011543/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011544 * xmlParseBalancedChunkMemoryRecover:
11545 * @doc: the document the chunk pertains to
11546 * @sax: the SAX handler bloc (possibly NULL)
11547 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11548 * @depth: Used for loop detection, use 0
11549 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11550 * @lst: the return value for the set of parsed nodes
11551 * @recover: return nodes even if the data is broken (use 0)
11552 *
11553 *
11554 * Parse a well-balanced chunk of an XML document
11555 * called by the parser
11556 * The allowed sequence for the Well Balanced Chunk is the one defined by
11557 * the content production in the XML grammar:
11558 *
11559 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11560 *
11561 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11562 * the parser error code otherwise
11563 *
11564 * In case recover is set to 1, the nodelist will not be empty even if
11565 * the parsed chunk is not well balanced.
11566 */
11567int
11568xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11569 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11570 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011571 xmlParserCtxtPtr ctxt;
11572 xmlDocPtr newDoc;
11573 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011574 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011575 int size;
11576 int ret = 0;
11577
11578 if (depth > 40) {
11579 return(XML_ERR_ENTITY_LOOP);
11580 }
11581
11582
Daniel Veillardcda96922001-08-21 10:56:31 +000011583 if (lst != NULL)
11584 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011585 if (string == NULL)
11586 return(-1);
11587
11588 size = xmlStrlen(string);
11589
11590 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11591 if (ctxt == NULL) return(-1);
11592 ctxt->userData = ctxt;
11593 if (sax != NULL) {
11594 oldsax = ctxt->sax;
11595 ctxt->sax = sax;
11596 if (user_data != NULL)
11597 ctxt->userData = user_data;
11598 }
11599 newDoc = xmlNewDoc(BAD_CAST "1.0");
11600 if (newDoc == NULL) {
11601 xmlFreeParserCtxt(ctxt);
11602 return(-1);
11603 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011604 if ((doc != NULL) && (doc->dict != NULL)) {
11605 xmlDictFree(ctxt->dict);
11606 ctxt->dict = doc->dict;
11607 xmlDictReference(ctxt->dict);
11608 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11609 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11610 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11611 ctxt->dictNames = 1;
11612 } else {
11613 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11614 }
Owen Taylor3473f882001-02-23 17:55:21 +000011615 if (doc != NULL) {
11616 newDoc->intSubset = doc->intSubset;
11617 newDoc->extSubset = doc->extSubset;
11618 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011619 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11620 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011621 if (sax != NULL)
11622 ctxt->sax = oldsax;
11623 xmlFreeParserCtxt(ctxt);
11624 newDoc->intSubset = NULL;
11625 newDoc->extSubset = NULL;
11626 xmlFreeDoc(newDoc);
11627 return(-1);
11628 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011629 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11630 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011631 if (doc == NULL) {
11632 ctxt->myDoc = newDoc;
11633 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011634 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011635 newDoc->children->doc = doc;
11636 }
11637 ctxt->instate = XML_PARSER_CONTENT;
11638 ctxt->depth = depth;
11639
11640 /*
11641 * Doing validity checking on chunk doesn't make sense
11642 */
11643 ctxt->validate = 0;
11644 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011645 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011646
Daniel Veillardb39bc392002-10-26 19:29:51 +000011647 if ( doc != NULL ){
11648 content = doc->children;
11649 doc->children = NULL;
11650 xmlParseContent(ctxt);
11651 doc->children = content;
11652 }
11653 else {
11654 xmlParseContent(ctxt);
11655 }
Owen Taylor3473f882001-02-23 17:55:21 +000011656 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011657 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011658 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011659 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011660 }
11661 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011662 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011663 }
11664
11665 if (!ctxt->wellFormed) {
11666 if (ctxt->errNo == 0)
11667 ret = 1;
11668 else
11669 ret = ctxt->errNo;
11670 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011671 ret = 0;
11672 }
11673
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011674 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11675 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011676
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011677 /*
11678 * Return the newly created nodeset after unlinking it from
11679 * they pseudo parent.
11680 */
11681 cur = newDoc->children->children;
11682 *lst = cur;
11683 while (cur != NULL) {
11684 xmlSetTreeDoc(cur, doc);
11685 cur->parent = NULL;
11686 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011687 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011688 newDoc->children->children = NULL;
11689 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011690
Owen Taylor3473f882001-02-23 17:55:21 +000011691 if (sax != NULL)
11692 ctxt->sax = oldsax;
11693 xmlFreeParserCtxt(ctxt);
11694 newDoc->intSubset = NULL;
11695 newDoc->extSubset = NULL;
11696 xmlFreeDoc(newDoc);
11697
11698 return(ret);
11699}
11700
11701/**
11702 * xmlSAXParseEntity:
11703 * @sax: the SAX handler block
11704 * @filename: the filename
11705 *
11706 * parse an XML external entity out of context and build a tree.
11707 * It use the given SAX function block to handle the parsing callback.
11708 * If sax is NULL, fallback to the default DOM tree building routines.
11709 *
11710 * [78] extParsedEnt ::= TextDecl? content
11711 *
11712 * This correspond to a "Well Balanced" chunk
11713 *
11714 * Returns the resulting document tree
11715 */
11716
11717xmlDocPtr
11718xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11719 xmlDocPtr ret;
11720 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011721
11722 ctxt = xmlCreateFileParserCtxt(filename);
11723 if (ctxt == NULL) {
11724 return(NULL);
11725 }
11726 if (sax != NULL) {
11727 if (ctxt->sax != NULL)
11728 xmlFree(ctxt->sax);
11729 ctxt->sax = sax;
11730 ctxt->userData = NULL;
11731 }
11732
Owen Taylor3473f882001-02-23 17:55:21 +000011733 xmlParseExtParsedEnt(ctxt);
11734
11735 if (ctxt->wellFormed)
11736 ret = ctxt->myDoc;
11737 else {
11738 ret = NULL;
11739 xmlFreeDoc(ctxt->myDoc);
11740 ctxt->myDoc = NULL;
11741 }
11742 if (sax != NULL)
11743 ctxt->sax = NULL;
11744 xmlFreeParserCtxt(ctxt);
11745
11746 return(ret);
11747}
11748
11749/**
11750 * xmlParseEntity:
11751 * @filename: the filename
11752 *
11753 * parse an XML external entity out of context and build a tree.
11754 *
11755 * [78] extParsedEnt ::= TextDecl? content
11756 *
11757 * This correspond to a "Well Balanced" chunk
11758 *
11759 * Returns the resulting document tree
11760 */
11761
11762xmlDocPtr
11763xmlParseEntity(const char *filename) {
11764 return(xmlSAXParseEntity(NULL, filename));
11765}
Daniel Veillard81273902003-09-30 00:43:48 +000011766#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011767
11768/**
11769 * xmlCreateEntityParserCtxt:
11770 * @URL: the entity URL
11771 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011772 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011773 *
11774 * Create a parser context for an external entity
11775 * Automatic support for ZLIB/Compress compressed document is provided
11776 * by default if found at compile-time.
11777 *
11778 * Returns the new parser context or NULL
11779 */
11780xmlParserCtxtPtr
11781xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11782 const xmlChar *base) {
11783 xmlParserCtxtPtr ctxt;
11784 xmlParserInputPtr inputStream;
11785 char *directory = NULL;
11786 xmlChar *uri;
11787
11788 ctxt = xmlNewParserCtxt();
11789 if (ctxt == NULL) {
11790 return(NULL);
11791 }
11792
11793 uri = xmlBuildURI(URL, base);
11794
11795 if (uri == NULL) {
11796 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11797 if (inputStream == NULL) {
11798 xmlFreeParserCtxt(ctxt);
11799 return(NULL);
11800 }
11801
11802 inputPush(ctxt, inputStream);
11803
11804 if ((ctxt->directory == NULL) && (directory == NULL))
11805 directory = xmlParserGetDirectory((char *)URL);
11806 if ((ctxt->directory == NULL) && (directory != NULL))
11807 ctxt->directory = directory;
11808 } else {
11809 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11810 if (inputStream == NULL) {
11811 xmlFree(uri);
11812 xmlFreeParserCtxt(ctxt);
11813 return(NULL);
11814 }
11815
11816 inputPush(ctxt, inputStream);
11817
11818 if ((ctxt->directory == NULL) && (directory == NULL))
11819 directory = xmlParserGetDirectory((char *)uri);
11820 if ((ctxt->directory == NULL) && (directory != NULL))
11821 ctxt->directory = directory;
11822 xmlFree(uri);
11823 }
Owen Taylor3473f882001-02-23 17:55:21 +000011824 return(ctxt);
11825}
11826
11827/************************************************************************
11828 * *
11829 * Front ends when parsing from a file *
11830 * *
11831 ************************************************************************/
11832
11833/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011834 * xmlCreateURLParserCtxt:
11835 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011836 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011837 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011838 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011839 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011840 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011841 *
11842 * Returns the new parser context or NULL
11843 */
11844xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011845xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011846{
11847 xmlParserCtxtPtr ctxt;
11848 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011849 char *directory = NULL;
11850
Owen Taylor3473f882001-02-23 17:55:21 +000011851 ctxt = xmlNewParserCtxt();
11852 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011853 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011854 return(NULL);
11855 }
11856
Daniel Veillarddf292f72005-01-16 19:00:15 +000011857 if (options)
11858 xmlCtxtUseOptions(ctxt, options);
11859 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000011860
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011861 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011862 if (inputStream == NULL) {
11863 xmlFreeParserCtxt(ctxt);
11864 return(NULL);
11865 }
11866
Owen Taylor3473f882001-02-23 17:55:21 +000011867 inputPush(ctxt, inputStream);
11868 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011869 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011870 if ((ctxt->directory == NULL) && (directory != NULL))
11871 ctxt->directory = directory;
11872
11873 return(ctxt);
11874}
11875
Daniel Veillard61b93382003-11-03 14:28:31 +000011876/**
11877 * xmlCreateFileParserCtxt:
11878 * @filename: the filename
11879 *
11880 * Create a parser context for a file content.
11881 * Automatic support for ZLIB/Compress compressed document is provided
11882 * by default if found at compile-time.
11883 *
11884 * Returns the new parser context or NULL
11885 */
11886xmlParserCtxtPtr
11887xmlCreateFileParserCtxt(const char *filename)
11888{
11889 return(xmlCreateURLParserCtxt(filename, 0));
11890}
11891
Daniel Veillard81273902003-09-30 00:43:48 +000011892#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011893/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011894 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011895 * @sax: the SAX handler block
11896 * @filename: the filename
11897 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11898 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011899 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011900 *
11901 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11902 * compressed document is provided by default if found at compile-time.
11903 * It use the given SAX function block to handle the parsing callback.
11904 * If sax is NULL, fallback to the default DOM tree building routines.
11905 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011906 * User data (void *) is stored within the parser context in the
11907 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011908 *
Owen Taylor3473f882001-02-23 17:55:21 +000011909 * Returns the resulting document tree
11910 */
11911
11912xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011913xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11914 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011915 xmlDocPtr ret;
11916 xmlParserCtxtPtr ctxt;
11917 char *directory = NULL;
11918
Daniel Veillard635ef722001-10-29 11:48:19 +000011919 xmlInitParser();
11920
Owen Taylor3473f882001-02-23 17:55:21 +000011921 ctxt = xmlCreateFileParserCtxt(filename);
11922 if (ctxt == NULL) {
11923 return(NULL);
11924 }
11925 if (sax != NULL) {
11926 if (ctxt->sax != NULL)
11927 xmlFree(ctxt->sax);
11928 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011929 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011930 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011931 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011932 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011933 }
Owen Taylor3473f882001-02-23 17:55:21 +000011934
11935 if ((ctxt->directory == NULL) && (directory == NULL))
11936 directory = xmlParserGetDirectory(filename);
11937 if ((ctxt->directory == NULL) && (directory != NULL))
11938 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11939
Daniel Veillarddad3f682002-11-17 16:47:27 +000011940 ctxt->recovery = recovery;
11941
Owen Taylor3473f882001-02-23 17:55:21 +000011942 xmlParseDocument(ctxt);
11943
William M. Brackc07329e2003-09-08 01:57:30 +000011944 if ((ctxt->wellFormed) || recovery) {
11945 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011946 if (ret != NULL) {
11947 if (ctxt->input->buf->compressed > 0)
11948 ret->compression = 9;
11949 else
11950 ret->compression = ctxt->input->buf->compressed;
11951 }
William M. Brackc07329e2003-09-08 01:57:30 +000011952 }
Owen Taylor3473f882001-02-23 17:55:21 +000011953 else {
11954 ret = NULL;
11955 xmlFreeDoc(ctxt->myDoc);
11956 ctxt->myDoc = NULL;
11957 }
11958 if (sax != NULL)
11959 ctxt->sax = NULL;
11960 xmlFreeParserCtxt(ctxt);
11961
11962 return(ret);
11963}
11964
11965/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011966 * xmlSAXParseFile:
11967 * @sax: the SAX handler block
11968 * @filename: the filename
11969 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11970 * documents
11971 *
11972 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11973 * compressed document is provided by default if found at compile-time.
11974 * It use the given SAX function block to handle the parsing callback.
11975 * If sax is NULL, fallback to the default DOM tree building routines.
11976 *
11977 * Returns the resulting document tree
11978 */
11979
11980xmlDocPtr
11981xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11982 int recovery) {
11983 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11984}
11985
11986/**
Owen Taylor3473f882001-02-23 17:55:21 +000011987 * xmlRecoverDoc:
11988 * @cur: a pointer to an array of xmlChar
11989 *
11990 * parse an XML in-memory document and build a tree.
11991 * In the case the document is not Well Formed, a tree is built anyway
11992 *
11993 * Returns the resulting document tree
11994 */
11995
11996xmlDocPtr
11997xmlRecoverDoc(xmlChar *cur) {
11998 return(xmlSAXParseDoc(NULL, cur, 1));
11999}
12000
12001/**
12002 * xmlParseFile:
12003 * @filename: the filename
12004 *
12005 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12006 * compressed document is provided by default if found at compile-time.
12007 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012008 * Returns the resulting document tree if the file was wellformed,
12009 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012010 */
12011
12012xmlDocPtr
12013xmlParseFile(const char *filename) {
12014 return(xmlSAXParseFile(NULL, filename, 0));
12015}
12016
12017/**
12018 * xmlRecoverFile:
12019 * @filename: the filename
12020 *
12021 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12022 * compressed document is provided by default if found at compile-time.
12023 * In the case the document is not Well Formed, a tree is built anyway
12024 *
12025 * Returns the resulting document tree
12026 */
12027
12028xmlDocPtr
12029xmlRecoverFile(const char *filename) {
12030 return(xmlSAXParseFile(NULL, filename, 1));
12031}
12032
12033
12034/**
12035 * xmlSetupParserForBuffer:
12036 * @ctxt: an XML parser context
12037 * @buffer: a xmlChar * buffer
12038 * @filename: a file name
12039 *
12040 * Setup the parser context to parse a new buffer; Clears any prior
12041 * contents from the parser context. The buffer parameter must not be
12042 * NULL, but the filename parameter can be
12043 */
12044void
12045xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12046 const char* filename)
12047{
12048 xmlParserInputPtr input;
12049
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012050 if ((ctxt == NULL) || (buffer == NULL))
12051 return;
12052
Owen Taylor3473f882001-02-23 17:55:21 +000012053 input = xmlNewInputStream(ctxt);
12054 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012055 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012056 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012057 return;
12058 }
12059
12060 xmlClearParserCtxt(ctxt);
12061 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012062 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012063 input->base = buffer;
12064 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012065 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012066 inputPush(ctxt, input);
12067}
12068
12069/**
12070 * xmlSAXUserParseFile:
12071 * @sax: a SAX handler
12072 * @user_data: The user data returned on SAX callbacks
12073 * @filename: a file name
12074 *
12075 * parse an XML file and call the given SAX handler routines.
12076 * Automatic support for ZLIB/Compress compressed document is provided
12077 *
12078 * Returns 0 in case of success or a error number otherwise
12079 */
12080int
12081xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12082 const char *filename) {
12083 int ret = 0;
12084 xmlParserCtxtPtr ctxt;
12085
12086 ctxt = xmlCreateFileParserCtxt(filename);
12087 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012088#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012089 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012090#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012091 xmlFree(ctxt->sax);
12092 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012093 xmlDetectSAX2(ctxt);
12094
Owen Taylor3473f882001-02-23 17:55:21 +000012095 if (user_data != NULL)
12096 ctxt->userData = user_data;
12097
12098 xmlParseDocument(ctxt);
12099
12100 if (ctxt->wellFormed)
12101 ret = 0;
12102 else {
12103 if (ctxt->errNo != 0)
12104 ret = ctxt->errNo;
12105 else
12106 ret = -1;
12107 }
12108 if (sax != NULL)
12109 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012110 if (ctxt->myDoc != NULL) {
12111 xmlFreeDoc(ctxt->myDoc);
12112 ctxt->myDoc = NULL;
12113 }
Owen Taylor3473f882001-02-23 17:55:21 +000012114 xmlFreeParserCtxt(ctxt);
12115
12116 return ret;
12117}
Daniel Veillard81273902003-09-30 00:43:48 +000012118#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012119
12120/************************************************************************
12121 * *
12122 * Front ends when parsing from memory *
12123 * *
12124 ************************************************************************/
12125
12126/**
12127 * xmlCreateMemoryParserCtxt:
12128 * @buffer: a pointer to a char array
12129 * @size: the size of the array
12130 *
12131 * Create a parser context for an XML in-memory document.
12132 *
12133 * Returns the new parser context or NULL
12134 */
12135xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012136xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012137 xmlParserCtxtPtr ctxt;
12138 xmlParserInputPtr input;
12139 xmlParserInputBufferPtr buf;
12140
12141 if (buffer == NULL)
12142 return(NULL);
12143 if (size <= 0)
12144 return(NULL);
12145
12146 ctxt = xmlNewParserCtxt();
12147 if (ctxt == NULL)
12148 return(NULL);
12149
Daniel Veillard53350552003-09-18 13:35:51 +000012150 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012151 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012152 if (buf == NULL) {
12153 xmlFreeParserCtxt(ctxt);
12154 return(NULL);
12155 }
Owen Taylor3473f882001-02-23 17:55:21 +000012156
12157 input = xmlNewInputStream(ctxt);
12158 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012159 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012160 xmlFreeParserCtxt(ctxt);
12161 return(NULL);
12162 }
12163
12164 input->filename = NULL;
12165 input->buf = buf;
12166 input->base = input->buf->buffer->content;
12167 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012168 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012169
12170 inputPush(ctxt, input);
12171 return(ctxt);
12172}
12173
Daniel Veillard81273902003-09-30 00:43:48 +000012174#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012175/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012176 * xmlSAXParseMemoryWithData:
12177 * @sax: the SAX handler block
12178 * @buffer: an pointer to a char array
12179 * @size: the size of the array
12180 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12181 * documents
12182 * @data: the userdata
12183 *
12184 * parse an XML in-memory block and use the given SAX function block
12185 * to handle the parsing callback. If sax is NULL, fallback to the default
12186 * DOM tree building routines.
12187 *
12188 * User data (void *) is stored within the parser context in the
12189 * context's _private member, so it is available nearly everywhere in libxml
12190 *
12191 * Returns the resulting document tree
12192 */
12193
12194xmlDocPtr
12195xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12196 int size, int recovery, void *data) {
12197 xmlDocPtr ret;
12198 xmlParserCtxtPtr ctxt;
12199
12200 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12201 if (ctxt == NULL) return(NULL);
12202 if (sax != NULL) {
12203 if (ctxt->sax != NULL)
12204 xmlFree(ctxt->sax);
12205 ctxt->sax = sax;
12206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012207 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012208 if (data!=NULL) {
12209 ctxt->_private=data;
12210 }
12211
Daniel Veillardadba5f12003-04-04 16:09:01 +000012212 ctxt->recovery = recovery;
12213
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012214 xmlParseDocument(ctxt);
12215
12216 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12217 else {
12218 ret = NULL;
12219 xmlFreeDoc(ctxt->myDoc);
12220 ctxt->myDoc = NULL;
12221 }
12222 if (sax != NULL)
12223 ctxt->sax = NULL;
12224 xmlFreeParserCtxt(ctxt);
12225
12226 return(ret);
12227}
12228
12229/**
Owen Taylor3473f882001-02-23 17:55:21 +000012230 * xmlSAXParseMemory:
12231 * @sax: the SAX handler block
12232 * @buffer: an pointer to a char array
12233 * @size: the size of the array
12234 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12235 * documents
12236 *
12237 * parse an XML in-memory block and use the given SAX function block
12238 * to handle the parsing callback. If sax is NULL, fallback to the default
12239 * DOM tree building routines.
12240 *
12241 * Returns the resulting document tree
12242 */
12243xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012244xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12245 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012246 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012247}
12248
12249/**
12250 * xmlParseMemory:
12251 * @buffer: an pointer to a char array
12252 * @size: the size of the array
12253 *
12254 * parse an XML in-memory block and build a tree.
12255 *
12256 * Returns the resulting document tree
12257 */
12258
Daniel Veillard50822cb2001-07-26 20:05:51 +000012259xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012260 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12261}
12262
12263/**
12264 * xmlRecoverMemory:
12265 * @buffer: an pointer to a char array
12266 * @size: the size of the array
12267 *
12268 * parse an XML in-memory block and build a tree.
12269 * In the case the document is not Well Formed, a tree is built anyway
12270 *
12271 * Returns the resulting document tree
12272 */
12273
Daniel Veillard50822cb2001-07-26 20:05:51 +000012274xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012275 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12276}
12277
12278/**
12279 * xmlSAXUserParseMemory:
12280 * @sax: a SAX handler
12281 * @user_data: The user data returned on SAX callbacks
12282 * @buffer: an in-memory XML document input
12283 * @size: the length of the XML document in bytes
12284 *
12285 * A better SAX parsing routine.
12286 * parse an XML in-memory buffer and call the given SAX handler routines.
12287 *
12288 * Returns 0 in case of success or a error number otherwise
12289 */
12290int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012291 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012292 int ret = 0;
12293 xmlParserCtxtPtr ctxt;
12294 xmlSAXHandlerPtr oldsax = NULL;
12295
Daniel Veillard9e923512002-08-14 08:48:52 +000012296 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012297 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12298 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012299 oldsax = ctxt->sax;
12300 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012301 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012302 if (user_data != NULL)
12303 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012304
12305 xmlParseDocument(ctxt);
12306
12307 if (ctxt->wellFormed)
12308 ret = 0;
12309 else {
12310 if (ctxt->errNo != 0)
12311 ret = ctxt->errNo;
12312 else
12313 ret = -1;
12314 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012315 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012316 if (ctxt->myDoc != NULL) {
12317 xmlFreeDoc(ctxt->myDoc);
12318 ctxt->myDoc = NULL;
12319 }
Owen Taylor3473f882001-02-23 17:55:21 +000012320 xmlFreeParserCtxt(ctxt);
12321
12322 return ret;
12323}
Daniel Veillard81273902003-09-30 00:43:48 +000012324#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012325
12326/**
12327 * xmlCreateDocParserCtxt:
12328 * @cur: a pointer to an array of xmlChar
12329 *
12330 * Creates a parser context for an XML in-memory document.
12331 *
12332 * Returns the new parser context or NULL
12333 */
12334xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012335xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012336 int len;
12337
12338 if (cur == NULL)
12339 return(NULL);
12340 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012341 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012342}
12343
Daniel Veillard81273902003-09-30 00:43:48 +000012344#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012345/**
12346 * xmlSAXParseDoc:
12347 * @sax: the SAX handler block
12348 * @cur: a pointer to an array of xmlChar
12349 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12350 * documents
12351 *
12352 * parse an XML in-memory document and build a tree.
12353 * It use the given SAX function block to handle the parsing callback.
12354 * If sax is NULL, fallback to the default DOM tree building routines.
12355 *
12356 * Returns the resulting document tree
12357 */
12358
12359xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012360xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012361 xmlDocPtr ret;
12362 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012363 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012364
Daniel Veillard38936062004-11-04 17:45:11 +000012365 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012366
12367
12368 ctxt = xmlCreateDocParserCtxt(cur);
12369 if (ctxt == NULL) return(NULL);
12370 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012371 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012372 ctxt->sax = sax;
12373 ctxt->userData = NULL;
12374 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012375 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012376
12377 xmlParseDocument(ctxt);
12378 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12379 else {
12380 ret = NULL;
12381 xmlFreeDoc(ctxt->myDoc);
12382 ctxt->myDoc = NULL;
12383 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012384 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012385 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012386 xmlFreeParserCtxt(ctxt);
12387
12388 return(ret);
12389}
12390
12391/**
12392 * xmlParseDoc:
12393 * @cur: a pointer to an array of xmlChar
12394 *
12395 * parse an XML in-memory document and build a tree.
12396 *
12397 * Returns the resulting document tree
12398 */
12399
12400xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012401xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012402 return(xmlSAXParseDoc(NULL, cur, 0));
12403}
Daniel Veillard81273902003-09-30 00:43:48 +000012404#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012405
Daniel Veillard81273902003-09-30 00:43:48 +000012406#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012407/************************************************************************
12408 * *
12409 * Specific function to keep track of entities references *
12410 * and used by the XSLT debugger *
12411 * *
12412 ************************************************************************/
12413
12414static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12415
12416/**
12417 * xmlAddEntityReference:
12418 * @ent : A valid entity
12419 * @firstNode : A valid first node for children of entity
12420 * @lastNode : A valid last node of children entity
12421 *
12422 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12423 */
12424static void
12425xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12426 xmlNodePtr lastNode)
12427{
12428 if (xmlEntityRefFunc != NULL) {
12429 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12430 }
12431}
12432
12433
12434/**
12435 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012436 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012437 *
12438 * Set the function to call call back when a xml reference has been made
12439 */
12440void
12441xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12442{
12443 xmlEntityRefFunc = func;
12444}
Daniel Veillard81273902003-09-30 00:43:48 +000012445#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012446
12447/************************************************************************
12448 * *
12449 * Miscellaneous *
12450 * *
12451 ************************************************************************/
12452
12453#ifdef LIBXML_XPATH_ENABLED
12454#include <libxml/xpath.h>
12455#endif
12456
Daniel Veillardffa3c742005-07-21 13:24:09 +000012457extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012458static int xmlParserInitialized = 0;
12459
12460/**
12461 * xmlInitParser:
12462 *
12463 * Initialization function for the XML parser.
12464 * This is not reentrant. Call once before processing in case of
12465 * use in multithreaded programs.
12466 */
12467
12468void
12469xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012470 if (xmlParserInitialized != 0)
12471 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012472
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012473 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12474 (xmlGenericError == NULL))
12475 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012476 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012477 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012478 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012479 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012480 xmlDefaultSAXHandlerInit();
12481 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012482#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012483 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012484#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012485#ifdef LIBXML_HTML_ENABLED
12486 htmlInitAutoClose();
12487 htmlDefaultSAXHandlerInit();
12488#endif
12489#ifdef LIBXML_XPATH_ENABLED
12490 xmlXPathInit();
12491#endif
12492 xmlParserInitialized = 1;
12493}
12494
12495/**
12496 * xmlCleanupParser:
12497 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012498 * Cleanup function for the XML library. It tries to reclaim all
12499 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012500 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012501 * function should not prevent reusing the library but one should
12502 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012503 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012504 */
12505
12506void
12507xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012508 if (!xmlParserInitialized)
12509 return;
12510
Owen Taylor3473f882001-02-23 17:55:21 +000012511 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012512#ifdef LIBXML_CATALOG_ENABLED
12513 xmlCatalogCleanup();
12514#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012515 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012516 xmlCleanupInputCallbacks();
12517#ifdef LIBXML_OUTPUT_ENABLED
12518 xmlCleanupOutputCallbacks();
12519#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012520#ifdef LIBXML_SCHEMAS_ENABLED
12521 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012522 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012523#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012524 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012525 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012526 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012527 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012528 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012529}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012530
12531/************************************************************************
12532 * *
12533 * New set (2.6.0) of simpler and more flexible APIs *
12534 * *
12535 ************************************************************************/
12536
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; a NULL @str is ignored, and a NULL "dict" means the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it is a single
 * statement and can be used safely as the body of an if/else. The
 * terminating semicolon is supplied by the caller. @str is evaluated
 * more than once, so it must not have side effects.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12548
12549/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012550 * xmlCtxtReset:
12551 * @ctxt: an XML parser context
12552 *
12553 * Reset a parser context
12554 */
12555void
12556xmlCtxtReset(xmlParserCtxtPtr ctxt)
12557{
12558 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012559 xmlDictPtr dict;
12560
12561 if (ctxt == NULL)
12562 return;
12563
12564 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012565
12566 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12567 xmlFreeInputStream(input);
12568 }
12569 ctxt->inputNr = 0;
12570 ctxt->input = NULL;
12571
12572 ctxt->spaceNr = 0;
12573 ctxt->spaceTab[0] = -1;
12574 ctxt->space = &ctxt->spaceTab[0];
12575
12576
12577 ctxt->nodeNr = 0;
12578 ctxt->node = NULL;
12579
12580 ctxt->nameNr = 0;
12581 ctxt->name = NULL;
12582
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012583 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012584 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012585 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012586 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012587 DICT_FREE(ctxt->directory);
12588 ctxt->directory = NULL;
12589 DICT_FREE(ctxt->extSubURI);
12590 ctxt->extSubURI = NULL;
12591 DICT_FREE(ctxt->extSubSystem);
12592 ctxt->extSubSystem = NULL;
12593 if (ctxt->myDoc != NULL)
12594 xmlFreeDoc(ctxt->myDoc);
12595 ctxt->myDoc = NULL;
12596
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012597 ctxt->standalone = -1;
12598 ctxt->hasExternalSubset = 0;
12599 ctxt->hasPErefs = 0;
12600 ctxt->html = 0;
12601 ctxt->external = 0;
12602 ctxt->instate = XML_PARSER_START;
12603 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012604
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012605 ctxt->wellFormed = 1;
12606 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012607 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012608 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012609#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012610 ctxt->vctxt.userData = ctxt;
12611 ctxt->vctxt.error = xmlParserValidityError;
12612 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012613#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012614 ctxt->record_info = 0;
12615 ctxt->nbChars = 0;
12616 ctxt->checkIndex = 0;
12617 ctxt->inSubset = 0;
12618 ctxt->errNo = XML_ERR_OK;
12619 ctxt->depth = 0;
12620 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12621 ctxt->catalogs = NULL;
12622 xmlInitNodeInfoSeq(&ctxt->node_seq);
12623
12624 if (ctxt->attsDefault != NULL) {
12625 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12626 ctxt->attsDefault = NULL;
12627 }
12628 if (ctxt->attsSpecial != NULL) {
12629 xmlHashFree(ctxt->attsSpecial, NULL);
12630 ctxt->attsSpecial = NULL;
12631 }
12632
Daniel Veillard4432df22003-09-28 18:58:27 +000012633#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012634 if (ctxt->catalogs != NULL)
12635 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012636#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012637 if (ctxt->lastError.code != XML_ERR_OK)
12638 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012639}
12640
12641/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012642 * xmlCtxtResetPush:
12643 * @ctxt: an XML parser context
12644 * @chunk: a pointer to an array of chars
12645 * @size: number of chars in the array
12646 * @filename: an optional file name or URI
12647 * @encoding: the document encoding, or NULL
12648 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012649 * Reset a push parser context
12650 *
12651 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012652 */
12653int
12654xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12655 int size, const char *filename, const char *encoding)
12656{
12657 xmlParserInputPtr inputStream;
12658 xmlParserInputBufferPtr buf;
12659 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12660
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012661 if (ctxt == NULL)
12662 return(1);
12663
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012664 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12665 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12666
12667 buf = xmlAllocParserInputBuffer(enc);
12668 if (buf == NULL)
12669 return(1);
12670
12671 if (ctxt == NULL) {
12672 xmlFreeParserInputBuffer(buf);
12673 return(1);
12674 }
12675
12676 xmlCtxtReset(ctxt);
12677
12678 if (ctxt->pushTab == NULL) {
12679 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12680 sizeof(xmlChar *));
12681 if (ctxt->pushTab == NULL) {
12682 xmlErrMemory(ctxt, NULL);
12683 xmlFreeParserInputBuffer(buf);
12684 return(1);
12685 }
12686 }
12687
12688 if (filename == NULL) {
12689 ctxt->directory = NULL;
12690 } else {
12691 ctxt->directory = xmlParserGetDirectory(filename);
12692 }
12693
12694 inputStream = xmlNewInputStream(ctxt);
12695 if (inputStream == NULL) {
12696 xmlFreeParserInputBuffer(buf);
12697 return(1);
12698 }
12699
12700 if (filename == NULL)
12701 inputStream->filename = NULL;
12702 else
12703 inputStream->filename = (char *)
12704 xmlCanonicPath((const xmlChar *) filename);
12705 inputStream->buf = buf;
12706 inputStream->base = inputStream->buf->buffer->content;
12707 inputStream->cur = inputStream->buf->buffer->content;
12708 inputStream->end =
12709 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12710
12711 inputPush(ctxt, inputStream);
12712
12713 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12714 (ctxt->input->buf != NULL)) {
12715 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12716 int cur = ctxt->input->cur - ctxt->input->base;
12717
12718 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12719
12720 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12721 ctxt->input->cur = ctxt->input->base + cur;
12722 ctxt->input->end =
12723 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12724 use];
12725#ifdef DEBUG_PUSH
12726 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12727#endif
12728 }
12729
12730 if (encoding != NULL) {
12731 xmlCharEncodingHandlerPtr hdlr;
12732
12733 hdlr = xmlFindCharEncodingHandler(encoding);
12734 if (hdlr != NULL) {
12735 xmlSwitchToEncoding(ctxt, hdlr);
12736 } else {
12737 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12738 "Unsupported encoding %s\n", BAD_CAST encoding);
12739 }
12740 } else if (enc != XML_CHAR_ENCODING_NONE) {
12741 xmlSwitchEncoding(ctxt, enc);
12742 }
12743
12744 return(0);
12745}
12746
12747/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012748 * xmlCtxtUseOptions:
12749 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012750 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012751 *
12752 * Applies the options to the parser context
12753 *
12754 * Returns 0 in case of success, the set of unknown or unimplemented options
12755 * in case of error.
12756 */
12757int
12758xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12759{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012760 if (ctxt == NULL)
12761 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012762 if (options & XML_PARSE_RECOVER) {
12763 ctxt->recovery = 1;
12764 options -= XML_PARSE_RECOVER;
12765 } else
12766 ctxt->recovery = 0;
12767 if (options & XML_PARSE_DTDLOAD) {
12768 ctxt->loadsubset = XML_DETECT_IDS;
12769 options -= XML_PARSE_DTDLOAD;
12770 } else
12771 ctxt->loadsubset = 0;
12772 if (options & XML_PARSE_DTDATTR) {
12773 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12774 options -= XML_PARSE_DTDATTR;
12775 }
12776 if (options & XML_PARSE_NOENT) {
12777 ctxt->replaceEntities = 1;
12778 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12779 options -= XML_PARSE_NOENT;
12780 } else
12781 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012782 if (options & XML_PARSE_PEDANTIC) {
12783 ctxt->pedantic = 1;
12784 options -= XML_PARSE_PEDANTIC;
12785 } else
12786 ctxt->pedantic = 0;
12787 if (options & XML_PARSE_NOBLANKS) {
12788 ctxt->keepBlanks = 0;
12789 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12790 options -= XML_PARSE_NOBLANKS;
12791 } else
12792 ctxt->keepBlanks = 1;
12793 if (options & XML_PARSE_DTDVALID) {
12794 ctxt->validate = 1;
12795 if (options & XML_PARSE_NOWARNING)
12796 ctxt->vctxt.warning = NULL;
12797 if (options & XML_PARSE_NOERROR)
12798 ctxt->vctxt.error = NULL;
12799 options -= XML_PARSE_DTDVALID;
12800 } else
12801 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000012802 if (options & XML_PARSE_NOWARNING) {
12803 ctxt->sax->warning = NULL;
12804 options -= XML_PARSE_NOWARNING;
12805 }
12806 if (options & XML_PARSE_NOERROR) {
12807 ctxt->sax->error = NULL;
12808 ctxt->sax->fatalError = NULL;
12809 options -= XML_PARSE_NOERROR;
12810 }
Daniel Veillard81273902003-09-30 00:43:48 +000012811#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012812 if (options & XML_PARSE_SAX1) {
12813 ctxt->sax->startElement = xmlSAX2StartElement;
12814 ctxt->sax->endElement = xmlSAX2EndElement;
12815 ctxt->sax->startElementNs = NULL;
12816 ctxt->sax->endElementNs = NULL;
12817 ctxt->sax->initialized = 1;
12818 options -= XML_PARSE_SAX1;
12819 }
Daniel Veillard81273902003-09-30 00:43:48 +000012820#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012821 if (options & XML_PARSE_NODICT) {
12822 ctxt->dictNames = 0;
12823 options -= XML_PARSE_NODICT;
12824 } else {
12825 ctxt->dictNames = 1;
12826 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012827 if (options & XML_PARSE_NOCDATA) {
12828 ctxt->sax->cdataBlock = NULL;
12829 options -= XML_PARSE_NOCDATA;
12830 }
12831 if (options & XML_PARSE_NSCLEAN) {
12832 ctxt->options |= XML_PARSE_NSCLEAN;
12833 options -= XML_PARSE_NSCLEAN;
12834 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012835 if (options & XML_PARSE_NONET) {
12836 ctxt->options |= XML_PARSE_NONET;
12837 options -= XML_PARSE_NONET;
12838 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012839 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012840 return (options);
12841}
12842
12843/**
12844 * xmlDoRead:
12845 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012846 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012847 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012848 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012849 * @reuse: keep the context for reuse
12850 *
12851 * Common front-end for the xmlRead functions
12852 *
12853 * Returns the resulting document tree or NULL
12854 */
12855static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012856xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12857 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012858{
12859 xmlDocPtr ret;
12860
12861 xmlCtxtUseOptions(ctxt, options);
12862 if (encoding != NULL) {
12863 xmlCharEncodingHandlerPtr hdlr;
12864
12865 hdlr = xmlFindCharEncodingHandler(encoding);
12866 if (hdlr != NULL)
12867 xmlSwitchToEncoding(ctxt, hdlr);
12868 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012869 if ((URL != NULL) && (ctxt->input != NULL) &&
12870 (ctxt->input->filename == NULL))
12871 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012872 xmlParseDocument(ctxt);
12873 if ((ctxt->wellFormed) || ctxt->recovery)
12874 ret = ctxt->myDoc;
12875 else {
12876 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012877 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012878 xmlFreeDoc(ctxt->myDoc);
12879 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012880 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012881 ctxt->myDoc = NULL;
12882 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012883 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012884 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012885
12886 return (ret);
12887}
12888
12889/**
12890 * xmlReadDoc:
12891 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012892 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012893 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012894 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012895 *
12896 * parse an XML in-memory document and build a tree.
12897 *
12898 * Returns the resulting document tree
12899 */
12900xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012901xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012902{
12903 xmlParserCtxtPtr ctxt;
12904
12905 if (cur == NULL)
12906 return (NULL);
12907
12908 ctxt = xmlCreateDocParserCtxt(cur);
12909 if (ctxt == NULL)
12910 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012911 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012912}
12913
12914/**
12915 * xmlReadFile:
12916 * @filename: a file or URL
12917 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012918 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012919 *
12920 * parse an XML file from the filesystem or the network.
12921 *
12922 * Returns the resulting document tree
12923 */
12924xmlDocPtr
12925xmlReadFile(const char *filename, const char *encoding, int options)
12926{
12927 xmlParserCtxtPtr ctxt;
12928
Daniel Veillard61b93382003-11-03 14:28:31 +000012929 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012930 if (ctxt == NULL)
12931 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012932 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012933}
12934
12935/**
12936 * xmlReadMemory:
12937 * @buffer: a pointer to a char array
12938 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012939 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012940 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012941 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012942 *
12943 * parse an XML in-memory document and build a tree.
12944 *
12945 * Returns the resulting document tree
12946 */
12947xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012948xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012949{
12950 xmlParserCtxtPtr ctxt;
12951
12952 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12953 if (ctxt == NULL)
12954 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012955 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012956}
12957
12958/**
12959 * xmlReadFd:
12960 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012961 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012962 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012963 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012964 *
12965 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012966 * NOTE that the file descriptor will not be closed when the
12967 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012968 *
12969 * Returns the resulting document tree
12970 */
12971xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012972xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012973{
12974 xmlParserCtxtPtr ctxt;
12975 xmlParserInputBufferPtr input;
12976 xmlParserInputPtr stream;
12977
12978 if (fd < 0)
12979 return (NULL);
12980
12981 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12982 if (input == NULL)
12983 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012984 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012985 ctxt = xmlNewParserCtxt();
12986 if (ctxt == NULL) {
12987 xmlFreeParserInputBuffer(input);
12988 return (NULL);
12989 }
12990 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12991 if (stream == NULL) {
12992 xmlFreeParserInputBuffer(input);
12993 xmlFreeParserCtxt(ctxt);
12994 return (NULL);
12995 }
12996 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012997 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012998}
12999
13000/**
13001 * xmlReadIO:
13002 * @ioread: an I/O read function
13003 * @ioclose: an I/O close function
13004 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013005 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013006 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013007 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008 *
13009 * parse an XML document from I/O functions and source and build a tree.
13010 *
13011 * Returns the resulting document tree
13012 */
13013xmlDocPtr
13014xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013015 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013016{
13017 xmlParserCtxtPtr ctxt;
13018 xmlParserInputBufferPtr input;
13019 xmlParserInputPtr stream;
13020
13021 if (ioread == NULL)
13022 return (NULL);
13023
13024 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13025 XML_CHAR_ENCODING_NONE);
13026 if (input == NULL)
13027 return (NULL);
13028 ctxt = xmlNewParserCtxt();
13029 if (ctxt == NULL) {
13030 xmlFreeParserInputBuffer(input);
13031 return (NULL);
13032 }
13033 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13034 if (stream == NULL) {
13035 xmlFreeParserInputBuffer(input);
13036 xmlFreeParserCtxt(ctxt);
13037 return (NULL);
13038 }
13039 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013040 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013041}
13042
13043/**
13044 * xmlCtxtReadDoc:
13045 * @ctxt: an XML parser context
13046 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013047 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013048 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013049 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013050 *
13051 * parse an XML in-memory document and build a tree.
13052 * This reuses the existing @ctxt parser context
13053 *
13054 * Returns the resulting document tree
13055 */
13056xmlDocPtr
13057xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013058 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013059{
13060 xmlParserInputPtr stream;
13061
13062 if (cur == NULL)
13063 return (NULL);
13064 if (ctxt == NULL)
13065 return (NULL);
13066
13067 xmlCtxtReset(ctxt);
13068
13069 stream = xmlNewStringInputStream(ctxt, cur);
13070 if (stream == NULL) {
13071 return (NULL);
13072 }
13073 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013074 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013075}
13076
13077/**
13078 * xmlCtxtReadFile:
13079 * @ctxt: an XML parser context
13080 * @filename: a file or URL
13081 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013082 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013083 *
13084 * parse an XML file from the filesystem or the network.
13085 * This reuses the existing @ctxt parser context
13086 *
13087 * Returns the resulting document tree
13088 */
13089xmlDocPtr
13090xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13091 const char *encoding, int options)
13092{
13093 xmlParserInputPtr stream;
13094
13095 if (filename == NULL)
13096 return (NULL);
13097 if (ctxt == NULL)
13098 return (NULL);
13099
13100 xmlCtxtReset(ctxt);
13101
Daniel Veillard29614c72004-11-26 10:47:26 +000013102 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013103 if (stream == NULL) {
13104 return (NULL);
13105 }
13106 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013107 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013108}
13109
13110/**
13111 * xmlCtxtReadMemory:
13112 * @ctxt: an XML parser context
13113 * @buffer: a pointer to a char array
13114 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013115 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013116 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013117 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013118 *
13119 * parse an XML in-memory document and build a tree.
13120 * This reuses the existing @ctxt parser context
13121 *
13122 * Returns the resulting document tree
13123 */
13124xmlDocPtr
13125xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013126 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013127{
13128 xmlParserInputBufferPtr input;
13129 xmlParserInputPtr stream;
13130
13131 if (ctxt == NULL)
13132 return (NULL);
13133 if (buffer == NULL)
13134 return (NULL);
13135
13136 xmlCtxtReset(ctxt);
13137
13138 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13139 if (input == NULL) {
13140 return(NULL);
13141 }
13142
13143 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13144 if (stream == NULL) {
13145 xmlFreeParserInputBuffer(input);
13146 return(NULL);
13147 }
13148
13149 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013150 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013151}
13152
13153/**
13154 * xmlCtxtReadFd:
13155 * @ctxt: an XML parser context
13156 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013157 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013158 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013159 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013160 *
13161 * parse an XML from a file descriptor and build a tree.
13162 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013163 * NOTE that the file descriptor will not be closed when the
13164 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013165 *
13166 * Returns the resulting document tree
13167 */
13168xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013169xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13170 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013171{
13172 xmlParserInputBufferPtr input;
13173 xmlParserInputPtr stream;
13174
13175 if (fd < 0)
13176 return (NULL);
13177 if (ctxt == NULL)
13178 return (NULL);
13179
13180 xmlCtxtReset(ctxt);
13181
13182
13183 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13184 if (input == NULL)
13185 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013186 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013187 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13188 if (stream == NULL) {
13189 xmlFreeParserInputBuffer(input);
13190 return (NULL);
13191 }
13192 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013193 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013194}
13195
13196/**
13197 * xmlCtxtReadIO:
13198 * @ctxt: an XML parser context
13199 * @ioread: an I/O read function
13200 * @ioclose: an I/O close function
13201 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013202 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013203 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013204 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013205 *
13206 * parse an XML document from I/O functions and source and build a tree.
13207 * This reuses the existing @ctxt parser context
13208 *
13209 * Returns the resulting document tree
13210 */
13211xmlDocPtr
13212xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13213 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013214 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013215 const char *encoding, int options)
13216{
13217 xmlParserInputBufferPtr input;
13218 xmlParserInputPtr stream;
13219
13220 if (ioread == NULL)
13221 return (NULL);
13222 if (ctxt == NULL)
13223 return (NULL);
13224
13225 xmlCtxtReset(ctxt);
13226
13227 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13228 XML_CHAR_ENCODING_NONE);
13229 if (input == NULL)
13230 return (NULL);
13231 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13232 if (stream == NULL) {
13233 xmlFreeParserInputBuffer(input);
13234 return (NULL);
13235 }
13236 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013237 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013238}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013239
13240#define bottom_parser
13241#include "elfgcchack.h"