blob: 4c996dd30e8c3b8a4fc093f28e10e4428f4ed126 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* Flag macro set to 1 for this file. */
#define SAX2 1

/* Buffer size constants (300 for the "big" variant, 100 otherwise). */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for the "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL-terminated and read-only: both the strings and the
 * array slots are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000586 * Library wide options *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlHasFeature:
592 * @feature: the feature to be examined
593 *
594 * Examines if the library has been compiled with a given feature.
595 *
 * Returns a non-zero value if the feature exists; returns zero (0) if the
 * feature does not exist or an unknown feature is requested.
599 */
600int
601xmlHasFeature(xmlFeature feature)
602{
603 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000604 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000605#ifdef LIBXML_THREAD_ENABLED
606 return(1);
607#else
608 return(0);
609#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000610 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000611#ifdef LIBXML_TREE_ENABLED
612 return(1);
613#else
614 return(0);
615#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000616 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000617#ifdef LIBXML_OUTPUT_ENABLED
618 return(1);
619#else
620 return(0);
621#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000622 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000623#ifdef LIBXML_PUSH_ENABLED
624 return(1);
625#else
626 return(0);
627#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000628 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000629#ifdef LIBXML_READER_ENABLED
630 return(1);
631#else
632 return(0);
633#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000634 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000635#ifdef LIBXML_PATTERN_ENABLED
636 return(1);
637#else
638 return(0);
639#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000640 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000641#ifdef LIBXML_WRITER_ENABLED
642 return(1);
643#else
644 return(0);
645#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000646 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000647#ifdef LIBXML_SAX1_ENABLED
648 return(1);
649#else
650 return(0);
651#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000652 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000653#ifdef LIBXML_FTP_ENABLED
654 return(1);
655#else
656 return(0);
657#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000658 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000659#ifdef LIBXML_HTTP_ENABLED
660 return(1);
661#else
662 return(0);
663#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_VALID_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_HTML_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_LEGACY_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_C14N_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_CATALOG_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_XPATH_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_XPTR_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_XINCLUDE_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_ICONV_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_ISO8859X_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_UNICODE_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_REGEXP_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_AUTOMATA_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_EXPR_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_SCHEMAS_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_SCHEMATRON_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_MODULES_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_DEBUG_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef DEBUG_MEMORY_LOCATION
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_DEBUG_RUNTIME
780 return(1);
781#else
782 return(0);
783#endif
784 default:
785 break;
786 }
787 return(0);
788}
789
790/************************************************************************
791 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000792 * SAX2 defaulted attributes handling *
793 * *
794 ************************************************************************/
795
796/**
797 * xmlDetectSAX2:
798 * @ctxt: an XML parser context
799 *
800 * Do the SAX2 detection and specific intialization
801 */
802static void
803xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
804 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000805#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
807 ((ctxt->sax->startElementNs != NULL) ||
808 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000809#else
810 ctxt->sax2 = 1;
811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000812
813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000816 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
817 (ctxt->str_xml_ns == NULL)) {
818 xmlErrMemory(ctxt, NULL);
819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000820}
821
Daniel Veillarde57ec792003-09-10 10:50:59 +0000822typedef struct _xmlDefAttrs xmlDefAttrs;
823typedef xmlDefAttrs *xmlDefAttrsPtr;
824struct _xmlDefAttrs {
825 int nbAttrs; /* number of defaulted attributes on that element */
826 int maxAttrs; /* the size of the array */
827 const xmlChar *values[4]; /* array of localname/prefix/values */
828};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000829
830/**
831 * xmlAddDefAttrs:
832 * @ctxt: an XML parser context
833 * @fullname: the element fullname
834 * @fullattr: the attribute fullname
835 * @value: the attribute value
836 *
837 * Add a defaulted attribute for an element
838 */
839static void
840xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
841 const xmlChar *fullname,
842 const xmlChar *fullattr,
843 const xmlChar *value) {
844 xmlDefAttrsPtr defaults;
845 int len;
846 const xmlChar *name;
847 const xmlChar *prefix;
848
849 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000850 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000851 if (ctxt->attsDefault == NULL)
852 goto mem_error;
853 }
854
855 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000856 * split the element name into prefix:localname , the string found
857 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 */
859 name = xmlSplitQName3(fullname, &len);
860 if (name == NULL) {
861 name = xmlDictLookup(ctxt->dict, fullname, -1);
862 prefix = NULL;
863 } else {
864 name = xmlDictLookup(ctxt->dict, name, -1);
865 prefix = xmlDictLookup(ctxt->dict, fullname, len);
866 }
867
868 /*
869 * make sure there is some storage
870 */
871 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
872 if (defaults == NULL) {
873 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000874 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 if (defaults == NULL)
876 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000878 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
880 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000881 xmlDefAttrsPtr temp;
882
883 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000885 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000887 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 defaults->maxAttrs *= 2;
889 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
890 }
891
892 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000893 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894 * are within the DTD and hen not associated to namespace names.
895 */
896 name = xmlSplitQName3(fullattr, &len);
897 if (name == NULL) {
898 name = xmlDictLookup(ctxt->dict, fullattr, -1);
899 prefix = NULL;
900 } else {
901 name = xmlDictLookup(ctxt->dict, name, -1);
902 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
903 }
904
905 defaults->values[4 * defaults->nbAttrs] = name;
906 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
907 /* intern the string and precompute the end */
908 len = xmlStrlen(value);
909 value = xmlDictLookup(ctxt->dict, value, len);
910 defaults->values[4 * defaults->nbAttrs + 2] = value;
911 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
912 defaults->nbAttrs++;
913
914 return;
915
916mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000917 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 return;
919}
920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000921/**
922 * xmlAddSpecialAttr:
923 * @ctxt: an XML parser context
924 * @fullname: the element fullname
925 * @fullattr: the attribute fullname
926 * @type: the attribute type
927 *
928 * Register that this attribute is not CDATA
929 */
930static void
931xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
932 const xmlChar *fullname,
933 const xmlChar *fullattr,
934 int type)
935{
936 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000937 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000938 if (ctxt->attsSpecial == NULL)
939 goto mem_error;
940 }
941
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000942 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
943 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000944 return;
945
946mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000947 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000948 return;
949}
950
Daniel Veillard4432df22003-09-28 18:58:27 +0000951/**
952 * xmlCheckLanguageID:
953 * @lang: pointer to the string value
954 *
955 * Checks that the value conforms to the LanguageID production:
956 *
957 * NOTE: this is somewhat deprecated, those productions were removed from
958 * the XML Second edition.
959 *
960 * [33] LanguageID ::= Langcode ('-' Subcode)*
961 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
962 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
963 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
964 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
965 * [38] Subcode ::= ([a-z] | [A-Z])+
966 *
967 * Returns 1 if correct 0 otherwise
968 **/
969int
970xmlCheckLanguageID(const xmlChar * lang)
971{
972 const xmlChar *cur = lang;
973
974 if (cur == NULL)
975 return (0);
976 if (((cur[0] == 'i') && (cur[1] == '-')) ||
977 ((cur[0] == 'I') && (cur[1] == '-'))) {
978 /*
979 * IANA code
980 */
981 cur += 2;
982 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
983 ((cur[0] >= 'a') && (cur[0] <= 'z')))
984 cur++;
985 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
986 ((cur[0] == 'X') && (cur[1] == '-'))) {
987 /*
988 * User code
989 */
990 cur += 2;
991 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
992 ((cur[0] >= 'a') && (cur[0] <= 'z')))
993 cur++;
994 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
995 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
996 /*
997 * ISO639
998 */
999 cur++;
1000 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1001 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1002 cur++;
1003 else
1004 return (0);
1005 } else
1006 return (0);
1007 while (cur[0] != 0) { /* non input consuming */
1008 if (cur[0] != '-')
1009 return (0);
1010 cur++;
1011 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1012 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1013 cur++;
1014 else
1015 return (0);
1016 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1017 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1018 cur++;
1019 }
1020 return (1);
1021}
1022
Owen Taylor3473f882001-02-23 17:55:21 +00001023/************************************************************************
1024 * *
1025 * Parser stacks related functions and macros *
1026 * *
1027 ************************************************************************/
1028
1029xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1030 const xmlChar ** str);
1031
Daniel Veillard0fb18932003-09-07 09:14:37 +00001032#ifdef SAX2
1033/**
1034 * nsPush:
1035 * @ctxt: an XML parser context
1036 * @prefix: the namespace prefix or NULL
1037 * @URL: the namespace name
1038 *
1039 * Pushes a new parser namespace on top of the ns stack
1040 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001041 * Returns -1 in case of error, -2 if the namespace should be discarded
1042 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001043 */
1044static int
1045nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1046{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001047 if (ctxt->options & XML_PARSE_NSCLEAN) {
1048 int i;
1049 for (i = 0;i < ctxt->nsNr;i += 2) {
1050 if (ctxt->nsTab[i] == prefix) {
1051 /* in scope */
1052 if (ctxt->nsTab[i + 1] == URL)
1053 return(-2);
1054 /* out of scope keep it */
1055 break;
1056 }
1057 }
1058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001059 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1060 ctxt->nsMax = 10;
1061 ctxt->nsNr = 0;
1062 ctxt->nsTab = (const xmlChar **)
1063 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1064 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001066 ctxt->nsMax = 0;
1067 return (-1);
1068 }
1069 } else if (ctxt->nsNr >= ctxt->nsMax) {
1070 ctxt->nsMax *= 2;
1071 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001072 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001073 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1074 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001075 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001076 ctxt->nsMax /= 2;
1077 return (-1);
1078 }
1079 }
1080 ctxt->nsTab[ctxt->nsNr++] = prefix;
1081 ctxt->nsTab[ctxt->nsNr++] = URL;
1082 return (ctxt->nsNr);
1083}
1084/**
1085 * nsPop:
1086 * @ctxt: an XML parser context
1087 * @nr: the number to pop
1088 *
1089 * Pops the top @nr parser prefix/namespace from the ns stack
1090 *
1091 * Returns the number of namespaces removed
1092 */
1093static int
1094nsPop(xmlParserCtxtPtr ctxt, int nr)
1095{
1096 int i;
1097
1098 if (ctxt->nsTab == NULL) return(0);
1099 if (ctxt->nsNr < nr) {
1100 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1101 nr = ctxt->nsNr;
1102 }
1103 if (ctxt->nsNr <= 0)
1104 return (0);
1105
1106 for (i = 0;i < nr;i++) {
1107 ctxt->nsNr--;
1108 ctxt->nsTab[ctxt->nsNr] = NULL;
1109 }
1110 return(nr);
1111}
1112#endif
1113
1114static int
1115xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1116 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001118 int maxatts;
1119
1120 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001121 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001122 atts = (const xmlChar **)
1123 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001125 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1127 if (attallocs == NULL) goto mem_error;
1128 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001129 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 } else if (nr + 5 > ctxt->maxatts) {
1131 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001132 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1133 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001135 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1137 (maxatts / 5) * sizeof(int));
1138 if (attallocs == NULL) goto mem_error;
1139 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->maxatts = maxatts;
1141 }
1142 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001146}
1147
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001148/**
1149 * inputPush:
1150 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001151 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001152 *
1153 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001154 *
1155 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001156 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001157int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1159{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001160 if ((ctxt == NULL) || (value == NULL))
1161 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001162 if (ctxt->inputNr >= ctxt->inputMax) {
1163 ctxt->inputMax *= 2;
1164 ctxt->inputTab =
1165 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1166 ctxt->inputMax *
1167 sizeof(ctxt->inputTab[0]));
1168 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001169 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170 return (0);
1171 }
1172 }
1173 ctxt->inputTab[ctxt->inputNr] = value;
1174 ctxt->input = value;
1175 return (ctxt->inputNr++);
1176}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001177/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001179 * @ctxt: an XML parser context
1180 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001183 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001185xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001186inputPop(xmlParserCtxtPtr ctxt)
1187{
1188 xmlParserInputPtr ret;
1189
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001190 if (ctxt == NULL)
1191 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001192 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001193 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 ctxt->inputNr--;
1195 if (ctxt->inputNr > 0)
1196 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1197 else
1198 ctxt->input = NULL;
1199 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001200 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001201 return (ret);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
1204 * nodePush:
1205 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001206 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001207 *
1208 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001209 *
1210 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001212int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1214{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001215 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001216 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001217 xmlNodePtr *tmp;
1218
1219 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1220 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001221 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001222 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001223 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 return (0);
1225 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001226 ctxt->nodeTab = tmp;
1227 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001228 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001229 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001230 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001231 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1232 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001233 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001234 return(0);
1235 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 ctxt->nodeTab[ctxt->nodeNr] = value;
1237 ctxt->node = value;
1238 return (ctxt->nodeNr++);
1239}
1240/**
1241 * nodePop:
1242 * @ctxt: an XML parser context
1243 *
1244 * Pops the top element node from the node stack
1245 *
1246 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001248xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001249nodePop(xmlParserCtxtPtr ctxt)
1250{
1251 xmlNodePtr ret;
1252
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001253 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001255 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 ctxt->nodeNr--;
1257 if (ctxt->nodeNr > 0)
1258 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1259 else
1260 ctxt->node = NULL;
1261 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001262 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001263 return (ret);
1264}
Daniel Veillarda2351322004-06-27 12:08:10 +00001265
1266#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001267/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 * nameNsPush:
1269 * @ctxt: an XML parser context
1270 * @value: the element name
1271 * @prefix: the element prefix
1272 * @URI: the element namespace name
1273 *
1274 * Pushes a new element name/prefix/URL on top of the name stack
1275 *
1276 * Returns -1 in case of error, the index in the stack otherwise
1277 */
1278static int
1279nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1280 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1281{
1282 if (ctxt->nameNr >= ctxt->nameMax) {
1283 const xmlChar * *tmp;
1284 void **tmp2;
1285 ctxt->nameMax *= 2;
1286 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1287 ctxt->nameMax *
1288 sizeof(ctxt->nameTab[0]));
1289 if (tmp == NULL) {
1290 ctxt->nameMax /= 2;
1291 goto mem_error;
1292 }
1293 ctxt->nameTab = tmp;
1294 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1295 ctxt->nameMax * 3 *
1296 sizeof(ctxt->pushTab[0]));
1297 if (tmp2 == NULL) {
1298 ctxt->nameMax /= 2;
1299 goto mem_error;
1300 }
1301 ctxt->pushTab = tmp2;
1302 }
1303 ctxt->nameTab[ctxt->nameNr] = value;
1304 ctxt->name = value;
1305 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1306 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001307 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 return (ctxt->nameNr++);
1309mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001310 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 return (-1);
1312}
1313/**
1314 * nameNsPop:
1315 * @ctxt: an XML parser context
1316 *
1317 * Pops the top element/prefix/URI name from the name stack
1318 *
1319 * Returns the name just removed
1320 */
1321static const xmlChar *
1322nameNsPop(xmlParserCtxtPtr ctxt)
1323{
1324 const xmlChar *ret;
1325
1326 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001327 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001328 ctxt->nameNr--;
1329 if (ctxt->nameNr > 0)
1330 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1331 else
1332 ctxt->name = NULL;
1333 ret = ctxt->nameTab[ctxt->nameNr];
1334 ctxt->nameTab[ctxt->nameNr] = NULL;
1335 return (ret);
1336}
Daniel Veillarda2351322004-06-27 12:08:10 +00001337#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338
1339/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340 * namePush:
1341 * @ctxt: an XML parser context
1342 * @value: the element name
1343 *
1344 * Pushes a new element name on top of the name stack
1345 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001346 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001349namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001350{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001351 if (ctxt == NULL) return (-1);
1352
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 ctxt->nameMax *
1358 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001359 if (tmp == NULL) {
1360 ctxt->nameMax /= 2;
1361 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001364 }
1365 ctxt->nameTab[ctxt->nameNr] = value;
1366 ctxt->name = value;
1367 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001368mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001369 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371}
1372/**
1373 * namePop:
1374 * @ctxt: an XML parser context
1375 *
1376 * Pops the top element name from the name stack
1377 *
1378 * Returns the name just removed
1379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381namePop(xmlParserCtxtPtr ctxt)
1382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001383 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001385 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1386 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameNr--;
1388 if (ctxt->nameNr > 0)
1389 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1390 else
1391 ctxt->name = NULL;
1392 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001393 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 return (ret);
1395}
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001397static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001398 if (ctxt->spaceNr >= ctxt->spaceMax) {
1399 ctxt->spaceMax *= 2;
1400 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1401 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1402 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001404 return(0);
1405 }
1406 }
1407 ctxt->spaceTab[ctxt->spaceNr] = val;
1408 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1409 return(ctxt->spaceNr++);
1410}
1411
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001413 int ret;
1414 if (ctxt->spaceNr <= 0) return(0);
1415 ctxt->spaceNr--;
1416 if (ctxt->spaceNr > 0)
1417 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1418 else
1419 ctxt->space = NULL;
1420 ret = ctxt->spaceTab[ctxt->spaceNr];
1421 ctxt->spaceTab[ctxt->spaceNr] = -1;
1422 return(ret);
1423}
1424
1425/*
1426 * Macros for accessing the content. Those should be used only by the parser,
1427 * and not exported.
1428 *
1429 * Dirty macros, i.e. one often need to make assumption on the context to
1430 * use them
1431 *
1432 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1433 * To be used with extreme caution since operations consuming
1434 * characters may move the input buffer to a different location !
1435 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1436 * This should be used internally by the parser
1437 * only to compare to ASCII values otherwise it would break when
1438 * running with UTF-8 encoding.
1439 * RAW same as CUR but in the input buffer, bypass any token
1440 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare against ASCII based substrings.
1443 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001444 * strings without newlines within the parser.
1445 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1446 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001447 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1448 *
1449 * NEXT Skip to the next character, this does the proper decoding
1450 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001451 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001452 * CUR_CHAR(l) returns the current unicode character (int), set l
1453 * to the number of xmlChars used for the encoding [0-5].
1454 * CUR_SCHAR same but operate on a string instead of the context
1455 * COPY_BUF copy the current unicode char to the target buffer, increment
1456 * the index
1457 * GROW, SHRINK handling of input buffers
1458 */
1459
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are equal to
 * c1 ... cn. Bytes are compared as unsigned char. Every argument is
 * parenthesized so that any expression can be passed; s is evaluated once
 * per byte compared, so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1482
/*
 * SKIP(val): move the input cursor of `ctxt` forward by `val` bytes and
 * add `val` to both the character counter and the column.  The line
 * number is not touched, so the skipped bytes must not contain a newline.
 * Afterwards a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and when the cursor sits on a 0 byte and
 * the input cannot be grown the current input is popped.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if (*ctxt->input->cur == 0) { \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) \
            xmlPopInput(ctxt); \
    } \
  } while (0)
1490
/*
 * SKIPL(val): like SKIP but walks the `val` bytes one by one so that the
 * line and column of the input stay correct when a newline is skipped.
 * After the walk a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and when the cursor sits on a 0 byte and
 * the input cannot be grown the current input is popped.
 *
 * `val` is parenthesized in the loop bound: without it an argument such
 * as `a ? 2 : 1` was parsed as `(skipl < a) ? 2 : 1`, which is always true.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1505
/*
 * SHRINK: when not in progressive mode, and the cursor is more than
 * 2 * INPUT_CHUNK bytes past the buffer base with fewer than
 * 2 * INPUT_CHUNK bytes left before the end, call xmlSHRINK(ctxt).
 *
 * The test is wrapped in do { } while (0) so that the macro's `if` can
 * never pair with an `else` that follows the invocation.  The trailing
 * semicolon is part of the macro on purpose: existing call sites may
 * invoke it without one.
 */
#define SHRINK do { \
    if ((ctxt->progressive == 0) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlSHRINK (ctxt); \
  } while (0);
1510
1511static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1512 xmlParserInputShrink(ctxt->input);
1513 if ((*ctxt->input->cur == 0) &&
1514 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1515 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001516 }
Owen Taylor3473f882001-02-23 17:55:21 +00001517
/*
 * GROW: when not in progressive mode and fewer than INPUT_CHUNK bytes
 * remain between the cursor and the end of the buffer, call xmlGROW(ctxt).
 *
 * The test is wrapped in do { } while (0) so that the macro's `if` can
 * never pair with an `else` that follows the invocation.  The trailing
 * semicolon is part of the macro on purpose: some call sites in this file
 * invoke GROW without one.
 */
#define GROW do { \
    if ((ctxt->progressive == 0) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlGROW (ctxt); \
  } while (0);
1521
1522static void xmlGROW (xmlParserCtxtPtr ctxt) {
1523 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1524 if ((*ctxt->input->cur == 0) &&
1525 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1526 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001527}
Owen Taylor3473f882001-02-23 17:55:21 +00001528
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the `ctxt` in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the `ctxt` in scope. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the cursor, the column and
 * the character counter by one; the line number is left alone.  When the
 * new cursor position holds a 0 byte, INPUT_CHUNK more bytes are
 * requested.  This expands to a brace block, not a do/while statement.
 */
#define NEXT1 { \
    ctxt->input->cur += 1; \
    ctxt->input->col += 1; \
    ctxt->nbChars += 1; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }
1540
/*
 * NEXTL(l): step over the character under the cursor, which is `l` bytes
 * long.  A newline moves to column 1 of the next line, anything else
 * advances the column by one.  A '%' found at the new position is handed
 * to xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)
1548
/*
 * CUR_CHAR(l): xmlCurrentChar() on the `ctxt` in scope; the length of the
 * character is stored in `l`.
 * CUR_SCHAR(s, l): same through xmlStringCurrentChar(), reading from the
 * string `s` instead of the parser input.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character `v` to buffer `b` at index `i`
 * and advance `i`.  When `l` is 1 the value is stored as a single
 * xmlChar, otherwise xmlCopyCharMultiByte() writes it and its return
 * value is added to `i`.
 *
 * Every argument is parenthesized (so `l` may be an expression such as
 * `n & 3`) and the body is one do/while statement; callers terminate it
 * with a semicolon.
 */
#define COPY_BUF(l,b,i,v) do { \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v)); \
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556/**
1557 * xmlSkipBlankChars:
1558 * @ctxt: the XML parser context
1559 *
1560 * skip all blanks character found at that point in the input streams.
1561 * It pops up finished entities in the process if allowable at that point.
1562 *
1563 * Returns the number of space chars skipped
1564 */
1565
1566int
1567xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001568 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 /*
1571 * It's Okay to use CUR/NEXT here since all the blanks are on
1572 * the ASCII range.
1573 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001574 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1575 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001576 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001577 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001579 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001580 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001581 if (*cur == '\n') {
1582 ctxt->input->line++; ctxt->input->col = 1;
1583 }
1584 cur++;
1585 res++;
1586 if (*cur == 0) {
1587 ctxt->input->cur = cur;
1588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1589 cur = ctxt->input->cur;
1590 }
1591 }
1592 ctxt->input->cur = cur;
1593 } else {
1594 int cur;
1595 do {
1596 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001597 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001598 NEXT;
1599 cur = CUR;
1600 res++;
1601 }
1602 while ((cur == 0) && (ctxt->inputNr > 1) &&
1603 (ctxt->instate != XML_PARSER_COMMENT)) {
1604 xmlPopInput(ctxt);
1605 cur = CUR;
1606 }
1607 /*
1608 * Need to handle support of entities branching here
1609 */
1610 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1611 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1612 }
Owen Taylor3473f882001-02-23 17:55:21 +00001613 return(res);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Commodity functions to handle entities *
1619 * *
1620 ************************************************************************/
1621
1622/**
1623 * xmlPopInput:
1624 * @ctxt: an XML parser context
1625 *
1626 * xmlPopInput: the current input pointed by ctxt->input came to an end
1627 * pop it and return the next char.
1628 *
1629 * Returns the current xmlChar in the parser context
1630 */
1631xmlChar
1632xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001633 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (xmlParserDebugEntities)
1635 xmlGenericError(xmlGenericErrorContext,
1636 "Popping input %d\n", ctxt->inputNr);
1637 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001638 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001639 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1640 return(xmlPopInput(ctxt));
1641 return(CUR);
1642}
1643
1644/**
1645 * xmlPushInput:
1646 * @ctxt: an XML parser context
1647 * @input: an XML parser input fragment (entity, XML fragment ...).
1648 *
1649 * xmlPushInput: switch to a new input stream which is stacked on top
1650 * of the previous one(s).
1651 */
1652void
1653xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1654 if (input == NULL) return;
1655
1656 if (xmlParserDebugEntities) {
1657 if ((ctxt->input != NULL) && (ctxt->input->filename))
1658 xmlGenericError(xmlGenericErrorContext,
1659 "%s(%d): ", ctxt->input->filename,
1660 ctxt->input->line);
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1663 }
1664 inputPush(ctxt, input);
1665 GROW;
1666}
1667
1668/**
1669 * xmlParseCharRef:
1670 * @ctxt: an XML parser context
1671 *
1672 * parse Reference declarations
1673 *
1674 * [66] CharRef ::= '&#' [0-9]+ ';' |
1675 * '&#x' [0-9a-fA-F]+ ';'
1676 *
1677 * [ WFC: Legal Character ]
1678 * Characters referred to using character references must match the
1679 * production for Char.
1680 *
1681 * Returns the value parsed (as an int), 0 in case of error
1682 */
1683int
1684xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001685 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001686 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001687 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001688
Owen Taylor3473f882001-02-23 17:55:21 +00001689 /*
1690 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1691 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001692 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001693 (NXT(2) == 'x')) {
1694 SKIP(3);
1695 GROW;
1696 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001697 if (count++ > 20) {
1698 count = 0;
1699 GROW;
1700 }
1701 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001702 val = val * 16 + (CUR - '0');
1703 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1704 val = val * 16 + (CUR - 'a') + 10;
1705 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1706 val = val * 16 + (CUR - 'A') + 10;
1707 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 val = 0;
1710 break;
1711 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001712 if (val > 0x10FFFF)
1713 outofrange = val;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 NEXT;
1716 count++;
1717 }
1718 if (RAW == ';') {
1719 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001720 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 ctxt->nbChars ++;
1722 ctxt->input->cur++;
1723 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001725 SKIP(2);
1726 GROW;
1727 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001728 if (count++ > 20) {
1729 count = 0;
1730 GROW;
1731 }
1732 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001733 val = val * 10 + (CUR - '0');
1734 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 val = 0;
1737 break;
1738 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001739 if (val > 0x10FFFF)
1740 outofrange = val;
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742 NEXT;
1743 count++;
1744 }
1745 if (RAW == ';') {
1746 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001747 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ctxt->nbChars ++;
1749 ctxt->input->cur++;
1750 }
1751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 }
1754
1755 /*
1756 * [ WFC: Legal Character ]
1757 * Characters referred to using character references must match the
1758 * production for Char.
1759 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001760 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(val);
1762 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1764 "xmlParseCharRef: invalid xmlChar value %d\n",
1765 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseStringCharRef:
1772 * @ctxt: an XML parser context
1773 * @str: a pointer to an index in the string
1774 *
1775 * parse Reference declarations, variant parsing from a string rather
1776 * than an an input flow.
1777 *
1778 * [66] CharRef ::= '&#' [0-9]+ ';' |
1779 * '&#x' [0-9a-fA-F]+ ';'
1780 *
1781 * [ WFC: Legal Character ]
1782 * Characters referred to using character references must match the
1783 * production for Char.
1784 *
1785 * Returns the value parsed (as an int), 0 in case of error, str will be
1786 * updated to the current value of the index
1787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788static int
Owen Taylor3473f882001-02-23 17:55:21 +00001789xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1790 const xmlChar *ptr;
1791 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 unsigned int val = 0;
1793 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001794
1795 if ((str == NULL) || (*str == NULL)) return(0);
1796 ptr = *str;
1797 cur = *ptr;
1798 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1799 ptr += 3;
1800 cur = *ptr;
1801 while (cur != ';') { /* Non input consuming loop */
1802 if ((cur >= '0') && (cur <= '9'))
1803 val = val * 16 + (cur - '0');
1804 else if ((cur >= 'a') && (cur <= 'f'))
1805 val = val * 16 + (cur - 'a') + 10;
1806 else if ((cur >= 'A') && (cur <= 'F'))
1807 val = val * 16 + (cur - 'A') + 10;
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
1821 } else if ((cur == '&') && (ptr[1] == '#')){
1822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') { /* Non input consuming loops */
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001828 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 val = 0;
1830 break;
1831 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001832 if (val > 0x10FFFF)
1833 outofrange = val;
1834
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ptr++;
1836 cur = *ptr;
1837 }
1838 if (cur == ';')
1839 ptr++;
1840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(0);
1843 }
1844 *str = ptr;
1845
1846 /*
1847 * [ WFC: Legal Character ]
1848 * Characters referred to using character references must match the
1849 * production for Char.
1850 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001851 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001852 return(val);
1853 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1855 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1856 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858 return(0);
1859}
1860
1861/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001862 * xmlNewBlanksWrapperInputStream:
1863 * @ctxt: an XML parser context
1864 * @entity: an Entity pointer
1865 *
1866 * Create a new input stream for wrapping
1867 * blanks around a PEReference
1868 *
1869 * Returns the new input stream or NULL
1870 */
1871
1872static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1873
Daniel Veillardf4862f02002-09-10 11:13:43 +00001874static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001875xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1876 xmlParserInputPtr input;
1877 xmlChar *buffer;
1878 size_t length;
1879 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001880 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1881 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001882 return(NULL);
1883 }
1884 if (xmlParserDebugEntities)
1885 xmlGenericError(xmlGenericErrorContext,
1886 "new blanks wrapper for entity: %s\n", entity->name);
1887 input = xmlNewInputStream(ctxt);
1888 if (input == NULL) {
1889 return(NULL);
1890 }
1891 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001892 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001895 return(NULL);
1896 }
1897 buffer [0] = ' ';
1898 buffer [1] = '%';
1899 buffer [length-3] = ';';
1900 buffer [length-2] = ' ';
1901 buffer [length-1] = 0;
1902 memcpy(buffer + 2, entity->name, length - 5);
1903 input->free = deallocblankswrapper;
1904 input->base = buffer;
1905 input->cur = buffer;
1906 input->length = length;
1907 input->end = &buffer[length];
1908 return(input);
1909}
1910
1911/**
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * xmlParserHandlePEReference:
1913 * @ctxt: the parser context
1914 *
1915 * [69] PEReference ::= '%' Name ';'
1916 *
1917 * [ WFC: No Recursion ]
1918 * A parsed entity must not contain a recursive
1919 * reference to itself, either directly or indirectly.
1920 *
1921 * [ WFC: Entity Declared ]
1922 * In a document without any DTD, a document with only an internal DTD
1923 * subset which contains no parameter entity references, or a document
1924 * with "standalone='yes'", ... ... The declaration of a parameter
1925 * entity must precede any reference to it...
1926 *
1927 * [ VC: Entity Declared ]
1928 * In a document with an external subset or external parameter entities
1929 * with "standalone='no'", ... ... The declaration of a parameter entity
1930 * must precede any reference to it...
1931 *
1932 * [ WFC: In DTD ]
1933 * Parameter-entity references may only appear in the DTD.
1934 * NOTE: misleading but this is handled.
1935 *
1936 * A PEReference may have been detected in the current input stream
1937 * the handling is done accordingly to
1938 * http://www.w3.org/TR/REC-xml#entproc
1939 * i.e.
1940 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001941 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001942 */
1943void
1944xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001945 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlEntityPtr entity = NULL;
1947 xmlParserInputPtr input;
1948
Owen Taylor3473f882001-02-23 17:55:21 +00001949 if (RAW != '%') return;
1950 switch(ctxt->instate) {
1951 case XML_PARSER_CDATA_SECTION:
1952 return;
1953 case XML_PARSER_COMMENT:
1954 return;
1955 case XML_PARSER_START_TAG:
1956 return;
1957 case XML_PARSER_END_TAG:
1958 return;
1959 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 return;
1962 case XML_PARSER_PROLOG:
1963 case XML_PARSER_START:
1964 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001965 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return;
1967 case XML_PARSER_ENTITY_DECL:
1968 case XML_PARSER_CONTENT:
1969 case XML_PARSER_ATTRIBUTE_VALUE:
1970 case XML_PARSER_PI:
1971 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001972 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001973 /* we just ignore it there */
1974 return;
1975 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 return;
1978 case XML_PARSER_ENTITY_VALUE:
1979 /*
1980 * NOTE: in the case of entity values, we don't do the
1981 * substitution here since we need the literal
1982 * entity value to be able to save the internal
1983 * subset of the document.
1984 * This will be handled by xmlStringDecodeEntities
1985 */
1986 return;
1987 case XML_PARSER_DTD:
1988 /*
1989 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1990 * In the internal DTD subset, parameter-entity references
1991 * can occur only where markup declarations can occur, not
1992 * within markup declarations.
1993 * In that case this is handled in xmlParseMarkupDecl
1994 */
1995 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1996 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001997 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001998 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001999 break;
2000 case XML_PARSER_IGNORE:
2001 return;
2002 }
2003
2004 NEXT;
2005 name = xmlParseName(ctxt);
2006 if (xmlParserDebugEntities)
2007 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002008 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 } else {
2012 if (RAW == ';') {
2013 NEXT;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2015 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2016 if (entity == NULL) {
2017
2018 /*
2019 * [ WFC: Entity Declared ]
2020 * In a document without any DTD, a document with only an
2021 * internal DTD subset which contains no parameter entity
2022 * references, or a document with "standalone='yes'", ...
2023 * ... The declaration of a parameter entity must precede
2024 * any reference to it...
2025 */
2026 if ((ctxt->standalone == 1) ||
2027 ((ctxt->hasExternalSubset == 0) &&
2028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002030 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 } else {
2032 /*
2033 * [ VC: Entity Declared ]
2034 * In a document with an external subset or external
2035 * parameter entities with "standalone='no'", ...
2036 * ... The declaration of a parameter entity must precede
2037 * any reference to it...
2038 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002039 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2040 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2041 "PEReference: %%%s; not found\n",
2042 name);
2043 } else
2044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2045 "PEReference: %%%s; not found\n",
2046 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 ctxt->valid = 0;
2048 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049 } else if (ctxt->input->free != deallocblankswrapper) {
2050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2051 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 } else {
2053 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2054 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002055 xmlChar start[4];
2056 xmlCharEncoding enc;
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * handle the extra spaces added before and after
2060 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
2063 input = xmlNewEntityInputStream(ctxt, entity);
2064 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002065
2066 /*
2067 * Get the 4 first bytes and decode the charset
2068 * if enc != XML_CHAR_ENCODING_NONE
2069 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002070 * Note that, since we may have some non-UTF8
2071 * encoding (like UTF16, bug 135229), the 'length'
2072 * is not known, but we can calculate based upon
2073 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002074 */
2075 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002076 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002077 start[0] = RAW;
2078 start[1] = NXT(1);
2079 start[2] = NXT(2);
2080 start[3] = NXT(3);
2081 enc = xmlDetectCharEncoding(start, 4);
2082 if (enc != XML_CHAR_ENCODING_NONE) {
2083 xmlSwitchEncoding(ctxt, enc);
2084 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002085 }
2086
Owen Taylor3473f882001-02-23 17:55:21 +00002087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2089 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 xmlParseTextDecl(ctxt);
2091 }
Owen Taylor3473f882001-02-23 17:55:21 +00002092 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002093 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2094 "PEReference: %s is not a parameter entity\n",
2095 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 }
2098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002099 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
Owen Taylor3473f882001-02-23 17:55:21 +00002101 }
2102}
2103
/*
 * growBuffer(buffer): double the capacity of the heap array `buffer`.
 * The macro relies on the calling scope for an integer variable named
 * `<buffer>_size` holding the capacity and for a `mem_error` label, which
 * is jumped to when the reallocation fails; in that case `buffer` keeps
 * pointing to the old, still valid, block.
 */
#define growBuffer(buffer) { \
    xmlChar *tmp; \
    buffer##_size = buffer##_size * 2; \
    tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (tmp != NULL) \
        buffer = tmp; \
    else \
        goto mem_error; \
}
2115
2116/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002117 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002118 * @ctxt: the parser context
2119 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002121 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2122 * @end: an end marker xmlChar, 0 if none
2123 * @end2: an end marker xmlChar, 0 if none
2124 * @end3: an end marker xmlChar, 0 if none
2125 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002127 *
2128 * [67] Reference ::= EntityRef | CharRef
2129 *
2130 * [69] PEReference ::= '%' Name ';'
2131 *
2132 * Returns A newly allocated string with the substitution done. The caller
2133 * must deallocate it !
2134 */
2135xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002136xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2137 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 xmlChar *buffer = NULL;
2139 int buffer_size = 0;
2140
2141 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002142 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlEntityPtr ent;
2144 int c,l;
2145 int nbchars = 0;
2146
Daniel Veillarda82b1822004-11-08 16:24:57 +00002147 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002149 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002150
2151 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155
2156 /*
2157 * allocate a translation buffer.
2158 */
2159 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002160 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002161 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
2163 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002164 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002165 * we are operating on already parsed values.
2166 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002167 if (str < last)
2168 c = CUR_SCHAR(str, l);
2169 else
2170 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 while ((c != 0) && (c != end) && /* non input consuming loop */
2172 (c != end2) && (c != end3)) {
2173
2174 if (c == 0) break;
2175 if ((c == '&') && (str[1] == '#')) {
2176 int val = xmlParseStringCharRef(ctxt, &str);
2177 if (val != 0) {
2178 COPY_BUF(0,buffer,nbchars,val);
2179 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002180 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2181 growBuffer(buffer);
2182 }
Owen Taylor3473f882001-02-23 17:55:21 +00002183 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2184 if (xmlParserDebugEntities)
2185 xmlGenericError(xmlGenericErrorContext,
2186 "String decoding Entity Reference: %.30s\n",
2187 str);
2188 ent = xmlParseStringEntityRef(ctxt, &str);
2189 if ((ent != NULL) &&
2190 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2191 if (ent->content != NULL) {
2192 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002193 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2194 growBuffer(buffer);
2195 }
Owen Taylor3473f882001-02-23 17:55:21 +00002196 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002197 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2198 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002199 }
2200 } else if ((ent != NULL) && (ent->content != NULL)) {
2201 xmlChar *rep;
2202
2203 ctxt->depth++;
2204 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2205 0, 0, 0);
2206 ctxt->depth--;
2207 if (rep != NULL) {
2208 current = rep;
2209 while (*current != 0) { /* non input consuming loop */
2210 buffer[nbchars++] = *current++;
2211 if (nbchars >
2212 buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 }
2216 xmlFree(rep);
2217 }
2218 } else if (ent != NULL) {
2219 int i = xmlStrlen(ent->name);
2220 const xmlChar *cur = ent->name;
2221
2222 buffer[nbchars++] = '&';
2223 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2224 growBuffer(buffer);
2225 }
2226 for (;i > 0;i--)
2227 buffer[nbchars++] = *cur++;
2228 buffer[nbchars++] = ';';
2229 }
2230 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2231 if (xmlParserDebugEntities)
2232 xmlGenericError(xmlGenericErrorContext,
2233 "String decoding PE Reference: %.30s\n", str);
2234 ent = xmlParseStringPEReference(ctxt, &str);
2235 if (ent != NULL) {
2236 xmlChar *rep;
2237
2238 ctxt->depth++;
2239 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2240 0, 0, 0);
2241 ctxt->depth--;
2242 if (rep != NULL) {
2243 current = rep;
2244 while (*current != 0) { /* non input consuming loop */
2245 buffer[nbchars++] = *current++;
2246 if (nbchars >
2247 buffer_size - XML_PARSER_BUFFER_SIZE) {
2248 growBuffer(buffer);
2249 }
2250 }
2251 xmlFree(rep);
2252 }
2253 }
2254 } else {
2255 COPY_BUF(l,buffer,nbchars,c);
2256 str += l;
2257 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2258 growBuffer(buffer);
2259 }
2260 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002261 if (str < last)
2262 c = CUR_SCHAR(str, l);
2263 else
2264 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002265 }
2266 buffer[nbchars++] = 0;
2267 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002268
2269mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002271 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272}
2273
Daniel Veillarde57ec792003-09-10 10:50:59 +00002274/**
2275 * xmlStringDecodeEntities:
2276 * @ctxt: the parser context
2277 * @str: the input string
2278 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2279 * @end: an end marker xmlChar, 0 if none
2280 * @end2: an end marker xmlChar, 0 if none
2281 * @end3: an end marker xmlChar, 0 if none
2282 *
2283 * Takes a entity string content and process to do the adequate substitutions.
2284 *
2285 * [67] Reference ::= EntityRef | CharRef
2286 *
2287 * [69] PEReference ::= '%' Name ';'
2288 *
2289 * Returns A newly allocated string with the substitution done. The caller
2290 * must deallocate it !
2291 */
2292xmlChar *
2293xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2294 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002295 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002296 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2297 end, end2, end3));
2298}
Owen Taylor3473f882001-02-23 17:55:21 +00002299
2300/************************************************************************
2301 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002302 * Commodity functions, cleanup needed ? *
2303 * *
2304 ************************************************************************/
2305
2306/**
2307 * areBlanks:
2308 * @ctxt: an XML parser context
2309 * @str: a xmlChar *
2310 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002311 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002312 *
2313 * Is this a sequence of blank chars that one can ignore ?
2314 *
2315 * Returns 1 if ignorable 0 otherwise.
2316 */
2317
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002318static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 int i, ret;
2321 xmlNodePtr lastChild;
2322
Daniel Veillard05c13a22001-09-09 08:38:09 +00002323 /*
2324 * Don't spend time trying to differentiate them, the same callback is
2325 * used !
2326 */
2327 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002328 return(0);
2329
Owen Taylor3473f882001-02-23 17:55:21 +00002330 /*
2331 * Check for xml:space value.
2332 */
2333 if (*(ctxt->space) == 1)
2334 return(0);
2335
2336 /*
2337 * Check that the string is made of blanks
2338 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002339 if (blank_chars == 0) {
2340 for (i = 0;i < len;i++)
2341 if (!(IS_BLANK_CH(str[i]))) return(0);
2342 }
Owen Taylor3473f882001-02-23 17:55:21 +00002343
2344 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002345 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002346 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002347 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 if (ctxt->myDoc != NULL) {
2349 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2350 if (ret == 0) return(1);
2351 if (ret == 1) return(0);
2352 }
2353
2354 /*
2355 * Otherwise, heuristic :-\
2356 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002357 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 if ((ctxt->node->children == NULL) &&
2359 (RAW == '<') && (NXT(1) == '/')) return(0);
2360
2361 lastChild = xmlGetLastChild(ctxt->node);
2362 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002363 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2364 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if (xmlNodeIsText(lastChild))
2366 return(0);
2367 else if ((ctxt->node->children != NULL) &&
2368 (xmlNodeIsText(ctxt->node->children)))
2369 return(0);
2370 return(1);
2371}
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373/************************************************************************
2374 * *
2375 * Extra stuff for namespace support *
2376 * Relates to http://www.w3.org/TR/WD-xml-names *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * xmlSplitQName:
2382 * @ctxt: an XML parser context
2383 * @name: an XML parser context
2384 * @prefix: a xmlChar **
2385 *
2386 * parse an UTF8 encoded XML qualified name string
2387 *
2388 * [NS 5] QName ::= (Prefix ':')? LocalPart
2389 *
2390 * [NS 6] Prefix ::= NCName
2391 *
2392 * [NS 7] LocalPart ::= NCName
2393 *
2394 * Returns the local part, and prefix is updated
2395 * to get the Prefix if any.
2396 */
2397
2398xmlChar *
2399xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2400 xmlChar buf[XML_MAX_NAMELEN + 5];
2401 xmlChar *buffer = NULL;
2402 int len = 0;
2403 int max = XML_MAX_NAMELEN;
2404 xmlChar *ret = NULL;
2405 const xmlChar *cur = name;
2406 int c;
2407
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002408 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002409 *prefix = NULL;
2410
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002411 if (cur == NULL) return(NULL);
2412
Owen Taylor3473f882001-02-23 17:55:21 +00002413#ifndef XML_XML_NAMESPACE
2414 /* xml: prefix is not really a namespace */
2415 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2416 (cur[2] == 'l') && (cur[3] == ':'))
2417 return(xmlStrdup(name));
2418#endif
2419
Daniel Veillard597bc482003-07-24 16:08:28 +00002420 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if (cur[0] == ':')
2422 return(xmlStrdup(name));
2423
2424 c = *cur++;
2425 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2426 buf[len++] = c;
2427 c = *cur++;
2428 }
2429 if (len >= max) {
2430 /*
2431 * Okay someone managed to make a huge name, so he's ready to pay
2432 * for the processing speed.
2433 */
2434 max = len * 2;
2435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441 memcpy(buffer, buf, len);
2442 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2443 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002444 xmlChar *tmp;
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002447 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002448 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002449 if (tmp == NULL) {
2450 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002451 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return(NULL);
2453 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002454 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002455 }
2456 buffer[len++] = c;
2457 c = *cur++;
2458 }
2459 buffer[len] = 0;
2460 }
2461
Daniel Veillard597bc482003-07-24 16:08:28 +00002462 /* nasty but well=formed
2463 if ((c == ':') && (*cur == 0)) {
2464 return(xmlStrdup(name));
2465 } */
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467 if (buffer == NULL)
2468 ret = xmlStrndup(buf, len);
2469 else {
2470 ret = buffer;
2471 buffer = NULL;
2472 max = XML_MAX_NAMELEN;
2473 }
2474
2475
2476 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002477 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002479 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002480 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002481 }
Owen Taylor3473f882001-02-23 17:55:21 +00002482 len = 0;
2483
Daniel Veillardbb284f42002-10-16 18:02:47 +00002484 /*
2485 * Check that the first character is proper to start
2486 * a new name
2487 */
2488 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2489 ((c >= 0x41) && (c <= 0x5A)) ||
2490 (c == '_') || (c == ':'))) {
2491 int l;
2492 int first = CUR_SCHAR(cur, l);
2493
2494 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002495 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002496 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002497 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002498 }
2499 }
2500 cur++;
2501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2503 buf[len++] = c;
2504 c = *cur++;
2505 }
2506 if (len >= max) {
2507 /*
2508 * Okay someone managed to make a huge name, so he's ready to pay
2509 * for the processing speed.
2510 */
2511 max = len * 2;
2512
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return(NULL);
2517 }
2518 memcpy(buffer, buf, len);
2519 while (c != 0) { /* tested bigname2.xml */
2520 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002521 xmlChar *tmp;
2522
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 buffer[len++] = c;
2534 c = *cur++;
2535 }
2536 buffer[len] = 0;
2537 }
2538
2539 if (buffer == NULL)
2540 ret = xmlStrndup(buf, len);
2541 else {
2542 ret = buffer;
2543 }
2544 }
2545
2546 return(ret);
2547}
2548
2549/************************************************************************
2550 * *
2551 * The parser itself *
2552 * Relates to http://www.w3.org/TR/REC-xml *
2553 * *
2554 ************************************************************************/
2555
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002558 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/**
2561 * xmlParseName:
2562 * @ctxt: an XML parser context
2563 *
2564 * parse an XML name.
2565 *
2566 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2567 * CombiningChar | Extender
2568 *
2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2570 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002571 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * Returns the Name parsed or NULL
2574 */
2575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002577xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002578 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002579 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002580 int count = 0;
2581
2582 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583
2584 /*
2585 * Accelerator for simple ASCII names
2586 */
2587 in = ctxt->input->cur;
2588 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2589 ((*in >= 0x41) && (*in <= 0x5A)) ||
2590 (*in == '_') || (*in == ':')) {
2591 in++;
2592 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2593 ((*in >= 0x41) && (*in <= 0x5A)) ||
2594 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002595 (*in == '_') || (*in == '-') ||
2596 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002597 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002598 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002601 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002602 ctxt->nbChars += count;
2603 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 if (ret == NULL)
2605 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return(ret);
2607 }
2608 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002609 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002610}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611
Daniel Veillard46de64e2002-05-29 08:21:33 +00002612/**
2613 * xmlParseNameAndCompare:
2614 * @ctxt: an XML parser context
2615 *
2616 * parse an XML name and compares for match
2617 * (specialized for endtag parsing)
2618 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002619 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2620 * and the name for mismatch
2621 */
2622
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002623static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002625 register const xmlChar *cmp = other;
2626 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002627 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002628
2629 GROW;
2630
2631 in = ctxt->input->cur;
2632 while (*in != 0 && *in == *cmp) {
2633 ++in;
2634 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002635 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002636 }
William M. Brack76e95df2003-10-18 16:20:14 +00002637 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002638 /* success */
2639 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641 }
2642 /* failure (or end of input buffer), check with full function */
2643 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002644 /* strings coming from the dictionnary direct compare possible */
2645 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002646 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647 }
2648 return ret;
2649}
2650
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002651static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002652xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002653 int len = 0, l;
2654 int c;
2655 int count = 0;
2656
2657 /*
2658 * Handler for more complex cases
2659 */
2660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002661 c = CUR_CHAR(l);
2662 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2663 (!IS_LETTER(c) && (c != '_') &&
2664 (c != ':'))) {
2665 return(NULL);
2666 }
2667
2668 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002669 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002670 (c == '.') || (c == '-') ||
2671 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002672 (IS_COMBINING(c)) ||
2673 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (count++ > 100) {
2675 count = 0;
2676 GROW;
2677 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002679 NEXTL(l);
2680 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
Daniel Veillard96688262005-08-23 18:14:12 +00002682 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2683 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002685}
2686
2687/**
2688 * xmlParseStringName:
2689 * @ctxt: an XML parser context
2690 * @str: a pointer to the string pointer (IN/OUT)
2691 *
2692 * parse an XML name.
2693 *
2694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2695 * CombiningChar | Extender
2696 *
2697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2698 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002699 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002702 * is updated to the current location in the string.
2703 */
2704
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002705static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002706xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2707 xmlChar buf[XML_MAX_NAMELEN + 5];
2708 const xmlChar *cur = *str;
2709 int len = 0, l;
2710 int c;
2711
2712 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002713 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002714 (c != ':')) {
2715 return(NULL);
2716 }
2717
William M. Brack871611b2003-10-18 04:53:14 +00002718 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002719 (c == '.') || (c == '-') ||
2720 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002721 (IS_COMBINING(c)) ||
2722 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723 COPY_BUF(l,buf,len,c);
2724 cur += l;
2725 c = CUR_SCHAR(cur, l);
2726 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2727 /*
2728 * Okay someone managed to make a huge name, so he's ready to pay
2729 * for the processing speed.
2730 */
2731 xmlChar *buffer;
2732 int max = len * 2;
2733
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002734 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002735 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002737 return(NULL);
2738 }
2739 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002740 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002741 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002742 (c == '.') || (c == '-') ||
2743 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002744 (IS_COMBINING(c)) ||
2745 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002747 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002749 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002750 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002753 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(NULL);
2755 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002756 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 COPY_BUF(l,buffer,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 }
2762 buffer[len] = 0;
2763 *str = cur;
2764 return(buffer);
2765 }
2766 }
2767 *str = cur;
2768 return(xmlStrndup(buf, len));
2769}
2770
2771/**
2772 * xmlParseNmtoken:
2773 * @ctxt: an XML parser context
2774 *
2775 * parse an XML Nmtoken.
2776 *
2777 * [7] Nmtoken ::= (NameChar)+
2778 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002779 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002780 *
2781 * Returns the Nmtoken parsed or NULL
2782 */
2783
2784xmlChar *
2785xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2786 xmlChar buf[XML_MAX_NAMELEN + 5];
2787 int len = 0, l;
2788 int c;
2789 int count = 0;
2790
2791 GROW;
2792 c = CUR_CHAR(l);
2793
William M. Brack871611b2003-10-18 04:53:14 +00002794 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 (c == '.') || (c == '-') ||
2796 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002797 (IS_COMBINING(c)) ||
2798 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002799 if (count++ > 100) {
2800 count = 0;
2801 GROW;
2802 }
2803 COPY_BUF(l,buf,len,c);
2804 NEXTL(l);
2805 c = CUR_CHAR(l);
2806 if (len >= XML_MAX_NAMELEN) {
2807 /*
2808 * Okay someone managed to make a huge token, so he's ready to pay
2809 * for the processing speed.
2810 */
2811 xmlChar *buffer;
2812 int max = len * 2;
2813
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002814 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002817 return(NULL);
2818 }
2819 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
2829 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlChar *tmp;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002834 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002835 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002836 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002837 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002838 return(NULL);
2839 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002840 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002841 }
2842 COPY_BUF(l,buffer,len,c);
2843 NEXTL(l);
2844 c = CUR_CHAR(l);
2845 }
2846 buffer[len] = 0;
2847 return(buffer);
2848 }
2849 }
2850 if (len == 0)
2851 return(NULL);
2852 return(xmlStrndup(buf, len));
2853}
2854
2855/**
2856 * xmlParseEntityValue:
2857 * @ctxt: an XML parser context
2858 * @orig: if non-NULL store a copy of the original entity value
2859 *
2860 * parse a value for ENTITY declarations
2861 *
2862 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2863 * "'" ([^%&'] | PEReference | Reference)* "'"
2864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002865 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002866 */
2867
2868xmlChar *
2869xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2870 xmlChar *buf = NULL;
2871 int len = 0;
2872 int size = XML_PARSER_BUFFER_SIZE;
2873 int c, l;
2874 xmlChar stop;
2875 xmlChar *ret = NULL;
2876 const xmlChar *cur = NULL;
2877 xmlParserInputPtr input;
2878
2879 if (RAW == '"') stop = '"';
2880 else if (RAW == '\'') stop = '\'';
2881 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002882 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002883 return(NULL);
2884 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002885 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
2890
2891 /*
2892 * The content of the entity definition is copied in a buffer.
2893 */
2894
2895 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2896 input = ctxt->input;
2897 GROW;
2898 NEXT;
2899 c = CUR_CHAR(l);
2900 /*
2901 * NOTE: 4.4.5 Included in Literal
2902 * When a parameter entity reference appears in a literal entity
2903 * value, ... a single or double quote character in the replacement
2904 * text is always treated as a normal data character and will not
2905 * terminate the literal.
2906 * In practice it means we stop the loop only when back at parsing
2907 * the initial entity and the quote is found
2908 */
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002910 (ctxt->input != input))) {
2911 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 xmlChar *tmp;
2913
Owen Taylor3473f882001-02-23 17:55:21 +00002914 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002915 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002918 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002919 return(NULL);
2920 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002921 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 }
2923 COPY_BUF(l,buf,len,c);
2924 NEXTL(l);
2925 /*
2926 * Pop-up of finished entities.
2927 */
2928 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2929 xmlPopInput(ctxt);
2930
2931 GROW;
2932 c = CUR_CHAR(l);
2933 if (c == 0) {
2934 GROW;
2935 c = CUR_CHAR(l);
2936 }
2937 }
2938 buf[len] = 0;
2939
2940 /*
2941 * Raise problem w.r.t. '&' and '%' being used in non-entities
2942 * reference constructs. Note Charref will be handled in
2943 * xmlStringDecodeEntities()
2944 */
2945 cur = buf;
2946 while (*cur != 0) { /* non input consuming */
2947 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2948 xmlChar *name;
2949 xmlChar tmp = *cur;
2950
2951 cur++;
2952 name = xmlParseStringName(ctxt, &cur);
2953 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002954 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002956 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002958 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2959 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 }
2962 if (name != NULL)
2963 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002964 if (*cur == 0)
2965 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 cur++;
2968 }
2969
2970 /*
2971 * Then PEReference entities are substituted.
2972 */
2973 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002974 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002975 xmlFree(buf);
2976 } else {
2977 NEXT;
2978 /*
2979 * NOTE: 4.4.7 Bypassed
2980 * When a general entity reference appears in the EntityValue in
2981 * an entity declaration, it is bypassed and left as is.
2982 * so XML_SUBSTITUTE_REF is not set here.
2983 */
2984 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2985 0, 0, 0);
2986 if (orig != NULL)
2987 *orig = buf;
2988 else
2989 xmlFree(buf);
2990 }
2991
2992 return(ret);
2993}
2994
2995/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002996 * xmlParseAttValueComplex:
2997 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002998 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002999 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003000 *
3001 * parse a value for an attribute, this is the fallback function
3002 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003003 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003007static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003008xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003009 xmlChar limit = 0;
3010 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 int len = 0;
3012 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 xmlChar *current = NULL;
3015 xmlEntityPtr ent;
3016
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (NXT(0) == '"') {
3018 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3019 limit = '"';
3020 NEXT;
3021 } else if (NXT(0) == '\'') {
3022 limit = '\'';
3023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3024 NEXT;
3025 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
3029
3030 /*
3031 * allocate a translation buffer.
3032 */
3033 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003034 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003035 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003036
3037 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003038 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003039 */
3040 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003041 while ((NXT(0) != limit) && /* checked */
3042 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003044 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003045 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (NXT(1) == '#') {
3047 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003050 if (ctxt->replaceEntities) {
3051 if (len > buf_size - 10) {
3052 growBuffer(buf);
3053 }
3054 buf[len++] = '&';
3055 } else {
3056 /*
3057 * The reparsing will be done in xmlStringGetNodeList()
3058 * called by the attribute() function in SAX.c
3059 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003060 if (len > buf_size - 10) {
3061 growBuffer(buf);
3062 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003063 buf[len++] = '&';
3064 buf[len++] = '#';
3065 buf[len++] = '3';
3066 buf[len++] = '8';
3067 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003070 if (len > buf_size - 10) {
3071 growBuffer(buf);
3072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len += xmlCopyChar(0, &buf[len], val);
3074 }
3075 } else {
3076 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003077 if ((ent != NULL) &&
3078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3079 if (len > buf_size - 10) {
3080 growBuffer(buf);
3081 }
3082 if ((ctxt->replaceEntities == 0) &&
3083 (ent->content[0] == '&')) {
3084 buf[len++] = '&';
3085 buf[len++] = '#';
3086 buf[len++] = '3';
3087 buf[len++] = '8';
3088 buf[len++] = ';';
3089 } else {
3090 buf[len++] = ent->content[0];
3091 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003092 } else if ((ent != NULL) &&
3093 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 xmlChar *rep;
3095
3096 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3097 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 XML_SUBSTITUTE_REF,
3099 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (rep != NULL) {
3101 current = rep;
3102 while (*current != 0) { /* non input consuming */
3103 buf[len++] = *current++;
3104 if (len > buf_size - 10) {
3105 growBuffer(buf);
3106 }
3107 }
3108 xmlFree(rep);
3109 }
3110 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003111 if (len > buf_size - 10) {
3112 growBuffer(buf);
3113 }
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (ent->content != NULL)
3115 buf[len++] = ent->content[0];
3116 }
3117 } else if (ent != NULL) {
3118 int i = xmlStrlen(ent->name);
3119 const xmlChar *cur = ent->name;
3120
3121 /*
3122 * This may look absurd but is needed to detect
3123 * entities problems
3124 */
3125 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3126 (ent->content != NULL)) {
3127 xmlChar *rep;
3128 rep = xmlStringDecodeEntities(ctxt, ent->content,
3129 XML_SUBSTITUTE_REF, 0, 0, 0);
3130 if (rep != NULL)
3131 xmlFree(rep);
3132 }
3133
3134 /*
3135 * Just output the reference
3136 */
3137 buf[len++] = '&';
3138 if (len > buf_size - i - 10) {
3139 growBuffer(buf);
3140 }
3141 for (;i > 0;i--)
3142 buf[len++] = *cur++;
3143 buf[len++] = ';';
3144 }
3145 }
3146 } else {
3147 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003148 if ((len != 0) || (!normalize)) {
3149 if ((!normalize) || (!in_space)) {
3150 COPY_BUF(l,buf,len,0x20);
3151 if (len > buf_size - 10) {
3152 growBuffer(buf);
3153 }
3154 }
3155 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 }
3157 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003158 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003159 COPY_BUF(l,buf,len,c);
3160 if (len > buf_size - 10) {
3161 growBuffer(buf);
3162 }
3163 }
3164 NEXTL(l);
3165 }
3166 GROW;
3167 c = CUR_CHAR(l);
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 if ((in_space) && (normalize)) {
3170 while (buf[len - 1] == 0x20) len--;
3171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003172 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003174 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003175 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003176 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3177 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003178 } else
3179 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003180 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003182
3183mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003185 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003186}
3187
3188/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003189 * xmlParseAttValue:
3190 * @ctxt: an XML parser context
3191 *
3192 * parse a value for an attribute
3193 * Note: the parser won't do substitution of entities here, this
3194 * will be handled later in xmlStringGetNodeList
3195 *
3196 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3197 * "'" ([^<&'] | Reference)* "'"
3198 *
3199 * 3.3.3 Attribute-Value Normalization:
3200 * Before the value of an attribute is passed to the application or
3201 * checked for validity, the XML processor must normalize it as follows:
3202 * - a character reference is processed by appending the referenced
3203 * character to the attribute value
3204 * - an entity reference is processed by recursively processing the
3205 * replacement text of the entity
3206 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3207 * appending #x20 to the normalized value, except that only a single
3208 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3209 * parsed entity or the literal entity value of an internal parsed entity
3210 * - other characters are processed by appending them to the normalized value
3211 * If the declared value is not CDATA, then the XML processor must further
3212 * process the normalized attribute value by discarding any leading and
3213 * trailing space (#x20) characters, and by replacing sequences of space
3214 * (#x20) characters by a single space (#x20) character.
3215 * All attributes for which no declaration has been read should be treated
3216 * by a non-validating parser as if declared CDATA.
3217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
3220
3221
3222xmlChar *
3223xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003224 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003226}
3227
3228/**
Owen Taylor3473f882001-02-23 17:55:21 +00003229 * xmlParseSystemLiteral:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse an XML Literal
3233 *
3234 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3235 *
3236 * Returns the SystemLiteral parsed or NULL
3237 */
3238
3239xmlChar *
3240xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3241 xmlChar *buf = NULL;
3242 int len = 0;
3243 int size = XML_PARSER_BUFFER_SIZE;
3244 int cur, l;
3245 xmlChar stop;
3246 int state = ctxt->instate;
3247 int count = 0;
3248
3249 SHRINK;
3250 if (RAW == '"') {
3251 NEXT;
3252 stop = '"';
3253 } else if (RAW == '\'') {
3254 NEXT;
3255 stop = '\'';
3256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 return(NULL);
3259 }
3260
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003264 return(NULL);
3265 }
3266 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3267 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003268 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003269 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003270 xmlChar *tmp;
3271
Owen Taylor3473f882001-02-23 17:55:21 +00003272 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003273 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3274 if (tmp == NULL) {
3275 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003276 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003277 ctxt->instate = (xmlParserInputState) state;
3278 return(NULL);
3279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 count++;
3283 if (count > 50) {
3284 GROW;
3285 count = 0;
3286 }
3287 COPY_BUF(l,buf,len,cur);
3288 NEXTL(l);
3289 cur = CUR_CHAR(l);
3290 if (cur == 0) {
3291 GROW;
3292 SHRINK;
3293 cur = CUR_CHAR(l);
3294 }
3295 }
3296 buf[len] = 0;
3297 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003298 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003300 } else {
3301 NEXT;
3302 }
3303 return(buf);
3304}
3305
3306/**
3307 * xmlParsePubidLiteral:
3308 * @ctxt: an XML parser context
3309 *
3310 * parse an XML public literal
3311 *
3312 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3313 *
3314 * Returns the PubidLiteral parsed or NULL.
3315 */
3316
3317xmlChar *
3318xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3319 xmlChar *buf = NULL;
3320 int len = 0;
3321 int size = XML_PARSER_BUFFER_SIZE;
3322 xmlChar cur;
3323 xmlChar stop;
3324 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003325 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003326
3327 SHRINK;
3328 if (RAW == '"') {
3329 NEXT;
3330 stop = '"';
3331 } else if (RAW == '\'') {
3332 NEXT;
3333 stop = '\'';
3334 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003336 return(NULL);
3337 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003338 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003340 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 return(NULL);
3342 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003343 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003345 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003346 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 xmlChar *tmp;
3348
Owen Taylor3473f882001-02-23 17:55:21 +00003349 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3351 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003352 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003353 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003354 return(NULL);
3355 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003357 }
3358 buf[len++] = cur;
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 NEXT;
3365 cur = CUR;
3366 if (cur == 0) {
3367 GROW;
3368 SHRINK;
3369 cur = CUR;
3370 }
3371 }
3372 buf[len] = 0;
3373 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 } else {
3376 NEXT;
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return(buf);
3380}
3381
Daniel Veillard48b2f892001-02-25 16:11:03 +00003382void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003383
/*
 * Lookup table used by the inner loop of the character data scanner,
 * indexed by input byte. A non-zero entry (the byte value itself) means
 * the byte can be stepped over directly; a zero entry makes the scan
 * stop so the byte can be examined individually. The zero entries are:
 * every control character except TAB (so LF and CR stop the scan),
 * '&', '<', ']' and all bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters, all rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ASCII bytes, all rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3421
Owen Taylor3473f882001-02-23 17:55:21 +00003422/**
3423 * xmlParseCharData:
3424 * @ctxt: an XML parser context
3425 * @cdata: int indicating whether we are within a CDATA section
3426 *
3427 * parse a CharData section.
3428 * if we are within a CDATA section ']]>' marks an end of section.
3429 *
3430 * The right angle bracket (>) may be represented using the string "&gt;",
3431 * and must, for compatibility, be escaped using "&gt;" or a character
3432 * reference when it appears in the string "]]>" in content, when that
3433 * string is not marking the end of a CDATA section.
3434 *
3435 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3436 */
3437
3438void
3439xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003440 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003441 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003442 int line = ctxt->input->line;
3443 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003444 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445
3446 SHRINK;
3447 GROW;
3448 /*
3449 * Accelerated common case where input don't need to be
3450 * modified before passing it to the handler.
3451 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003452 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 in = ctxt->input->cur;
3454 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003455get_more_space:
3456 while (*in == 0x20) in++;
3457 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003458 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003459 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003460 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003461 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003462 goto get_more_space;
3463 }
3464 if (*in == '<') {
3465 nbchar = in - ctxt->input->cur;
3466 if (nbchar > 0) {
3467 const xmlChar *tmp = ctxt->input->cur;
3468 ctxt->input->cur = in;
3469
Daniel Veillard34099b42004-11-04 17:34:35 +00003470 if ((ctxt->sax != NULL) &&
3471 (ctxt->sax->ignorableWhitespace !=
3472 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003473 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003474 if (ctxt->sax->ignorableWhitespace != NULL)
3475 ctxt->sax->ignorableWhitespace(ctxt->userData,
3476 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003477 } else if (ctxt->sax->characters != NULL)
3478 ctxt->sax->characters(ctxt->userData,
3479 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003480 } else if ((ctxt->sax != NULL) &&
3481 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003482 ctxt->sax->characters(ctxt->userData,
3483 tmp, nbchar);
3484 }
3485 }
3486 return;
3487 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003488
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003489get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003490 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003491 while (test_char_data[*in]) {
3492 in++;
3493 ccol++;
3494 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003495 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003496 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003497 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003498 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003499 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003500 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003501 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003502 }
3503 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003504 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003505 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003506 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003507 return;
3508 }
3509 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003510 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003511 goto get_more;
3512 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003513 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003514 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003515 if ((ctxt->sax != NULL) &&
3516 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003517 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003518 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003519 const xmlChar *tmp = ctxt->input->cur;
3520 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003521
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003522 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003523 if (ctxt->sax->ignorableWhitespace != NULL)
3524 ctxt->sax->ignorableWhitespace(ctxt->userData,
3525 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003526 } else if (ctxt->sax->characters != NULL)
3527 ctxt->sax->characters(ctxt->userData,
3528 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003529 line = ctxt->input->line;
3530 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003531 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003532 if (ctxt->sax->characters != NULL)
3533 ctxt->sax->characters(ctxt->userData,
3534 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003535 line = ctxt->input->line;
3536 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003537 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003538 }
3539 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003540 if (*in == 0xD) {
3541 in++;
3542 if (*in == 0xA) {
3543 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003544 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003545 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003546 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003547 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003548 in--;
3549 }
3550 if (*in == '<') {
3551 return;
3552 }
3553 if (*in == '&') {
3554 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003555 }
3556 SHRINK;
3557 GROW;
3558 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003559 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003560 nbchar = 0;
3561 }
Daniel Veillard50582112001-03-26 22:52:16 +00003562 ctxt->input->line = line;
3563 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003564 xmlParseCharDataComplex(ctxt, cdata);
3565}
3566
Daniel Veillard01c13b52002-12-10 15:19:08 +00003567/**
3568 * xmlParseCharDataComplex:
3569 * @ctxt: an XML parser context
3570 * @cdata: int indicating whether we are within a CDATA section
3571 *
3572 * parse a CharData section.this is the fallback function
3573 * of xmlParseCharData() when the parsing requires handling
3574 * of non-ASCII characters.
3575 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003576void
3577xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003578 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3579 int nbchar = 0;
3580 int cur, l;
3581 int count = 0;
3582
3583 SHRINK;
3584 GROW;
3585 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003586 while ((cur != '<') && /* checked */
3587 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003588 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 if ((cur == ']') && (NXT(1) == ']') &&
3590 (NXT(2) == '>')) {
3591 if (cdata) break;
3592 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003593 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003594 }
3595 }
3596 COPY_BUF(l,buf,nbchar,cur);
3597 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003598 buf[nbchar] = 0;
3599
Owen Taylor3473f882001-02-23 17:55:21 +00003600 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003601 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003602 */
3603 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003604 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003605 if (ctxt->sax->ignorableWhitespace != NULL)
3606 ctxt->sax->ignorableWhitespace(ctxt->userData,
3607 buf, nbchar);
3608 } else {
3609 if (ctxt->sax->characters != NULL)
3610 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3611 }
3612 }
3613 nbchar = 0;
3614 }
3615 count++;
3616 if (count > 50) {
3617 GROW;
3618 count = 0;
3619 }
3620 NEXTL(l);
3621 cur = CUR_CHAR(l);
3622 }
3623 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003624 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003626 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003627 */
3628 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003629 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if (ctxt->sax->ignorableWhitespace != NULL)
3631 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3632 } else {
3633 if (ctxt->sax->characters != NULL)
3634 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3635 }
3636 }
3637 }
3638}
3639
3640/**
3641 * xmlParseExternalID:
3642 * @ctxt: an XML parser context
3643 * @publicID: a xmlChar** receiving PubidLiteral
3644 * @strict: indicate whether we should restrict parsing to only
3645 * production [75], see NOTE below
3646 *
3647 * Parse an External ID or a Public ID
3648 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003649 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003650 * 'PUBLIC' S PubidLiteral S SystemLiteral
3651 *
3652 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3653 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3654 *
3655 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3656 *
3657 * Returns the function returns SystemLiteral and in the second
3658 * case publicID receives PubidLiteral, is strict is off
3659 * it is possible to return NULL and have publicID set.
3660 */
3661
3662xmlChar *
3663xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3664 xmlChar *URI = NULL;
3665
3666 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003667
3668 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003669 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003670 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003671 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3673 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003674 }
3675 SKIP_BLANKS;
3676 URI = xmlParseSystemLiteral(ctxt);
3677 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003678 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003679 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003680 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003681 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003682 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003684 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 SKIP_BLANKS;
3687 *publicID = xmlParsePubidLiteral(ctxt);
3688 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003689 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003690 }
3691 if (strict) {
3692 /*
3693 * We don't handle [83] so "S SystemLiteral" is required.
3694 */
William M. Brack76e95df2003-10-18 16:20:14 +00003695 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003696 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003697 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 } else {
3700 /*
3701 * We handle [83] so we return immediately, if
3702 * "S SystemLiteral" is not detected. From a purely parsing
3703 * point of view that's a nice mess.
3704 */
3705 const xmlChar *ptr;
3706 GROW;
3707
3708 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003709 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003710
William M. Brack76e95df2003-10-18 16:20:14 +00003711 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003712 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3713 }
3714 SKIP_BLANKS;
3715 URI = xmlParseSystemLiteral(ctxt);
3716 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003717 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003718 }
3719 }
3720 return(URI);
3721}
3722
3723/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003724 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003725 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003726 * @buf: the already parsed part of the buffer
3727 * @len: number of bytes filles in the buffer
3728 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003729 *
3730 * Skip an XML (SGML) comment <!-- .... -->
3731 * The spec says that "For compatibility, the string "--" (double-hyphen)
3732 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003733 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003734 *
3735 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3736 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003737static void
3738xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003739 int q, ql;
3740 int r, rl;
3741 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 xmlParserInputPtr input = ctxt->input;
3743 int count = 0;
3744
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003746 len = 0;
3747 size = XML_PARSER_BUFFER_SIZE;
3748 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3749 if (buf == NULL) {
3750 xmlErrMemory(ctxt, NULL);
3751 return;
3752 }
Owen Taylor3473f882001-02-23 17:55:21 +00003753 }
3754 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003755 if (q == 0)
3756 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003757 NEXTL(ql);
3758 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003759 if (r == 0)
3760 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 NEXTL(rl);
3762 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003763 if (cur == 0)
3764 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003765 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003766 ((cur != '>') ||
3767 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003768 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003769 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003770 }
3771 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003772 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003773 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003774 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3775 if (new_buf == NULL) {
3776 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003777 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 return;
3779 }
William M. Bracka3215c72004-07-31 16:24:01 +00003780 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003781 }
3782 COPY_BUF(ql,buf,len,q);
3783 q = r;
3784 ql = rl;
3785 r = cur;
3786 rl = l;
3787
3788 count++;
3789 if (count > 50) {
3790 GROW;
3791 count = 0;
3792 }
3793 NEXTL(l);
3794 cur = CUR_CHAR(l);
3795 if (cur == 0) {
3796 SHRINK;
3797 GROW;
3798 cur = CUR_CHAR(l);
3799 }
3800 }
3801 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003802 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003803 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003804 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 xmlFree(buf);
3806 } else {
3807 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003808 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3809 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
3811 NEXT;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3813 (!ctxt->disableSAX))
3814 ctxt->sax->comment(ctxt->userData, buf);
3815 xmlFree(buf);
3816 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003817 return;
3818not_terminated:
3819 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3820 "Comment not terminated\n", NULL);
3821 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003822}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003823/**
3824 * xmlParseComment:
3825 * @ctxt: an XML parser context
3826 *
3827 * Skip an XML (SGML) comment <!-- .... -->
3828 * The spec says that "For compatibility, the string "--" (double-hyphen)
3829 * must not occur within comments. "
3830 *
3831 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3832 */
3833void
3834xmlParseComment(xmlParserCtxtPtr ctxt) {
3835 xmlChar *buf = NULL;
3836 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003837 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003838 xmlParserInputState state;
3839 const xmlChar *in;
3840 int nbchar = 0, ccol;
3841
3842 /*
3843 * Check that there is a comment right here.
3844 */
3845 if ((RAW != '<') || (NXT(1) != '!') ||
3846 (NXT(2) != '-') || (NXT(3) != '-')) return;
3847
3848 state = ctxt->instate;
3849 ctxt->instate = XML_PARSER_COMMENT;
3850 SKIP(4);
3851 SHRINK;
3852 GROW;
3853
3854 /*
3855 * Accelerated common case where input don't need to be
3856 * modified before passing it to the handler.
3857 */
3858 in = ctxt->input->cur;
3859 do {
3860 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003861 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003862 ctxt->input->line++; ctxt->input->col = 1;
3863 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003864 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003865 }
3866get_more:
3867 ccol = ctxt->input->col;
3868 while (((*in > '-') && (*in <= 0x7F)) ||
3869 ((*in >= 0x20) && (*in < '-')) ||
3870 (*in == 0x09)) {
3871 in++;
3872 ccol++;
3873 }
3874 ctxt->input->col = ccol;
3875 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003876 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003877 ctxt->input->line++; ctxt->input->col = 1;
3878 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003879 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003880 goto get_more;
3881 }
3882 nbchar = in - ctxt->input->cur;
3883 /*
3884 * save current set of data
3885 */
3886 if (nbchar > 0) {
3887 if ((ctxt->sax != NULL) &&
3888 (ctxt->sax->comment != NULL)) {
3889 if (buf == NULL) {
3890 if ((*in == '-') && (in[1] == '-'))
3891 size = nbchar + 1;
3892 else
3893 size = XML_PARSER_BUFFER_SIZE + nbchar;
3894 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3895 if (buf == NULL) {
3896 xmlErrMemory(ctxt, NULL);
3897 ctxt->instate = state;
3898 return;
3899 }
3900 len = 0;
3901 } else if (len + nbchar + 1 >= size) {
3902 xmlChar *new_buf;
3903 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3904 new_buf = (xmlChar *) xmlRealloc(buf,
3905 size * sizeof(xmlChar));
3906 if (new_buf == NULL) {
3907 xmlFree (buf);
3908 xmlErrMemory(ctxt, NULL);
3909 ctxt->instate = state;
3910 return;
3911 }
3912 buf = new_buf;
3913 }
3914 memcpy(&buf[len], ctxt->input->cur, nbchar);
3915 len += nbchar;
3916 buf[len] = 0;
3917 }
3918 }
3919 ctxt->input->cur = in;
3920 if (*in == 0xA)
3921
3922 if (*in == 0xD) {
3923 in++;
3924 if (*in == 0xA) {
3925 ctxt->input->cur = in;
3926 in++;
3927 ctxt->input->line++; ctxt->input->col = 1;
3928 continue; /* while */
3929 }
3930 in--;
3931 }
3932 SHRINK;
3933 GROW;
3934 in = ctxt->input->cur;
3935 if (*in == '-') {
3936 if (in[1] == '-') {
3937 if (in[2] == '>') {
3938 SKIP(3);
3939 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3940 (!ctxt->disableSAX)) {
3941 if (buf != NULL)
3942 ctxt->sax->comment(ctxt->userData, buf);
3943 else
3944 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3945 }
3946 if (buf != NULL)
3947 xmlFree(buf);
3948 ctxt->instate = state;
3949 return;
3950 }
3951 if (buf != NULL)
3952 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3953 "Comment not terminated \n<!--%.50s\n",
3954 buf);
3955 else
3956 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3957 "Comment not terminated \n", NULL);
3958 in++;
3959 ctxt->input->col++;
3960 }
3961 in++;
3962 ctxt->input->col++;
3963 goto get_more;
3964 }
3965 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3966 xmlParseCommentComplex(ctxt, buf, len, size);
3967 ctxt->instate = state;
3968 return;
3969}
3970
Owen Taylor3473f882001-02-23 17:55:21 +00003971
3972/**
3973 * xmlParsePITarget:
3974 * @ctxt: an XML parser context
3975 *
3976 * parse the name of a PI
3977 *
3978 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3979 *
3980 * Returns the PITarget name or NULL
3981 */
3982
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003983const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003984xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003985 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003986
3987 name = xmlParseName(ctxt);
3988 if ((name != NULL) &&
3989 ((name[0] == 'x') || (name[0] == 'X')) &&
3990 ((name[1] == 'm') || (name[1] == 'M')) &&
3991 ((name[2] == 'l') || (name[2] == 'L'))) {
3992 int i;
3993 if ((name[0] == 'x') && (name[1] == 'm') &&
3994 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003995 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003996 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003997 return(name);
3998 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003999 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004000 return(name);
4001 }
4002 for (i = 0;;i++) {
4003 if (xmlW3CPIs[i] == NULL) break;
4004 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4005 return(name);
4006 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004007 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4008 "xmlParsePITarget: invalid name prefix 'xml'\n",
4009 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004010 }
4011 return(name);
4012}
4013
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004014#ifdef LIBXML_CATALOG_ENABLED
4015/**
4016 * xmlParseCatalogPI:
4017 * @ctxt: an XML parser context
4018 * @catalog: the PI value string
4019 *
4020 * parse an XML Catalog Processing Instruction.
4021 *
4022 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4023 *
4024 * Occurs only if allowed by the user and if happening in the Misc
4025 * part of the document before any doctype informations
4026 * This will add the given catalog to the parsing context in order
4027 * to be used if there is a resolution need further down in the document
4028 */
4029
4030static void
4031xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4032 xmlChar *URL = NULL;
4033 const xmlChar *tmp, *base;
4034 xmlChar marker;
4035
4036 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004037 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004038 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4039 goto error;
4040 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004041 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004042 if (*tmp != '=') {
4043 return;
4044 }
4045 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004046 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004047 marker = *tmp;
4048 if ((marker != '\'') && (marker != '"'))
4049 goto error;
4050 tmp++;
4051 base = tmp;
4052 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4053 if (*tmp == 0)
4054 goto error;
4055 URL = xmlStrndup(base, tmp - base);
4056 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004057 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004058 if (*tmp != 0)
4059 goto error;
4060
4061 if (URL != NULL) {
4062 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4063 xmlFree(URL);
4064 }
4065 return;
4066
4067error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004068 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4069 "Catalog PI syntax error: %s\n",
4070 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004071 if (URL != NULL)
4072 xmlFree(URL);
4073}
4074#endif
4075
Owen Taylor3473f882001-02-23 17:55:21 +00004076/**
4077 * xmlParsePI:
4078 * @ctxt: an XML parser context
4079 *
4080 * parse an XML Processing Instruction.
4081 *
4082 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4083 *
4084 * The processing is transfered to SAX once parsed.
4085 */
4086
4087void
4088xmlParsePI(xmlParserCtxtPtr ctxt) {
4089 xmlChar *buf = NULL;
4090 int len = 0;
4091 int size = XML_PARSER_BUFFER_SIZE;
4092 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004093 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004094 xmlParserInputState state;
4095 int count = 0;
4096
4097 if ((RAW == '<') && (NXT(1) == '?')) {
4098 xmlParserInputPtr input = ctxt->input;
4099 state = ctxt->instate;
4100 ctxt->instate = XML_PARSER_PI;
4101 /*
4102 * this is a Processing Instruction.
4103 */
4104 SKIP(2);
4105 SHRINK;
4106
4107 /*
4108 * Parse the target name and check for special support like
4109 * namespace.
4110 */
4111 target = xmlParsePITarget(ctxt);
4112 if (target != NULL) {
4113 if ((RAW == '?') && (NXT(1) == '>')) {
4114 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004115 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4116 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004117 }
4118 SKIP(2);
4119
4120 /*
4121 * SAX: PI detected.
4122 */
4123 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4124 (ctxt->sax->processingInstruction != NULL))
4125 ctxt->sax->processingInstruction(ctxt->userData,
4126 target, NULL);
4127 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004128 return;
4129 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004130 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004131 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004132 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004133 ctxt->instate = state;
4134 return;
4135 }
4136 cur = CUR;
4137 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004138 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4139 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004140 }
4141 SKIP_BLANKS;
4142 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004143 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004144 ((cur != '?') || (NXT(1) != '>'))) {
4145 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004146 xmlChar *tmp;
4147
Owen Taylor3473f882001-02-23 17:55:21 +00004148 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004149 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4150 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004151 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004152 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004153 ctxt->instate = state;
4154 return;
4155 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004156 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004157 }
4158 count++;
4159 if (count > 50) {
4160 GROW;
4161 count = 0;
4162 }
4163 COPY_BUF(l,buf,len,cur);
4164 NEXTL(l);
4165 cur = CUR_CHAR(l);
4166 if (cur == 0) {
4167 SHRINK;
4168 GROW;
4169 cur = CUR_CHAR(l);
4170 }
4171 }
4172 buf[len] = 0;
4173 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004174 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4175 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 } else {
4177 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004178 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4179 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004180 }
4181 SKIP(2);
4182
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004183#ifdef LIBXML_CATALOG_ENABLED
4184 if (((state == XML_PARSER_MISC) ||
4185 (state == XML_PARSER_START)) &&
4186 (xmlStrEqual(target, XML_CATALOG_PI))) {
4187 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4188 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4189 (allow == XML_CATA_ALLOW_ALL))
4190 xmlParseCatalogPI(ctxt, buf);
4191 }
4192#endif
4193
4194
Owen Taylor3473f882001-02-23 17:55:21 +00004195 /*
4196 * SAX: PI detected.
4197 */
4198 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4199 (ctxt->sax->processingInstruction != NULL))
4200 ctxt->sax->processingInstruction(ctxt->userData,
4201 target, buf);
4202 }
4203 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004204 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004205 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004206 }
4207 ctxt->instate = state;
4208 }
4209}
4210
4211/**
4212 * xmlParseNotationDecl:
4213 * @ctxt: an XML parser context
4214 *
4215 * parse a notation declaration
4216 *
4217 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4218 *
4219 * Hence there is actually 3 choices:
4220 * 'PUBLIC' S PubidLiteral
4221 * 'PUBLIC' S PubidLiteral S SystemLiteral
4222 * and 'SYSTEM' S SystemLiteral
4223 *
4224 * See the NOTE on xmlParseExternalID().
4225 */
4226
4227void
4228xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004229 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 xmlChar *Pubid;
4231 xmlChar *Systemid;
4232
Daniel Veillarda07050d2003-10-19 14:46:32 +00004233 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004234 xmlParserInputPtr input = ctxt->input;
4235 SHRINK;
4236 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004237 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004238 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4239 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004240 return;
4241 }
4242 SKIP_BLANKS;
4243
Daniel Veillard76d66f42001-05-16 21:05:17 +00004244 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004245 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 return;
4248 }
William M. Brack76e95df2003-10-18 16:20:14 +00004249 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004250 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004251 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004252 return;
4253 }
4254 SKIP_BLANKS;
4255
4256 /*
4257 * Parse the IDs.
4258 */
4259 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4260 SKIP_BLANKS;
4261
4262 if (RAW == '>') {
4263 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004264 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4265 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004266 }
4267 NEXT;
4268 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4269 (ctxt->sax->notationDecl != NULL))
4270 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4271 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004272 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004273 }
Owen Taylor3473f882001-02-23 17:55:21 +00004274 if (Systemid != NULL) xmlFree(Systemid);
4275 if (Pubid != NULL) xmlFree(Pubid);
4276 }
4277}
4278
4279/**
4280 * xmlParseEntityDecl:
4281 * @ctxt: an XML parser context
4282 *
4283 * parse <!ENTITY declarations
4284 *
4285 * [70] EntityDecl ::= GEDecl | PEDecl
4286 *
4287 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4288 *
4289 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4290 *
4291 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4292 *
4293 * [74] PEDef ::= EntityValue | ExternalID
4294 *
4295 * [76] NDataDecl ::= S 'NDATA' S Name
4296 *
4297 * [ VC: Notation Declared ]
4298 * The Name must match the declared name of a notation.
4299 */
4300
4301void
4302xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004303 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004304 xmlChar *value = NULL;
4305 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004306 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004307 int isParameter = 0;
4308 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004309 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004310
Daniel Veillard4c778d82005-01-23 17:37:44 +00004311 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004312 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004313 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SHRINK;
4315 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004316 skipped = SKIP_BLANKS;
4317 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004318 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4319 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004320 }
Owen Taylor3473f882001-02-23 17:55:21 +00004321
4322 if (RAW == '%') {
4323 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004324 skipped = SKIP_BLANKS;
4325 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004326 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4327 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004328 }
Owen Taylor3473f882001-02-23 17:55:21 +00004329 isParameter = 1;
4330 }
4331
Daniel Veillard76d66f42001-05-16 21:05:17 +00004332 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004333 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004334 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4335 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004336 return;
4337 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004338 skipped = SKIP_BLANKS;
4339 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4341 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
Owen Taylor3473f882001-02-23 17:55:21 +00004343
Daniel Veillardf5582f12002-06-11 10:08:16 +00004344 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004345 /*
4346 * handle the various case of definitions...
4347 */
4348 if (isParameter) {
4349 if ((RAW == '"') || (RAW == '\'')) {
4350 value = xmlParseEntityValue(ctxt, &orig);
4351 if (value) {
4352 if ((ctxt->sax != NULL) &&
4353 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4354 ctxt->sax->entityDecl(ctxt->userData, name,
4355 XML_INTERNAL_PARAMETER_ENTITY,
4356 NULL, NULL, value);
4357 }
4358 } else {
4359 URI = xmlParseExternalID(ctxt, &literal, 1);
4360 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004361 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 if (URI) {
4364 xmlURIPtr uri;
4365
4366 uri = xmlParseURI((const char *) URI);
4367 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004368 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4369 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004370 /*
4371 * This really ought to be a well formedness error
4372 * but the XML Core WG decided otherwise c.f. issue
4373 * E26 of the XML erratas.
4374 */
Owen Taylor3473f882001-02-23 17:55:21 +00004375 } else {
4376 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004377 /*
4378 * Okay this is foolish to block those but not
4379 * invalid URIs.
4380 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004381 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004382 } else {
4383 if ((ctxt->sax != NULL) &&
4384 (!ctxt->disableSAX) &&
4385 (ctxt->sax->entityDecl != NULL))
4386 ctxt->sax->entityDecl(ctxt->userData, name,
4387 XML_EXTERNAL_PARAMETER_ENTITY,
4388 literal, URI, NULL);
4389 }
4390 xmlFreeURI(uri);
4391 }
4392 }
4393 }
4394 } else {
4395 if ((RAW == '"') || (RAW == '\'')) {
4396 value = xmlParseEntityValue(ctxt, &orig);
4397 if ((ctxt->sax != NULL) &&
4398 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4399 ctxt->sax->entityDecl(ctxt->userData, name,
4400 XML_INTERNAL_GENERAL_ENTITY,
4401 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004402 /*
4403 * For expat compatibility in SAX mode.
4404 */
4405 if ((ctxt->myDoc == NULL) ||
4406 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4407 if (ctxt->myDoc == NULL) {
4408 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4409 }
4410 if (ctxt->myDoc->intSubset == NULL)
4411 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4412 BAD_CAST "fake", NULL, NULL);
4413
Daniel Veillard1af9a412003-08-20 22:54:39 +00004414 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4415 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004416 }
Owen Taylor3473f882001-02-23 17:55:21 +00004417 } else {
4418 URI = xmlParseExternalID(ctxt, &literal, 1);
4419 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004420 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004421 }
4422 if (URI) {
4423 xmlURIPtr uri;
4424
4425 uri = xmlParseURI((const char *)URI);
4426 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004427 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4428 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004429 /*
4430 * This really ought to be a well formedness error
4431 * but the XML Core WG decided otherwise c.f. issue
4432 * E26 of the XML erratas.
4433 */
Owen Taylor3473f882001-02-23 17:55:21 +00004434 } else {
4435 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004436 /*
4437 * Okay this is foolish to block those but not
4438 * invalid URIs.
4439 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004440 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004441 }
4442 xmlFreeURI(uri);
4443 }
4444 }
William M. Brack76e95df2003-10-18 16:20:14 +00004445 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004446 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4447 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004448 }
4449 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004450 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004451 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004452 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4454 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004455 }
4456 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004457 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004458 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4459 (ctxt->sax->unparsedEntityDecl != NULL))
4460 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4461 literal, URI, ndata);
4462 } else {
4463 if ((ctxt->sax != NULL) &&
4464 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4465 ctxt->sax->entityDecl(ctxt->userData, name,
4466 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4467 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004468 /*
4469 * For expat compatibility in SAX mode.
4470 * assuming the entity repalcement was asked for
4471 */
4472 if ((ctxt->replaceEntities != 0) &&
4473 ((ctxt->myDoc == NULL) ||
4474 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4475 if (ctxt->myDoc == NULL) {
4476 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4477 }
4478
4479 if (ctxt->myDoc->intSubset == NULL)
4480 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4481 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004482 xmlSAX2EntityDecl(ctxt, name,
4483 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4484 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004485 }
Owen Taylor3473f882001-02-23 17:55:21 +00004486 }
4487 }
4488 }
4489 SKIP_BLANKS;
4490 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004491 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004492 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004493 } else {
4494 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004495 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4496 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004497 }
4498 NEXT;
4499 }
4500 if (orig != NULL) {
4501 /*
4502 * Ugly mechanism to save the raw entity value.
4503 */
4504 xmlEntityPtr cur = NULL;
4505
4506 if (isParameter) {
4507 if ((ctxt->sax != NULL) &&
4508 (ctxt->sax->getParameterEntity != NULL))
4509 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4510 } else {
4511 if ((ctxt->sax != NULL) &&
4512 (ctxt->sax->getEntity != NULL))
4513 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004514 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004515 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004516 }
Owen Taylor3473f882001-02-23 17:55:21 +00004517 }
4518 if (cur != NULL) {
4519 if (cur->orig != NULL)
4520 xmlFree(orig);
4521 else
4522 cur->orig = orig;
4523 } else
4524 xmlFree(orig);
4525 }
Owen Taylor3473f882001-02-23 17:55:21 +00004526 if (value != NULL) xmlFree(value);
4527 if (URI != NULL) xmlFree(URI);
4528 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004529 }
4530}
4531
4532/**
4533 * xmlParseDefaultDecl:
4534 * @ctxt: an XML parser context
4535 * @value: Receive a possible fixed default value for the attribute
4536 *
4537 * Parse an attribute default declaration
4538 *
4539 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4540 *
4541 * [ VC: Required Attribute ]
4542 * if the default declaration is the keyword #REQUIRED, then the
4543 * attribute must be specified for all elements of the type in the
4544 * attribute-list declaration.
4545 *
4546 * [ VC: Attribute Default Legal ]
4547 * The declared default value must meet the lexical constraints of
4548 * the declared attribute type c.f. xmlValidateAttributeDecl()
4549 *
4550 * [ VC: Fixed Attribute Default ]
4551 * if an attribute has a default value declared with the #FIXED
4552 * keyword, instances of that attribute must match the default value.
4553 *
4554 * [ WFC: No < in Attribute Values ]
4555 * handled in xmlParseAttValue()
4556 *
4557 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4558 * or XML_ATTRIBUTE_FIXED.
4559 */
4560
4561int
4562xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4563 int val;
4564 xmlChar *ret;
4565
4566 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004567 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004568 SKIP(9);
4569 return(XML_ATTRIBUTE_REQUIRED);
4570 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004571 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004572 SKIP(8);
4573 return(XML_ATTRIBUTE_IMPLIED);
4574 }
4575 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004576 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004577 SKIP(6);
4578 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004579 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004580 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4581 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004582 }
4583 SKIP_BLANKS;
4584 }
4585 ret = xmlParseAttValue(ctxt);
4586 ctxt->instate = XML_PARSER_DTD;
4587 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004588 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004589 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004590 } else
4591 *value = ret;
4592 return(val);
4593}
4594
4595/**
4596 * xmlParseNotationType:
4597 * @ctxt: an XML parser context
4598 *
4599 * parse an Notation attribute type.
4600 *
4601 * Note: the leading 'NOTATION' S part has already being parsed...
4602 *
4603 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4604 *
4605 * [ VC: Notation Attributes ]
4606 * Values of this type must match one of the notation names included
4607 * in the declaration; all notation names in the declaration must be declared.
4608 *
4609 * Returns: the notation attribute tree built while parsing
4610 */
4611
4612xmlEnumerationPtr
4613xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004614 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004615 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4616
4617 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004618 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004619 return(NULL);
4620 }
4621 SHRINK;
4622 do {
4623 NEXT;
4624 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004625 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004626 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004627 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4628 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004629 return(ret);
4630 }
4631 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004632 if (cur == NULL) return(ret);
4633 if (last == NULL) ret = last = cur;
4634 else {
4635 last->next = cur;
4636 last = cur;
4637 }
4638 SKIP_BLANKS;
4639 } while (RAW == '|');
4640 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004641 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004642 if ((last != NULL) && (last != ret))
4643 xmlFreeEnumeration(last);
4644 return(ret);
4645 }
4646 NEXT;
4647 return(ret);
4648}
4649
4650/**
4651 * xmlParseEnumerationType:
4652 * @ctxt: an XML parser context
4653 *
4654 * parse an Enumeration attribute type.
4655 *
4656 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4657 *
4658 * [ VC: Enumeration ]
4659 * Values of this type must match one of the Nmtoken tokens in
4660 * the declaration
4661 *
4662 * Returns: the enumeration attribute tree built while parsing
4663 */
4664
4665xmlEnumerationPtr
4666xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4667 xmlChar *name;
4668 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4669
4670 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004671 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004672 return(NULL);
4673 }
4674 SHRINK;
4675 do {
4676 NEXT;
4677 SKIP_BLANKS;
4678 name = xmlParseNmtoken(ctxt);
4679 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004680 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004681 return(ret);
4682 }
4683 cur = xmlCreateEnumeration(name);
4684 xmlFree(name);
4685 if (cur == NULL) return(ret);
4686 if (last == NULL) ret = last = cur;
4687 else {
4688 last->next = cur;
4689 last = cur;
4690 }
4691 SKIP_BLANKS;
4692 } while (RAW == '|');
4693 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004694 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004695 return(ret);
4696 }
4697 NEXT;
4698 return(ret);
4699}
4700
4701/**
4702 * xmlParseEnumeratedType:
4703 * @ctxt: an XML parser context
4704 * @tree: the enumeration tree built while parsing
4705 *
4706 * parse an Enumerated attribute type.
4707 *
4708 * [57] EnumeratedType ::= NotationType | Enumeration
4709 *
4710 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4711 *
4712 *
4713 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4714 */
4715
4716int
4717xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004718 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004719 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004720 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004721 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4722 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004723 return(0);
4724 }
4725 SKIP_BLANKS;
4726 *tree = xmlParseNotationType(ctxt);
4727 if (*tree == NULL) return(0);
4728 return(XML_ATTRIBUTE_NOTATION);
4729 }
4730 *tree = xmlParseEnumerationType(ctxt);
4731 if (*tree == NULL) return(0);
4732 return(XML_ATTRIBUTE_ENUMERATION);
4733}
4734
4735/**
4736 * xmlParseAttributeType:
4737 * @ctxt: an XML parser context
4738 * @tree: the enumeration tree built while parsing
4739 *
4740 * parse the Attribute list def for an element
4741 *
4742 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4743 *
4744 * [55] StringType ::= 'CDATA'
4745 *
4746 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4747 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4748 *
4749 * Validity constraints for attribute values syntax are checked in
4750 * xmlValidateAttributeValue()
4751 *
4752 * [ VC: ID ]
4753 * Values of type ID must match the Name production. A name must not
4754 * appear more than once in an XML document as a value of this type;
4755 * i.e., ID values must uniquely identify the elements which bear them.
4756 *
4757 * [ VC: One ID per Element Type ]
4758 * No element type may have more than one ID attribute specified.
4759 *
4760 * [ VC: ID Attribute Default ]
4761 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4762 *
4763 * [ VC: IDREF ]
4764 * Values of type IDREF must match the Name production, and values
4765 * of type IDREFS must match Names; each IDREF Name must match the value
4766 * of an ID attribute on some element in the XML document; i.e. IDREF
4767 * values must match the value of some ID attribute.
4768 *
4769 * [ VC: Entity Name ]
4770 * Values of type ENTITY must match the Name production, values
4771 * of type ENTITIES must match Names; each Entity Name must match the
4772 * name of an unparsed entity declared in the DTD.
4773 *
4774 * [ VC: Name Token ]
4775 * Values of type NMTOKEN must match the Nmtoken production; values
4776 * of type NMTOKENS must match Nmtokens.
4777 *
4778 * Returns the attribute type
4779 */
4780int
4781xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4782 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004783 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004784 SKIP(5);
4785 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004786 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004787 SKIP(6);
4788 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004789 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004790 SKIP(5);
4791 return(XML_ATTRIBUTE_IDREF);
4792 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4793 SKIP(2);
4794 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004795 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004796 SKIP(6);
4797 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004798 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004799 SKIP(8);
4800 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004801 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004802 SKIP(8);
4803 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004804 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004805 SKIP(7);
4806 return(XML_ATTRIBUTE_NMTOKEN);
4807 }
4808 return(xmlParseEnumeratedType(ctxt, tree));
4809}
4810
4811/**
4812 * xmlParseAttributeListDecl:
4813 * @ctxt: an XML parser context
4814 *
4815 * : parse the Attribute list def for an element
4816 *
4817 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4818 *
4819 * [53] AttDef ::= S Name S AttType S DefaultDecl
4820 *
4821 */
4822void
4823xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004824 const xmlChar *elemName;
4825 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004826 xmlEnumerationPtr tree;
4827
Daniel Veillarda07050d2003-10-19 14:46:32 +00004828 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004829 xmlParserInputPtr input = ctxt->input;
4830
4831 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004832 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004833 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004834 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004835 }
4836 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004837 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004838 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004839 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4840 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004841 return;
4842 }
4843 SKIP_BLANKS;
4844 GROW;
4845 while (RAW != '>') {
4846 const xmlChar *check = CUR_PTR;
4847 int type;
4848 int def;
4849 xmlChar *defaultValue = NULL;
4850
4851 GROW;
4852 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004853 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004854 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4856 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004857 break;
4858 }
4859 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004860 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004861 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004862 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004863 if (defaultValue != NULL)
4864 xmlFree(defaultValue);
4865 break;
4866 }
4867 SKIP_BLANKS;
4868
4869 type = xmlParseAttributeType(ctxt, &tree);
4870 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004871 if (defaultValue != NULL)
4872 xmlFree(defaultValue);
4873 break;
4874 }
4875
4876 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004877 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004878 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4879 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004880 if (defaultValue != NULL)
4881 xmlFree(defaultValue);
4882 if (tree != NULL)
4883 xmlFreeEnumeration(tree);
4884 break;
4885 }
4886 SKIP_BLANKS;
4887
4888 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4889 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004890 if (defaultValue != NULL)
4891 xmlFree(defaultValue);
4892 if (tree != NULL)
4893 xmlFreeEnumeration(tree);
4894 break;
4895 }
4896
4897 GROW;
4898 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004899 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004900 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004901 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004902 if (defaultValue != NULL)
4903 xmlFree(defaultValue);
4904 if (tree != NULL)
4905 xmlFreeEnumeration(tree);
4906 break;
4907 }
4908 SKIP_BLANKS;
4909 }
4910 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004911 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4912 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004913 if (defaultValue != NULL)
4914 xmlFree(defaultValue);
4915 if (tree != NULL)
4916 xmlFreeEnumeration(tree);
4917 break;
4918 }
4919 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4920 (ctxt->sax->attributeDecl != NULL))
4921 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4922 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004923 else if (tree != NULL)
4924 xmlFreeEnumeration(tree);
4925
4926 if ((ctxt->sax2) && (defaultValue != NULL) &&
4927 (def != XML_ATTRIBUTE_IMPLIED) &&
4928 (def != XML_ATTRIBUTE_REQUIRED)) {
4929 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4930 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004931 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4932 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4933 }
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (defaultValue != NULL)
4935 xmlFree(defaultValue);
4936 GROW;
4937 }
4938 if (RAW == '>') {
4939 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004940 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4941 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004942 }
4943 NEXT;
4944 }
Owen Taylor3473f882001-02-23 17:55:21 +00004945 }
4946}
4947
4948/**
4949 * xmlParseElementMixedContentDecl:
4950 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004951 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004952 *
4953 * parse the declaration for a Mixed Element content
4954 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4955 *
4956 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4957 * '(' S? '#PCDATA' S? ')'
4958 *
4959 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4960 *
4961 * [ VC: No Duplicate Types ]
4962 * The same name must not appear more than once in a single
4963 * mixed-content declaration.
4964 *
4965 * returns: the list of the xmlElementContentPtr describing the element choices
4966 */
4967xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004968xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004969 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004970 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004971
4972 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004973 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004974 SKIP(7);
4975 SKIP_BLANKS;
4976 SHRINK;
4977 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004978 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004979 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4980"Element content declaration doesn't start and stop in the same entity\n",
4981 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004982 }
Owen Taylor3473f882001-02-23 17:55:21 +00004983 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004984 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004985 if (RAW == '*') {
4986 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4987 NEXT;
4988 }
4989 return(ret);
4990 }
4991 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004992 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 if (ret == NULL) return(NULL);
4994 }
4995 while (RAW == '|') {
4996 NEXT;
4997 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004998 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004999 if (ret == NULL) return(NULL);
5000 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005001 if (cur != NULL)
5002 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005003 cur = ret;
5004 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005005 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005006 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005007 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005008 if (n->c1 != NULL)
5009 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005010 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005011 if (n != NULL)
5012 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005013 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005014 }
5015 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005016 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005017 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005018 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005019 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005020 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005021 return(NULL);
5022 }
5023 SKIP_BLANKS;
5024 GROW;
5025 }
5026 if ((RAW == ')') && (NXT(1) == '*')) {
5027 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005028 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005029 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005030 if (cur->c2 != NULL)
5031 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 }
5033 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005034 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005035 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5036"Element content declaration doesn't start and stop in the same entity\n",
5037 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005038 }
Owen Taylor3473f882001-02-23 17:55:21 +00005039 SKIP(2);
5040 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005041 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005042 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005043 return(NULL);
5044 }
5045
5046 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005047 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005048 }
5049 return(ret);
5050}
5051
5052/**
5053 * xmlParseElementChildrenContentDecl:
5054 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005055 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005056 *
5057 * parse the declaration for a Mixed Element content
5058 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5059 *
5060 *
5061 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5062 *
5063 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5064 *
5065 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5066 *
5067 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5068 *
5069 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5070 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005071 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005072 * opening or closing parentheses in a choice, seq, or Mixed
5073 * construct is contained in the replacement text for a parameter
5074 * entity, both must be contained in the same replacement text. For
5075 * interoperability, if a parameter-entity reference appears in a
5076 * choice, seq, or Mixed construct, its replacement text should not
5077 * be empty, and neither the first nor last non-blank character of
5078 * the replacement text should be a connector (| or ,).
5079 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005080 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005081 * hierarchy.
5082 */
5083xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005084xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005085 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005086 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005087 xmlChar type = 0;
5088
5089 SKIP_BLANKS;
5090 GROW;
5091 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005092 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005093
Owen Taylor3473f882001-02-23 17:55:21 +00005094 /* Recurse on first child */
5095 NEXT;
5096 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005097 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005098 SKIP_BLANKS;
5099 GROW;
5100 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005101 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005103 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005104 return(NULL);
5105 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005106 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005107 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005108 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005109 return(NULL);
5110 }
Owen Taylor3473f882001-02-23 17:55:21 +00005111 GROW;
5112 if (RAW == '?') {
5113 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5114 NEXT;
5115 } else if (RAW == '*') {
5116 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5117 NEXT;
5118 } else if (RAW == '+') {
5119 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5120 NEXT;
5121 } else {
5122 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5123 }
Owen Taylor3473f882001-02-23 17:55:21 +00005124 GROW;
5125 }
5126 SKIP_BLANKS;
5127 SHRINK;
5128 while (RAW != ')') {
5129 /*
5130 * Each loop we parse one separator and one element.
5131 */
5132 if (RAW == ',') {
5133 if (type == 0) type = CUR;
5134
5135 /*
5136 * Detect "Name | Name , Name" error
5137 */
5138 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005139 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005140 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005141 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005142 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005143 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005144 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005145 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005146 return(NULL);
5147 }
5148 NEXT;
5149
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005150 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005151 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005152 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005153 xmlFreeDocElementContent(ctxt->myDoc, last);
5154 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 return(NULL);
5156 }
5157 if (last == NULL) {
5158 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005159 if (ret != NULL)
5160 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 ret = cur = op;
5162 } else {
5163 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005164 if (op != NULL)
5165 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005166 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005167 if (last != NULL)
5168 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005169 cur =op;
5170 last = NULL;
5171 }
5172 } else if (RAW == '|') {
5173 if (type == 0) type = CUR;
5174
5175 /*
5176 * Detect "Name , Name | Name" error
5177 */
5178 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005179 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005180 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005181 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005182 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005183 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005184 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005185 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005186 return(NULL);
5187 }
5188 NEXT;
5189
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005190 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005191 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005192 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005193 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005194 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005195 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 return(NULL);
5197 }
5198 if (last == NULL) {
5199 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005200 if (ret != NULL)
5201 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005202 ret = cur = op;
5203 } else {
5204 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005205 if (op != NULL)
5206 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005207 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005208 if (last != NULL)
5209 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005210 cur =op;
5211 last = NULL;
5212 }
5213 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005214 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005215 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005216 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005217 return(NULL);
5218 }
5219 GROW;
5220 SKIP_BLANKS;
5221 GROW;
5222 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005223 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005224 /* Recurse on second child */
5225 NEXT;
5226 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005227 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005228 SKIP_BLANKS;
5229 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005230 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005231 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005232 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005233 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005234 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005235 return(NULL);
5236 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005237 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005238 if (RAW == '?') {
5239 last->ocur = XML_ELEMENT_CONTENT_OPT;
5240 NEXT;
5241 } else if (RAW == '*') {
5242 last->ocur = XML_ELEMENT_CONTENT_MULT;
5243 NEXT;
5244 } else if (RAW == '+') {
5245 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5246 NEXT;
5247 } else {
5248 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5249 }
5250 }
5251 SKIP_BLANKS;
5252 GROW;
5253 }
5254 if ((cur != NULL) && (last != NULL)) {
5255 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005256 if (last != NULL)
5257 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005258 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005259 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005260 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5261"Element content declaration doesn't start and stop in the same entity\n",
5262 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005263 }
Owen Taylor3473f882001-02-23 17:55:21 +00005264 NEXT;
5265 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005266 if (ret != NULL) {
5267 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5268 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5269 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5270 else
5271 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5272 }
Owen Taylor3473f882001-02-23 17:55:21 +00005273 NEXT;
5274 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005275 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005276 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005277 cur = ret;
5278 /*
5279 * Some normalization:
5280 * (a | b* | c?)* == (a | b | c)*
5281 */
5282 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5283 if ((cur->c1 != NULL) &&
5284 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5285 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5286 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5287 if ((cur->c2 != NULL) &&
5288 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5289 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5290 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5291 cur = cur->c2;
5292 }
5293 }
Owen Taylor3473f882001-02-23 17:55:21 +00005294 NEXT;
5295 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005296 if (ret != NULL) {
5297 int found = 0;
5298
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005299 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5300 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5301 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005302 else
5303 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005304 /*
5305 * Some normalization:
5306 * (a | b*)+ == (a | b)*
5307 * (a | b?)+ == (a | b)*
5308 */
5309 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5310 if ((cur->c1 != NULL) &&
5311 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5312 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5313 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5314 found = 1;
5315 }
5316 if ((cur->c2 != NULL) &&
5317 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5318 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5319 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5320 found = 1;
5321 }
5322 cur = cur->c2;
5323 }
5324 if (found)
5325 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5326 }
Owen Taylor3473f882001-02-23 17:55:21 +00005327 NEXT;
5328 }
5329 return(ret);
5330}
5331
5332/**
5333 * xmlParseElementContentDecl:
5334 * @ctxt: an XML parser context
5335 * @name: the name of the element being defined.
5336 * @result: the Element Content pointer will be stored here if any
5337 *
5338 * parse the declaration for an Element content either Mixed or Children,
5339 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5340 *
5341 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5342 *
5343 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5344 */
5345
5346int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005347xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005348 xmlElementContentPtr *result) {
5349
5350 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005351 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005352 int res;
5353
5354 *result = NULL;
5355
5356 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005357 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005358 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005359 return(-1);
5360 }
5361 NEXT;
5362 GROW;
5363 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005364 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005365 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005366 res = XML_ELEMENT_TYPE_MIXED;
5367 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005368 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005369 res = XML_ELEMENT_TYPE_ELEMENT;
5370 }
Owen Taylor3473f882001-02-23 17:55:21 +00005371 SKIP_BLANKS;
5372 *result = tree;
5373 return(res);
5374}
5375
5376/**
5377 * xmlParseElementDecl:
5378 * @ctxt: an XML parser context
5379 *
5380 * parse an Element declaration.
5381 *
5382 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5383 *
5384 * [ VC: Unique Element Type Declaration ]
5385 * No element type may be declared more than once
5386 *
5387 * Returns the type of the element, or -1 in case of error
5388 */
5389int
5390xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005391 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005392 int ret = -1;
5393 xmlElementContentPtr content = NULL;
5394
Daniel Veillard4c778d82005-01-23 17:37:44 +00005395 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005396 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005397 xmlParserInputPtr input = ctxt->input;
5398
5399 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005400 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005401 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5402 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005403 }
5404 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005405 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005407 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5408 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005409 return(-1);
5410 }
5411 while ((RAW == 0) && (ctxt->inputNr > 1))
5412 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005413 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005414 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5415 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005416 }
5417 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005418 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005419 SKIP(5);
5420 /*
5421 * Element must always be empty.
5422 */
5423 ret = XML_ELEMENT_TYPE_EMPTY;
5424 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5425 (NXT(2) == 'Y')) {
5426 SKIP(3);
5427 /*
5428 * Element is a generic container.
5429 */
5430 ret = XML_ELEMENT_TYPE_ANY;
5431 } else if (RAW == '(') {
5432 ret = xmlParseElementContentDecl(ctxt, name, &content);
5433 } else {
5434 /*
5435 * [ WFC: PEs in Internal Subset ] error handling.
5436 */
5437 if ((RAW == '%') && (ctxt->external == 0) &&
5438 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005439 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005440 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005441 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005442 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005443 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5444 }
Owen Taylor3473f882001-02-23 17:55:21 +00005445 return(-1);
5446 }
5447
5448 SKIP_BLANKS;
5449 /*
5450 * Pop-up of finished entities.
5451 */
5452 while ((RAW == 0) && (ctxt->inputNr > 1))
5453 xmlPopInput(ctxt);
5454 SKIP_BLANKS;
5455
5456 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005457 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005458 if (content != NULL) {
5459 xmlFreeDocElementContent(ctxt->myDoc, content);
5460 }
Owen Taylor3473f882001-02-23 17:55:21 +00005461 } else {
5462 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005463 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5464 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005465 }
5466
5467 NEXT;
5468 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005469 (ctxt->sax->elementDecl != NULL)) {
5470 if (content != NULL)
5471 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5473 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005474 if ((content != NULL) && (content->parent == NULL)) {
5475 /*
5476 * this is a trick: if xmlAddElementDecl is called,
5477 * instead of copying the full tree it is plugged directly
5478 * if called from the parser. Avoid duplicating the
5479 * interfaces or change the API/ABI
5480 */
5481 xmlFreeDocElementContent(ctxt->myDoc, content);
5482 }
5483 } else if (content != NULL) {
5484 xmlFreeDocElementContent(ctxt->myDoc, content);
5485 }
Owen Taylor3473f882001-02-23 17:55:21 +00005486 }
Owen Taylor3473f882001-02-23 17:55:21 +00005487 }
5488 return(ret);
5489}
5490
5491/**
Owen Taylor3473f882001-02-23 17:55:21 +00005492 * xmlParseConditionalSections
5493 * @ctxt: an XML parser context
5494 *
5495 * [61] conditionalSect ::= includeSect | ignoreSect
5496 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5497 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5498 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5499 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5500 */
5501
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005502static void
Owen Taylor3473f882001-02-23 17:55:21 +00005503xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5504 SKIP(3);
5505 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005506 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005507 SKIP(7);
5508 SKIP_BLANKS;
5509 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005510 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005511 } else {
5512 NEXT;
5513 }
5514 if (xmlParserDebugEntities) {
5515 if ((ctxt->input != NULL) && (ctxt->input->filename))
5516 xmlGenericError(xmlGenericErrorContext,
5517 "%s(%d): ", ctxt->input->filename,
5518 ctxt->input->line);
5519 xmlGenericError(xmlGenericErrorContext,
5520 "Entering INCLUDE Conditional Section\n");
5521 }
5522
5523 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5524 (NXT(2) != '>'))) {
5525 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005526 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005527
5528 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5529 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005530 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005531 NEXT;
5532 } else if (RAW == '%') {
5533 xmlParsePEReference(ctxt);
5534 } else
5535 xmlParseMarkupDecl(ctxt);
5536
5537 /*
5538 * Pop-up of finished entities.
5539 */
5540 while ((RAW == 0) && (ctxt->inputNr > 1))
5541 xmlPopInput(ctxt);
5542
Daniel Veillardfdc91562002-07-01 21:52:03 +00005543 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005544 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005545 break;
5546 }
5547 }
5548 if (xmlParserDebugEntities) {
5549 if ((ctxt->input != NULL) && (ctxt->input->filename))
5550 xmlGenericError(xmlGenericErrorContext,
5551 "%s(%d): ", ctxt->input->filename,
5552 ctxt->input->line);
5553 xmlGenericError(xmlGenericErrorContext,
5554 "Leaving INCLUDE Conditional Section\n");
5555 }
5556
Daniel Veillarda07050d2003-10-19 14:46:32 +00005557 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005558 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005559 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005560 int depth = 0;
5561
5562 SKIP(6);
5563 SKIP_BLANKS;
5564 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005565 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005566 } else {
5567 NEXT;
5568 }
5569 if (xmlParserDebugEntities) {
5570 if ((ctxt->input != NULL) && (ctxt->input->filename))
5571 xmlGenericError(xmlGenericErrorContext,
5572 "%s(%d): ", ctxt->input->filename,
5573 ctxt->input->line);
5574 xmlGenericError(xmlGenericErrorContext,
5575 "Entering IGNORE Conditional Section\n");
5576 }
5577
5578 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005579 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005580 * But disable SAX event generating DTD building in the meantime
5581 */
5582 state = ctxt->disableSAX;
5583 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005584 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005585 ctxt->instate = XML_PARSER_IGNORE;
5586
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005587 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005588 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5589 depth++;
5590 SKIP(3);
5591 continue;
5592 }
5593 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5594 if (--depth >= 0) SKIP(3);
5595 continue;
5596 }
5597 NEXT;
5598 continue;
5599 }
5600
5601 ctxt->disableSAX = state;
5602 ctxt->instate = instate;
5603
5604 if (xmlParserDebugEntities) {
5605 if ((ctxt->input != NULL) && (ctxt->input->filename))
5606 xmlGenericError(xmlGenericErrorContext,
5607 "%s(%d): ", ctxt->input->filename,
5608 ctxt->input->line);
5609 xmlGenericError(xmlGenericErrorContext,
5610 "Leaving IGNORE Conditional Section\n");
5611 }
5612
5613 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005614 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005615 }
5616
5617 if (RAW == 0)
5618 SHRINK;
5619
5620 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005621 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005622 } else {
5623 SKIP(3);
5624 }
5625}
5626
5627/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005628 * xmlParseMarkupDecl:
5629 * @ctxt: an XML parser context
5630 *
5631 * parse Markup declarations
5632 *
5633 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5634 * NotationDecl | PI | Comment
5635 *
5636 * [ VC: Proper Declaration/PE Nesting ]
5637 * Parameter-entity replacement text must be properly nested with
5638 * markup declarations. That is to say, if either the first character
5639 * or the last character of a markup declaration (markupdecl above) is
5640 * contained in the replacement text for a parameter-entity reference,
5641 * both must be contained in the same replacement text.
5642 *
5643 * [ WFC: PEs in Internal Subset ]
5644 * In the internal DTD subset, parameter-entity references can occur
5645 * only where markup declarations can occur, not within markup declarations.
5646 * (This does not apply to references that occur in external parameter
5647 * entities or to the external subset.)
5648 */
5649void
5650xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5651 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005652 if (CUR == '<') {
5653 if (NXT(1) == '!') {
5654 switch (NXT(2)) {
5655 case 'E':
5656 if (NXT(3) == 'L')
5657 xmlParseElementDecl(ctxt);
5658 else if (NXT(3) == 'N')
5659 xmlParseEntityDecl(ctxt);
5660 break;
5661 case 'A':
5662 xmlParseAttributeListDecl(ctxt);
5663 break;
5664 case 'N':
5665 xmlParseNotationDecl(ctxt);
5666 break;
5667 case '-':
5668 xmlParseComment(ctxt);
5669 break;
5670 default:
5671 /* there is an error but it will be detected later */
5672 break;
5673 }
5674 } else if (NXT(1) == '?') {
5675 xmlParsePI(ctxt);
5676 }
5677 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005678 /*
5679 * This is only for internal subset. On external entities,
5680 * the replacement is done before parsing stage
5681 */
5682 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5683 xmlParsePEReference(ctxt);
5684
5685 /*
5686 * Conditional sections are allowed from entities included
5687 * by PE References in the internal subset.
5688 */
5689 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5690 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5691 xmlParseConditionalSections(ctxt);
5692 }
5693 }
5694
5695 ctxt->instate = XML_PARSER_DTD;
5696}
5697
5698/**
5699 * xmlParseTextDecl:
5700 * @ctxt: an XML parser context
5701 *
5702 * parse an XML declaration header for external entities
5703 *
5704 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5705 *
5706 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5707 */
5708
5709void
5710xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5711 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005712 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005713
5714 /*
5715 * We know that '<?xml' is here.
5716 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005717 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005718 SKIP(5);
5719 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005720 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005721 return;
5722 }
5723
William M. Brack76e95df2003-10-18 16:20:14 +00005724 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005725 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5726 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005727 }
5728 SKIP_BLANKS;
5729
5730 /*
5731 * We may have the VersionInfo here.
5732 */
5733 version = xmlParseVersionInfo(ctxt);
5734 if (version == NULL)
5735 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005736 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005737 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005738 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5739 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005740 }
5741 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005742 ctxt->input->version = version;
5743
5744 /*
5745 * We must have the encoding declaration
5746 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005747 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005748 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5749 /*
5750 * The XML REC instructs us to stop parsing right here
5751 */
5752 return;
5753 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005754 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5755 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5756 "Missing encoding in text declaration\n");
5757 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005758
5759 SKIP_BLANKS;
5760 if ((RAW == '?') && (NXT(1) == '>')) {
5761 SKIP(2);
5762 } else if (RAW == '>') {
5763 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005764 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005765 NEXT;
5766 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005767 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005768 MOVETO_ENDTAG(CUR_PTR);
5769 NEXT;
5770 }
5771}
5772
5773/**
Owen Taylor3473f882001-02-23 17:55:21 +00005774 * xmlParseExternalSubset:
5775 * @ctxt: an XML parser context
5776 * @ExternalID: the external identifier
5777 * @SystemID: the system identifier (or URL)
5778 *
5779 * parse Markup declarations from an external subset
5780 *
5781 * [30] extSubset ::= textDecl? extSubsetDecl
5782 *
5783 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5784 */
5785void
5786xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5787 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005788 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005789 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005790 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005791 xmlParseTextDecl(ctxt);
5792 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5793 /*
5794 * The XML REC instructs us to stop parsing right here
5795 */
5796 ctxt->instate = XML_PARSER_EOF;
5797 return;
5798 }
5799 }
5800 if (ctxt->myDoc == NULL) {
5801 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5802 }
5803 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5804 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5805
5806 ctxt->instate = XML_PARSER_DTD;
5807 ctxt->external = 1;
5808 while (((RAW == '<') && (NXT(1) == '?')) ||
5809 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005810 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005811 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005812 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005813
5814 GROW;
5815 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5816 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005817 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005818 NEXT;
5819 } else if (RAW == '%') {
5820 xmlParsePEReference(ctxt);
5821 } else
5822 xmlParseMarkupDecl(ctxt);
5823
5824 /*
5825 * Pop-up of finished entities.
5826 */
5827 while ((RAW == 0) && (ctxt->inputNr > 1))
5828 xmlPopInput(ctxt);
5829
Daniel Veillardfdc91562002-07-01 21:52:03 +00005830 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005831 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005832 break;
5833 }
5834 }
5835
5836 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005837 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005838 }
5839
5840}
5841
5842/**
5843 * xmlParseReference:
5844 * @ctxt: an XML parser context
5845 *
5846 * parse and handle entity references in content, depending on the SAX
5847 * interface, this may end-up in a call to character() if this is a
5848 * CharRef, a predefined entity, if there is no reference() callback.
5849 * or if the parser was asked to switch to that mode.
5850 *
5851 * [67] Reference ::= EntityRef | CharRef
5852 */
5853void
5854xmlParseReference(xmlParserCtxtPtr ctxt) {
5855 xmlEntityPtr ent;
5856 xmlChar *val;
5857 if (RAW != '&') return;
5858
5859 if (NXT(1) == '#') {
5860 int i = 0;
5861 xmlChar out[10];
5862 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005863 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005864
5865 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5866 /*
5867 * So we are using non-UTF-8 buffers
5868 * Check that the char fit on 8bits, if not
5869 * generate a CharRef.
5870 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005871 if (value <= 0xFF) {
5872 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005873 out[1] = 0;
5874 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5875 (!ctxt->disableSAX))
5876 ctxt->sax->characters(ctxt->userData, out, 1);
5877 } else {
5878 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005879 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005880 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005881 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005882 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5883 (!ctxt->disableSAX))
5884 ctxt->sax->reference(ctxt->userData, out);
5885 }
5886 } else {
5887 /*
5888 * Just encode the value in UTF-8
5889 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005890 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 out[i] = 0;
5892 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5893 (!ctxt->disableSAX))
5894 ctxt->sax->characters(ctxt->userData, out, i);
5895 }
5896 } else {
5897 ent = xmlParseEntityRef(ctxt);
5898 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005899 if (!ctxt->wellFormed)
5900 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005901 if ((ent->name != NULL) &&
5902 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5903 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005904 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005905
5906
5907 /*
5908 * The first reference to the entity trigger a parsing phase
5909 * where the ent->children is filled with the result from
5910 * the parsing.
5911 */
5912 if (ent->children == NULL) {
5913 xmlChar *value;
5914 value = ent->content;
5915
5916 /*
5917 * Check that this entity is well formed
5918 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005919 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005920 (value[1] == 0) && (value[0] == '<') &&
5921 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5922 /*
5923 * DONE: get definite answer on this !!!
5924 * Lots of entity decls are used to declare a single
5925 * char
5926 * <!ENTITY lt "<">
5927 * Which seems to be valid since
5928 * 2.4: The ampersand character (&) and the left angle
5929 * bracket (<) may appear in their literal form only
5930 * when used ... They are also legal within the literal
5931 * entity value of an internal entity declaration;i
5932 * see "4.3.2 Well-Formed Parsed Entities".
5933 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5934 * Looking at the OASIS test suite and James Clark
5935 * tests, this is broken. However the XML REC uses
5936 * it. Is the XML REC not well-formed ????
5937 * This is a hack to avoid this problem
5938 *
5939 * ANSWER: since lt gt amp .. are already defined,
5940 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005941 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005942 * is lousy but acceptable.
5943 */
5944 list = xmlNewDocText(ctxt->myDoc, value);
5945 if (list != NULL) {
5946 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5947 (ent->children == NULL)) {
5948 ent->children = list;
5949 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005950 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005951 list->parent = (xmlNodePtr) ent;
5952 } else {
5953 xmlFreeNodeList(list);
5954 }
5955 } else if (list != NULL) {
5956 xmlFreeNodeList(list);
5957 }
5958 } else {
5959 /*
5960 * 4.3.2: An internal general parsed entity is well-formed
5961 * if its replacement text matches the production labeled
5962 * content.
5963 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005964
5965 void *user_data;
5966 /*
5967 * This is a bit hackish but this seems the best
5968 * way to make sure both SAX and DOM entity support
5969 * behaves okay.
5970 */
5971 if (ctxt->userData == ctxt)
5972 user_data = NULL;
5973 else
5974 user_data = ctxt->userData;
5975
Owen Taylor3473f882001-02-23 17:55:21 +00005976 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5977 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005978 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5979 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005980 ctxt->depth--;
5981 } else if (ent->etype ==
5982 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5983 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005984 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005985 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005986 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005987 ctxt->depth--;
5988 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005989 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005990 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5991 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005992 }
5993 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005994 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005995 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005996 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005997 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5998 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005999 (ent->children == NULL)) {
6000 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006001 if (ctxt->replaceEntities) {
6002 /*
6003 * Prune it directly in the generated document
6004 * except for single text nodes.
6005 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006006 if (((list->type == XML_TEXT_NODE) &&
6007 (list->next == NULL)) ||
6008 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006009 list->parent = (xmlNodePtr) ent;
6010 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006011 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006012 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006013 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006014 while (list != NULL) {
6015 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006016 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006017 if (list->next == NULL)
6018 ent->last = list;
6019 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006020 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006021 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006022#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006023 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6024 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006025#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006026 }
6027 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006028 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006029 while (list != NULL) {
6030 list->parent = (xmlNodePtr) ent;
6031 if (list->next == NULL)
6032 ent->last = list;
6033 list = list->next;
6034 }
Owen Taylor3473f882001-02-23 17:55:21 +00006035 }
6036 } else {
6037 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006038 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006039 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006040 } else if ((ret != XML_ERR_OK) &&
6041 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006042 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006043 } else if (list != NULL) {
6044 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006045 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006046 }
6047 }
6048 }
6049 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6050 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6051 /*
6052 * Create a node.
6053 */
6054 ctxt->sax->reference(ctxt->userData, ent->name);
6055 return;
6056 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006057 /*
6058 * There is a problem on the handling of _private for entities
6059 * (bug 155816): Should we copy the content of the field from
6060 * the entity (possibly overwriting some value set by the user
6061 * when a copy is created), should we leave it alone, or should
6062 * we try to take care of different situations? The problem
6063 * is exacerbated by the usage of this field by the xmlReader.
6064 * To fix this bug, we look at _private on the created node
6065 * and, if it's NULL, we copy in whatever was in the entity.
6066 * If it's not NULL we leave it alone. This is somewhat of a
6067 * hack - maybe we should have further tests to determine
6068 * what to do.
6069 */
Owen Taylor3473f882001-02-23 17:55:21 +00006070 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6071 /*
6072 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006073 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006074 * In the first occurrence list contains the replacement.
6075 * progressive == 2 means we are operating on the Reader
6076 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006077 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006078 if (((list == NULL) && (ent->owner == 0)) ||
6079 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006080 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006081
6082 /*
6083 * when operating on a reader, the entities definitions
6084 * are always owning the entities subtree.
6085 if (ctxt->parseMode == XML_PARSE_READER)
6086 ent->owner = 1;
6087 */
6088
Daniel Veillard62f313b2001-07-04 19:49:14 +00006089 cur = ent->children;
6090 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006091 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006092 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006093 if (nw->_private == NULL)
6094 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006095 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006096 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006097 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006098 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006099 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006100 if (cur == ent->last) {
6101 /*
6102 * needed to detect some strange empty
6103 * node cases in the reader tests
6104 */
6105 if ((ctxt->parseMode == XML_PARSE_READER) &&
6106 (nw->type == XML_ELEMENT_NODE) &&
6107 (nw->children == NULL))
6108 nw->extra = 1;
6109
Daniel Veillard62f313b2001-07-04 19:49:14 +00006110 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006111 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006112 cur = cur->next;
6113 }
Daniel Veillard81273902003-09-30 00:43:48 +00006114#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006115 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006116 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006117#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006118 } else if (list == NULL) {
6119 xmlNodePtr nw = NULL, cur, next, last,
6120 firstChild = NULL;
6121 /*
6122 * Copy the entity child list and make it the new
6123 * entity child list. The goal is to make sure any
6124 * ID or REF referenced will be the one from the
6125 * document content and not the entity copy.
6126 */
6127 cur = ent->children;
6128 ent->children = NULL;
6129 last = ent->last;
6130 ent->last = NULL;
6131 while (cur != NULL) {
6132 next = cur->next;
6133 cur->next = NULL;
6134 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006135 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006136 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006137 if (nw->_private == NULL)
6138 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006139 if (firstChild == NULL){
6140 firstChild = cur;
6141 }
6142 xmlAddChild((xmlNodePtr) ent, nw);
6143 xmlAddChild(ctxt->node, cur);
6144 }
6145 if (cur == last)
6146 break;
6147 cur = next;
6148 }
6149 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006150#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006151 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6152 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006153#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006154 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006155 const xmlChar *nbktext;
6156
Daniel Veillard62f313b2001-07-04 19:49:14 +00006157 /*
6158 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006159 * node with a possible previous text one which
6160 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006161 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006162 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6163 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006164 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006165 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006166 if ((ent->last != ent->children) &&
6167 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006168 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006169 xmlAddChildList(ctxt->node, ent->children);
6170 }
6171
Owen Taylor3473f882001-02-23 17:55:21 +00006172 /*
6173 * This is to avoid a nasty side effect, see
6174 * characters() in SAX.c
6175 */
6176 ctxt->nodemem = 0;
6177 ctxt->nodelen = 0;
6178 return;
6179 } else {
6180 /*
6181 * Probably running in SAX mode
6182 */
6183 xmlParserInputPtr input;
6184
6185 input = xmlNewEntityInputStream(ctxt, ent);
6186 xmlPushInput(ctxt, input);
6187 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006188 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6189 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006190 xmlParseTextDecl(ctxt);
6191 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6192 /*
6193 * The XML REC instructs us to stop parsing right here
6194 */
6195 ctxt->instate = XML_PARSER_EOF;
6196 return;
6197 }
6198 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006199 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6200 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006201 }
6202 }
6203 return;
6204 }
6205 }
6206 } else {
6207 val = ent->content;
6208 if (val == NULL) return;
6209 /*
6210 * inline the entity.
6211 */
6212 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6213 (!ctxt->disableSAX))
6214 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6215 }
6216 }
6217}
6218
6219/**
6220 * xmlParseEntityRef:
6221 * @ctxt: an XML parser context
6222 *
6223 * parse ENTITY references declarations
6224 *
6225 * [68] EntityRef ::= '&' Name ';'
6226 *
6227 * [ WFC: Entity Declared ]
6228 * In a document without any DTD, a document with only an internal DTD
6229 * subset which contains no parameter entity references, or a document
6230 * with "standalone='yes'", the Name given in the entity reference
6231 * must match that in an entity declaration, except that well-formed
6232 * documents need not declare any of the following entities: amp, lt,
6233 * gt, apos, quot. The declaration of a parameter entity must precede
6234 * any reference to it. Similarly, the declaration of a general entity
6235 * must precede any reference to it which appears in a default value in an
6236 * attribute-list declaration. Note that if entities are declared in the
6237 * external subset or in external parameter entities, a non-validating
6238 * processor is not obligated to read and process their declarations;
6239 * for such documents, the rule that an entity must be declared is a
6240 * well-formedness constraint only if standalone='yes'.
6241 *
6242 * [ WFC: Parsed Entity ]
6243 * An entity reference must not contain the name of an unparsed entity
6244 *
6245 * Returns the xmlEntityPtr if found, or NULL otherwise.
6246 */
6247xmlEntityPtr
6248xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006249 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006250 xmlEntityPtr ent = NULL;
6251
6252 GROW;
6253
6254 if (RAW == '&') {
6255 NEXT;
6256 name = xmlParseName(ctxt);
6257 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006258 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6259 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006260 } else {
6261 if (RAW == ';') {
6262 NEXT;
6263 /*
6264 * Ask first SAX for entity resolution, otherwise try the
6265 * predefined set.
6266 */
6267 if (ctxt->sax != NULL) {
6268 if (ctxt->sax->getEntity != NULL)
6269 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006270 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006271 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006272 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6273 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006274 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006275 }
Owen Taylor3473f882001-02-23 17:55:21 +00006276 }
6277 /*
6278 * [ WFC: Entity Declared ]
6279 * In a document without any DTD, a document with only an
6280 * internal DTD subset which contains no parameter entity
6281 * references, or a document with "standalone='yes'", the
6282 * Name given in the entity reference must match that in an
6283 * entity declaration, except that well-formed documents
6284 * need not declare any of the following entities: amp, lt,
6285 * gt, apos, quot.
6286 * The declaration of a parameter entity must precede any
6287 * reference to it.
6288 * Similarly, the declaration of a general entity must
6289 * precede any reference to it which appears in a default
6290 * value in an attribute-list declaration. Note that if
6291 * entities are declared in the external subset or in
6292 * external parameter entities, a non-validating processor
6293 * is not obligated to read and process their declarations;
6294 * for such documents, the rule that an entity must be
6295 * declared is a well-formedness constraint only if
6296 * standalone='yes'.
6297 */
6298 if (ent == NULL) {
6299 if ((ctxt->standalone == 1) ||
6300 ((ctxt->hasExternalSubset == 0) &&
6301 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006302 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006303 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006304 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006305 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006306 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006307 if ((ctxt->inSubset == 0) &&
6308 (ctxt->sax != NULL) &&
6309 (ctxt->sax->reference != NULL)) {
6310 ctxt->sax->reference(ctxt, name);
6311 }
Owen Taylor3473f882001-02-23 17:55:21 +00006312 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006313 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006314 }
6315
6316 /*
6317 * [ WFC: Parsed Entity ]
6318 * An entity reference must not contain the name of an
6319 * unparsed entity
6320 */
6321 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006322 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006323 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006324 }
6325
6326 /*
6327 * [ WFC: No External Entity References ]
6328 * Attribute values cannot contain direct or indirect
6329 * entity references to external entities.
6330 */
6331 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6332 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006333 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6334 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006335 }
6336 /*
6337 * [ WFC: No < in Attribute Values ]
6338 * The replacement text of any entity referred to directly or
6339 * indirectly in an attribute value (other than "&lt;") must
6340 * not contain a <.
6341 */
6342 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6343 (ent != NULL) &&
6344 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6345 (ent->content != NULL) &&
6346 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006347 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006348 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 }
6350
6351 /*
6352 * Internal check, no parameter entities here ...
6353 */
6354 else {
6355 switch (ent->etype) {
6356 case XML_INTERNAL_PARAMETER_ENTITY:
6357 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006358 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6359 "Attempt to reference the parameter entity '%s'\n",
6360 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006361 break;
6362 default:
6363 break;
6364 }
6365 }
6366
6367 /*
6368 * [ WFC: No Recursion ]
6369 * A parsed entity must not contain a recursive reference
6370 * to itself, either directly or indirectly.
6371 * Done somewhere else
6372 */
6373
6374 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006375 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006376 }
Owen Taylor3473f882001-02-23 17:55:21 +00006377 }
6378 }
6379 return(ent);
6380}
6381
6382/**
6383 * xmlParseStringEntityRef:
6384 * @ctxt: an XML parser context
6385 * @str: a pointer to an index in the string
6386 *
6387 * parse ENTITY references declarations, but this version parses it from
6388 * a string value.
6389 *
6390 * [68] EntityRef ::= '&' Name ';'
6391 *
6392 * [ WFC: Entity Declared ]
6393 * In a document without any DTD, a document with only an internal DTD
6394 * subset which contains no parameter entity references, or a document
6395 * with "standalone='yes'", the Name given in the entity reference
6396 * must match that in an entity declaration, except that well-formed
6397 * documents need not declare any of the following entities: amp, lt,
6398 * gt, apos, quot. The declaration of a parameter entity must precede
6399 * any reference to it. Similarly, the declaration of a general entity
6400 * must precede any reference to it which appears in a default value in an
6401 * attribute-list declaration. Note that if entities are declared in the
6402 * external subset or in external parameter entities, a non-validating
6403 * processor is not obligated to read and process their declarations;
6404 * for such documents, the rule that an entity must be declared is a
6405 * well-formedness constraint only if standalone='yes'.
6406 *
6407 * [ WFC: Parsed Entity ]
6408 * An entity reference must not contain the name of an unparsed entity
6409 *
6410 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6411 * is updated to the current location in the string.
6412 */
6413xmlEntityPtr
6414xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6415 xmlChar *name;
6416 const xmlChar *ptr;
6417 xmlChar cur;
6418 xmlEntityPtr ent = NULL;
6419
6420 if ((str == NULL) || (*str == NULL))
6421 return(NULL);
6422 ptr = *str;
6423 cur = *ptr;
6424 if (cur == '&') {
6425 ptr++;
6426 cur = *ptr;
6427 name = xmlParseStringName(ctxt, &ptr);
6428 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006429 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6430 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006431 } else {
6432 if (*ptr == ';') {
6433 ptr++;
6434 /*
6435 * Ask first SAX for entity resolution, otherwise try the
6436 * predefined set.
6437 */
6438 if (ctxt->sax != NULL) {
6439 if (ctxt->sax->getEntity != NULL)
6440 ent = ctxt->sax->getEntity(ctxt->userData, name);
6441 if (ent == NULL)
6442 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006443 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006444 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006445 }
Owen Taylor3473f882001-02-23 17:55:21 +00006446 }
6447 /*
6448 * [ WFC: Entity Declared ]
6449 * In a document without any DTD, a document with only an
6450 * internal DTD subset which contains no parameter entity
6451 * references, or a document with "standalone='yes'", the
6452 * Name given in the entity reference must match that in an
6453 * entity declaration, except that well-formed documents
6454 * need not declare any of the following entities: amp, lt,
6455 * gt, apos, quot.
6456 * The declaration of a parameter entity must precede any
6457 * reference to it.
6458 * Similarly, the declaration of a general entity must
6459 * precede any reference to it which appears in a default
6460 * value in an attribute-list declaration. Note that if
6461 * entities are declared in the external subset or in
6462 * external parameter entities, a non-validating processor
6463 * is not obligated to read and process their declarations;
6464 * for such documents, the rule that an entity must be
6465 * declared is a well-formedness constraint only if
6466 * standalone='yes'.
6467 */
6468 if (ent == NULL) {
6469 if ((ctxt->standalone == 1) ||
6470 ((ctxt->hasExternalSubset == 0) &&
6471 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006472 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006473 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006474 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006475 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006476 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006477 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006478 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006479 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006480 }
6481
6482 /*
6483 * [ WFC: Parsed Entity ]
6484 * An entity reference must not contain the name of an
6485 * unparsed entity
6486 */
6487 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006488 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006489 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006490 }
6491
6492 /*
6493 * [ WFC: No External Entity References ]
6494 * Attribute values cannot contain direct or indirect
6495 * entity references to external entities.
6496 */
6497 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6498 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006499 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006500 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006501 }
6502 /*
6503 * [ WFC: No < in Attribute Values ]
6504 * The replacement text of any entity referred to directly or
6505 * indirectly in an attribute value (other than "&lt;") must
6506 * not contain a <.
6507 */
6508 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6509 (ent != NULL) &&
6510 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6511 (ent->content != NULL) &&
6512 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006513 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6514 "'<' in entity '%s' is not allowed in attributes values\n",
6515 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006516 }
6517
6518 /*
6519 * Internal check, no parameter entities here ...
6520 */
6521 else {
6522 switch (ent->etype) {
6523 case XML_INTERNAL_PARAMETER_ENTITY:
6524 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006525 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6526 "Attempt to reference the parameter entity '%s'\n",
6527 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006528 break;
6529 default:
6530 break;
6531 }
6532 }
6533
6534 /*
6535 * [ WFC: No Recursion ]
6536 * A parsed entity must not contain a recursive reference
6537 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006538 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006539 */
6540
6541 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006542 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006543 }
6544 xmlFree(name);
6545 }
6546 }
6547 *str = ptr;
6548 return(ent);
6549}
6550
6551/**
6552 * xmlParsePEReference:
6553 * @ctxt: an XML parser context
6554 *
6555 * parse PEReference declarations
6556 * The entity content is handled directly by pushing it's content as
6557 * a new input stream.
6558 *
6559 * [69] PEReference ::= '%' Name ';'
6560 *
6561 * [ WFC: No Recursion ]
6562 * A parsed entity must not contain a recursive
6563 * reference to itself, either directly or indirectly.
6564 *
6565 * [ WFC: Entity Declared ]
6566 * In a document without any DTD, a document with only an internal DTD
6567 * subset which contains no parameter entity references, or a document
6568 * with "standalone='yes'", ... ... The declaration of a parameter
6569 * entity must precede any reference to it...
6570 *
6571 * [ VC: Entity Declared ]
6572 * In a document with an external subset or external parameter entities
6573 * with "standalone='no'", ... ... The declaration of a parameter entity
6574 * must precede any reference to it...
6575 *
6576 * [ WFC: In DTD ]
6577 * Parameter-entity references may only appear in the DTD.
6578 * NOTE: misleading but this is handled.
6579 */
6580void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006581xmlParsePEReference(xmlParserCtxtPtr ctxt)
6582{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006583 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006584 xmlEntityPtr entity = NULL;
6585 xmlParserInputPtr input;
6586
6587 if (RAW == '%') {
6588 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006589 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006590 if (name == NULL) {
6591 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6592 "xmlParsePEReference: no name\n");
6593 } else {
6594 if (RAW == ';') {
6595 NEXT;
6596 if ((ctxt->sax != NULL) &&
6597 (ctxt->sax->getParameterEntity != NULL))
6598 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6599 name);
6600 if (entity == NULL) {
6601 /*
6602 * [ WFC: Entity Declared ]
6603 * In a document without any DTD, a document with only an
6604 * internal DTD subset which contains no parameter entity
6605 * references, or a document with "standalone='yes'", ...
6606 * ... The declaration of a parameter entity must precede
6607 * any reference to it...
6608 */
6609 if ((ctxt->standalone == 1) ||
6610 ((ctxt->hasExternalSubset == 0) &&
6611 (ctxt->hasPErefs == 0))) {
6612 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6613 "PEReference: %%%s; not found\n",
6614 name);
6615 } else {
6616 /*
6617 * [ VC: Entity Declared ]
6618 * In a document with an external subset or external
6619 * parameter entities with "standalone='no'", ...
6620 * ... The declaration of a parameter entity must
6621 * precede any reference to it...
6622 */
6623 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6624 "PEReference: %%%s; not found\n",
6625 name, NULL);
6626 ctxt->valid = 0;
6627 }
6628 } else {
6629 /*
6630 * Internal checking in case the entity quest barfed
6631 */
6632 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6633 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6634 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6635 "Internal: %%%s; is not a parameter entity\n",
6636 name, NULL);
6637 } else if (ctxt->input->free != deallocblankswrapper) {
6638 input =
6639 xmlNewBlanksWrapperInputStream(ctxt, entity);
6640 xmlPushInput(ctxt, input);
6641 } else {
6642 /*
6643 * TODO !!!
6644 * handle the extra spaces added before and after
6645 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6646 */
6647 input = xmlNewEntityInputStream(ctxt, entity);
6648 xmlPushInput(ctxt, input);
6649 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006650 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006651 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006652 xmlParseTextDecl(ctxt);
6653 if (ctxt->errNo ==
6654 XML_ERR_UNSUPPORTED_ENCODING) {
6655 /*
6656 * The XML REC instructs us to stop parsing
6657 * right here
6658 */
6659 ctxt->instate = XML_PARSER_EOF;
6660 return;
6661 }
6662 }
6663 }
6664 }
6665 ctxt->hasPErefs = 1;
6666 } else {
6667 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6668 }
6669 }
Owen Taylor3473f882001-02-23 17:55:21 +00006670 }
6671}
6672
6673/**
6674 * xmlParseStringPEReference:
6675 * @ctxt: an XML parser context
6676 * @str: a pointer to an index in the string
6677 *
6678 * parse PEReference declarations
6679 *
6680 * [69] PEReference ::= '%' Name ';'
6681 *
6682 * [ WFC: No Recursion ]
6683 * A parsed entity must not contain a recursive
6684 * reference to itself, either directly or indirectly.
6685 *
6686 * [ WFC: Entity Declared ]
6687 * In a document without any DTD, a document with only an internal DTD
6688 * subset which contains no parameter entity references, or a document
6689 * with "standalone='yes'", ... ... The declaration of a parameter
6690 * entity must precede any reference to it...
6691 *
6692 * [ VC: Entity Declared ]
6693 * In a document with an external subset or external parameter entities
6694 * with "standalone='no'", ... ... The declaration of a parameter entity
6695 * must precede any reference to it...
6696 *
6697 * [ WFC: In DTD ]
6698 * Parameter-entity references may only appear in the DTD.
6699 * NOTE: misleading but this is handled.
6700 *
6701 * Returns the string of the entity content.
6702 * str is updated to the current value of the index
6703 */
6704xmlEntityPtr
6705xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6706 const xmlChar *ptr;
6707 xmlChar cur;
6708 xmlChar *name;
6709 xmlEntityPtr entity = NULL;
6710
6711 if ((str == NULL) || (*str == NULL)) return(NULL);
6712 ptr = *str;
6713 cur = *ptr;
6714 if (cur == '%') {
6715 ptr++;
6716 cur = *ptr;
6717 name = xmlParseStringName(ctxt, &ptr);
6718 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006719 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6720 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006721 } else {
6722 cur = *ptr;
6723 if (cur == ';') {
6724 ptr++;
6725 cur = *ptr;
6726 if ((ctxt->sax != NULL) &&
6727 (ctxt->sax->getParameterEntity != NULL))
6728 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6729 name);
6730 if (entity == NULL) {
6731 /*
6732 * [ WFC: Entity Declared ]
6733 * In a document without any DTD, a document with only an
6734 * internal DTD subset which contains no parameter entity
6735 * references, or a document with "standalone='yes'", ...
6736 * ... The declaration of a parameter entity must precede
6737 * any reference to it...
6738 */
6739 if ((ctxt->standalone == 1) ||
6740 ((ctxt->hasExternalSubset == 0) &&
6741 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006742 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006743 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006744 } else {
6745 /*
6746 * [ VC: Entity Declared ]
6747 * In a document with an external subset or external
6748 * parameter entities with "standalone='no'", ...
6749 * ... The declaration of a parameter entity must
6750 * precede any reference to it...
6751 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006752 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6753 "PEReference: %%%s; not found\n",
6754 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006755 ctxt->valid = 0;
6756 }
6757 } else {
6758 /*
6759 * Internal checking in case the entity quest barfed
6760 */
6761 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6762 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006763 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6764 "%%%s; is not a parameter entity\n",
6765 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006766 }
6767 }
6768 ctxt->hasPErefs = 1;
6769 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006770 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006771 }
6772 xmlFree(name);
6773 }
6774 }
6775 *str = ptr;
6776 return(entity);
6777}
6778
6779/**
6780 * xmlParseDocTypeDecl:
6781 * @ctxt: an XML parser context
6782 *
6783 * parse a DOCTYPE declaration
6784 *
6785 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6786 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6787 *
6788 * [ VC: Root Element Type ]
6789 * The Name in the document type declaration must match the element
6790 * type of the root element.
6791 */
6792
6793void
6794xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006795 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006796 xmlChar *ExternalID = NULL;
6797 xmlChar *URI = NULL;
6798
6799 /*
6800 * We know that '<!DOCTYPE' has been detected.
6801 */
6802 SKIP(9);
6803
6804 SKIP_BLANKS;
6805
6806 /*
6807 * Parse the DOCTYPE name.
6808 */
6809 name = xmlParseName(ctxt);
6810 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006811 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6812 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006813 }
6814 ctxt->intSubName = name;
6815
6816 SKIP_BLANKS;
6817
6818 /*
6819 * Check for SystemID and ExternalID
6820 */
6821 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6822
6823 if ((URI != NULL) || (ExternalID != NULL)) {
6824 ctxt->hasExternalSubset = 1;
6825 }
6826 ctxt->extSubURI = URI;
6827 ctxt->extSubSystem = ExternalID;
6828
6829 SKIP_BLANKS;
6830
6831 /*
6832 * Create and update the internal subset.
6833 */
6834 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6835 (!ctxt->disableSAX))
6836 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6837
6838 /*
6839 * Is there any internal subset declarations ?
6840 * they are handled separately in xmlParseInternalSubset()
6841 */
6842 if (RAW == '[')
6843 return;
6844
6845 /*
6846 * We should be at the end of the DOCTYPE declaration.
6847 */
6848 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006849 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006850 }
6851 NEXT;
6852}
6853
6854/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006855 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006856 * @ctxt: an XML parser context
6857 *
6858 * parse the internal subset declaration
6859 *
6860 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6861 */
6862
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006863static void
Owen Taylor3473f882001-02-23 17:55:21 +00006864xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6865 /*
6866 * Is there any DTD definition ?
6867 */
6868 if (RAW == '[') {
6869 ctxt->instate = XML_PARSER_DTD;
6870 NEXT;
6871 /*
6872 * Parse the succession of Markup declarations and
6873 * PEReferences.
6874 * Subsequence (markupdecl | PEReference | S)*
6875 */
6876 while (RAW != ']') {
6877 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006878 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006879
6880 SKIP_BLANKS;
6881 xmlParseMarkupDecl(ctxt);
6882 xmlParsePEReference(ctxt);
6883
6884 /*
6885 * Pop-up of finished entities.
6886 */
6887 while ((RAW == 0) && (ctxt->inputNr > 1))
6888 xmlPopInput(ctxt);
6889
6890 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006891 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006892 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006893 break;
6894 }
6895 }
6896 if (RAW == ']') {
6897 NEXT;
6898 SKIP_BLANKS;
6899 }
6900 }
6901
6902 /*
6903 * We should be at the end of the DOCTYPE declaration.
6904 */
6905 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006906 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006907 }
6908 NEXT;
6909}
6910
Daniel Veillard81273902003-09-30 00:43:48 +00006911#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006912/**
6913 * xmlParseAttribute:
6914 * @ctxt: an XML parser context
6915 * @value: a xmlChar ** used to store the value of the attribute
6916 *
6917 * parse an attribute
6918 *
6919 * [41] Attribute ::= Name Eq AttValue
6920 *
6921 * [ WFC: No External Entity References ]
6922 * Attribute values cannot contain direct or indirect entity references
6923 * to external entities.
6924 *
6925 * [ WFC: No < in Attribute Values ]
6926 * The replacement text of any entity referred to directly or indirectly in
6927 * an attribute value (other than "&lt;") must not contain a <.
6928 *
6929 * [ VC: Attribute Value Type ]
6930 * The attribute must have been declared; the value must be of the type
6931 * declared for it.
6932 *
6933 * [25] Eq ::= S? '=' S?
6934 *
6935 * With namespace:
6936 *
6937 * [NS 11] Attribute ::= QName Eq AttValue
6938 *
6939 * Also the case QName == xmlns:??? is handled independently as a namespace
6940 * definition.
6941 *
6942 * Returns the attribute name, and the value in *value.
6943 */
6944
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006945const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006946xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006947 const xmlChar *name;
6948 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006949
6950 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006951 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006952 name = xmlParseName(ctxt);
6953 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006954 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006955 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006956 return(NULL);
6957 }
6958
6959 /*
6960 * read the value
6961 */
6962 SKIP_BLANKS;
6963 if (RAW == '=') {
6964 NEXT;
6965 SKIP_BLANKS;
6966 val = xmlParseAttValue(ctxt);
6967 ctxt->instate = XML_PARSER_CONTENT;
6968 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006969 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006970 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006971 return(NULL);
6972 }
6973
6974 /*
6975 * Check that xml:lang conforms to the specification
6976 * No more registered as an error, just generate a warning now
6977 * since this was deprecated in XML second edition
6978 */
6979 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6980 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006981 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6982 "Malformed value for xml:lang : %s\n",
6983 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006984 }
6985 }
6986
6987 /*
6988 * Check that xml:space conforms to the specification
6989 */
6990 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6991 if (xmlStrEqual(val, BAD_CAST "default"))
6992 *(ctxt->space) = 0;
6993 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6994 *(ctxt->space) = 1;
6995 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00006996 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006997"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00006998 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006999 }
7000 }
7001
7002 *value = val;
7003 return(name);
7004}
7005
7006/**
7007 * xmlParseStartTag:
7008 * @ctxt: an XML parser context
7009 *
7010 * parse a start of tag either for rule element or
7011 * EmptyElement. In both case we don't parse the tag closing chars.
7012 *
7013 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7014 *
7015 * [ WFC: Unique Att Spec ]
7016 * No attribute name may appear more than once in the same start-tag or
7017 * empty-element tag.
7018 *
7019 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7020 *
7021 * [ WFC: Unique Att Spec ]
7022 * No attribute name may appear more than once in the same start-tag or
7023 * empty-element tag.
7024 *
7025 * With namespace:
7026 *
7027 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7028 *
7029 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7030 *
7031 * Returns the element name parsed
7032 */
7033
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007034const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007035xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007036 const xmlChar *name;
7037 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007038 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007039 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007040 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007041 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007042 int i;
7043
7044 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007045 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007046
7047 name = xmlParseName(ctxt);
7048 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007049 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007050 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007051 return(NULL);
7052 }
7053
7054 /*
7055 * Now parse the attributes, it ends up with the ending
7056 *
7057 * (S Attribute)* S?
7058 */
7059 SKIP_BLANKS;
7060 GROW;
7061
Daniel Veillard21a0f912001-02-25 19:54:14 +00007062 while ((RAW != '>') &&
7063 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007064 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007065 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007066 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007067
7068 attname = xmlParseAttribute(ctxt, &attvalue);
7069 if ((attname != NULL) && (attvalue != NULL)) {
7070 /*
7071 * [ WFC: Unique Att Spec ]
7072 * No attribute name may appear more than once in the same
7073 * start-tag or empty-element tag.
7074 */
7075 for (i = 0; i < nbatts;i += 2) {
7076 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007077 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007078 xmlFree(attvalue);
7079 goto failed;
7080 }
7081 }
Owen Taylor3473f882001-02-23 17:55:21 +00007082 /*
7083 * Add the pair to atts
7084 */
7085 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007086 maxatts = 22; /* allow for 10 attrs by default */
7087 atts = (const xmlChar **)
7088 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007089 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007090 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007091 if (attvalue != NULL)
7092 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007093 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007094 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007095 ctxt->atts = atts;
7096 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007097 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007098 const xmlChar **n;
7099
Owen Taylor3473f882001-02-23 17:55:21 +00007100 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007101 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007102 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007103 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007104 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007105 if (attvalue != NULL)
7106 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007107 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007108 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007109 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007110 ctxt->atts = atts;
7111 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007112 }
7113 atts[nbatts++] = attname;
7114 atts[nbatts++] = attvalue;
7115 atts[nbatts] = NULL;
7116 atts[nbatts + 1] = NULL;
7117 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007118 if (attvalue != NULL)
7119 xmlFree(attvalue);
7120 }
7121
7122failed:
7123
Daniel Veillard3772de32002-12-17 10:31:45 +00007124 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007125 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7126 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007127 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007128 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7129 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007130 }
7131 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007132 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7133 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007134 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7135 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007136 break;
7137 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007138 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007139 GROW;
7140 }
7141
7142 /*
7143 * SAX: Start of Element !
7144 */
7145 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007146 (!ctxt->disableSAX)) {
7147 if (nbatts > 0)
7148 ctxt->sax->startElement(ctxt->userData, name, atts);
7149 else
7150 ctxt->sax->startElement(ctxt->userData, name, NULL);
7151 }
Owen Taylor3473f882001-02-23 17:55:21 +00007152
7153 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007154 /* Free only the content strings */
7155 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007156 if (atts[i] != NULL)
7157 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007158 }
7159 return(name);
7160}
7161
7162/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007163 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007164 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007165 * @line: line of the start tag
7166 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007167 *
7168 * parse an end of tag
7169 *
7170 * [42] ETag ::= '</' Name S? '>'
7171 *
7172 * With namespace
7173 *
7174 * [NS 9] ETag ::= '</' QName S? '>'
7175 */
7176
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007177static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007178xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007179 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007180
7181 GROW;
7182 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007183 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007184 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007185 return;
7186 }
7187 SKIP(2);
7188
Daniel Veillard46de64e2002-05-29 08:21:33 +00007189 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007190
7191 /*
7192 * We should definitely be at the ending "S? '>'" part
7193 */
7194 GROW;
7195 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007196 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007197 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007198 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007199 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007200
7201 /*
7202 * [ WFC: Element Type Match ]
7203 * The Name in an element's end-tag must match the element type in the
7204 * start-tag.
7205 *
7206 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007207 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007208 if (name == NULL) name = BAD_CAST "unparseable";
7209 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007210 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007211 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007212 }
7213
7214 /*
7215 * SAX: End of Tag
7216 */
7217 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7218 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007219 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007220
Daniel Veillarde57ec792003-09-10 10:50:59 +00007221 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007222 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007223 return;
7224}
7225
7226/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007227 * xmlParseEndTag:
7228 * @ctxt: an XML parser context
7229 *
7230 * parse an end of tag
7231 *
7232 * [42] ETag ::= '</' Name S? '>'
7233 *
7234 * With namespace
7235 *
7236 * [NS 9] ETag ::= '</' QName S? '>'
7237 */
7238
7239void
7240xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007241 xmlParseEndTag1(ctxt, 0);
7242}
Daniel Veillard81273902003-09-30 00:43:48 +00007243#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007244
7245/************************************************************************
7246 * *
7247 * SAX 2 specific operations *
7248 * *
7249 ************************************************************************/
7250
7251static const xmlChar *
7252xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7253 int len = 0, l;
7254 int c;
7255 int count = 0;
7256
7257 /*
7258 * Handler for more complex cases
7259 */
7260 GROW;
7261 c = CUR_CHAR(l);
7262 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007263 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007264 return(NULL);
7265 }
7266
7267 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007268 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007269 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007270 (IS_COMBINING(c)) ||
7271 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007272 if (count++ > 100) {
7273 count = 0;
7274 GROW;
7275 }
7276 len += l;
7277 NEXTL(l);
7278 c = CUR_CHAR(l);
7279 }
7280 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7281}
7282
7283/*
7284 * xmlGetNamespace:
7285 * @ctxt: an XML parser context
7286 * @prefix: the prefix to lookup
7287 *
7288 * Lookup the namespace name for the @prefix (which ca be NULL)
7289 * The prefix must come from the @ctxt->dict dictionnary
7290 *
7291 * Returns the namespace name or NULL if not bound
7292 */
7293static const xmlChar *
7294xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7295 int i;
7296
Daniel Veillarde57ec792003-09-10 10:50:59 +00007297 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007298 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007299 if (ctxt->nsTab[i] == prefix) {
7300 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7301 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007302 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007303 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007304 return(NULL);
7305}
7306
7307/**
7308 * xmlParseNCName:
7309 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007310 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007311 *
7312 * parse an XML name.
7313 *
7314 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7315 * CombiningChar | Extender
7316 *
7317 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7318 *
7319 * Returns the Name parsed or NULL
7320 */
7321
7322static const xmlChar *
7323xmlParseNCName(xmlParserCtxtPtr ctxt) {
7324 const xmlChar *in;
7325 const xmlChar *ret;
7326 int count = 0;
7327
7328 /*
7329 * Accelerator for simple ASCII names
7330 */
7331 in = ctxt->input->cur;
7332 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7333 ((*in >= 0x41) && (*in <= 0x5A)) ||
7334 (*in == '_')) {
7335 in++;
7336 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7337 ((*in >= 0x41) && (*in <= 0x5A)) ||
7338 ((*in >= 0x30) && (*in <= 0x39)) ||
7339 (*in == '_') || (*in == '-') ||
7340 (*in == '.'))
7341 in++;
7342 if ((*in > 0) && (*in < 0x80)) {
7343 count = in - ctxt->input->cur;
7344 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7345 ctxt->input->cur = in;
7346 ctxt->nbChars += count;
7347 ctxt->input->col += count;
7348 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007349 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007350 }
7351 return(ret);
7352 }
7353 }
7354 return(xmlParseNCNameComplex(ctxt));
7355}
7356
7357/**
7358 * xmlParseQName:
7359 * @ctxt: an XML parser context
7360 * @prefix: pointer to store the prefix part
7361 *
7362 * parse an XML Namespace QName
7363 *
7364 * [6] QName ::= (Prefix ':')? LocalPart
7365 * [7] Prefix ::= NCName
7366 * [8] LocalPart ::= NCName
7367 *
7368 * Returns the Name parsed or NULL
7369 */
7370
7371static const xmlChar *
7372xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7373 const xmlChar *l, *p;
7374
7375 GROW;
7376
7377 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007378 if (l == NULL) {
7379 if (CUR == ':') {
7380 l = xmlParseName(ctxt);
7381 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007382 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7383 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007384 *prefix = NULL;
7385 return(l);
7386 }
7387 }
7388 return(NULL);
7389 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007390 if (CUR == ':') {
7391 NEXT;
7392 p = l;
7393 l = xmlParseNCName(ctxt);
7394 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007395 xmlChar *tmp;
7396
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007397 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7398 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007399 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7400 p = xmlDictLookup(ctxt->dict, tmp, -1);
7401 if (tmp != NULL) xmlFree(tmp);
7402 *prefix = NULL;
7403 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007404 }
7405 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007406 xmlChar *tmp;
7407
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007408 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7409 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007410 NEXT;
7411 tmp = (xmlChar *) xmlParseName(ctxt);
7412 if (tmp != NULL) {
7413 tmp = xmlBuildQName(tmp, l, NULL, 0);
7414 l = xmlDictLookup(ctxt->dict, tmp, -1);
7415 if (tmp != NULL) xmlFree(tmp);
7416 *prefix = p;
7417 return(l);
7418 }
7419 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7420 l = xmlDictLookup(ctxt->dict, tmp, -1);
7421 if (tmp != NULL) xmlFree(tmp);
7422 *prefix = p;
7423 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007424 }
7425 *prefix = p;
7426 } else
7427 *prefix = NULL;
7428 return(l);
7429}
7430
7431/**
7432 * xmlParseQNameAndCompare:
7433 * @ctxt: an XML parser context
7434 * @name: the localname
7435 * @prefix: the prefix, if any.
7436 *
7437 * parse an XML name and compares for match
7438 * (specialized for endtag parsing)
7439 *
7440 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7441 * and the name for mismatch
7442 */
7443
7444static const xmlChar *
7445xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7446 xmlChar const *prefix) {
7447 const xmlChar *cmp = name;
7448 const xmlChar *in;
7449 const xmlChar *ret;
7450 const xmlChar *prefix2;
7451
7452 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7453
7454 GROW;
7455 in = ctxt->input->cur;
7456
7457 cmp = prefix;
7458 while (*in != 0 && *in == *cmp) {
7459 ++in;
7460 ++cmp;
7461 }
7462 if ((*cmp == 0) && (*in == ':')) {
7463 in++;
7464 cmp = name;
7465 while (*in != 0 && *in == *cmp) {
7466 ++in;
7467 ++cmp;
7468 }
William M. Brack76e95df2003-10-18 16:20:14 +00007469 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007470 /* success */
7471 ctxt->input->cur = in;
7472 return((const xmlChar*) 1);
7473 }
7474 }
7475 /*
7476 * all strings coms from the dictionary, equality can be done directly
7477 */
7478 ret = xmlParseQName (ctxt, &prefix2);
7479 if ((ret == name) && (prefix == prefix2))
7480 return((const xmlChar*) 1);
7481 return ret;
7482}
7483
7484/**
7485 * xmlParseAttValueInternal:
7486 * @ctxt: an XML parser context
7487 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007488 * @alloc: whether the attribute was reallocated as a new string
7489 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007490 *
7491 * parse a value for an attribute.
7492 * NOTE: if no normalization is needed, the routine will return pointers
7493 * directly from the data buffer.
7494 *
7495 * 3.3.3 Attribute-Value Normalization:
7496 * Before the value of an attribute is passed to the application or
7497 * checked for validity, the XML processor must normalize it as follows:
7498 * - a character reference is processed by appending the referenced
7499 * character to the attribute value
7500 * - an entity reference is processed by recursively processing the
7501 * replacement text of the entity
7502 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7503 * appending #x20 to the normalized value, except that only a single
7504 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7505 * parsed entity or the literal entity value of an internal parsed entity
7506 * - other characters are processed by appending them to the normalized value
7507 * If the declared value is not CDATA, then the XML processor must further
7508 * process the normalized attribute value by discarding any leading and
7509 * trailing space (#x20) characters, and by replacing sequences of space
7510 * (#x20) characters by a single space (#x20) character.
7511 * All attributes for which no declaration has been read should be treated
7512 * by a non-validating parser as if declared CDATA.
7513 *
7514 * Returns the AttValue parsed or NULL. The value has to be freed by the
7515 * caller if it was copied, this can be detected by val[*len] == 0.
7516 */
7517
7518static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007519xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7520 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007521{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007522 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007523 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524 xmlChar *ret = NULL;
7525
7526 GROW;
7527 in = (xmlChar *) CUR_PTR;
7528 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007529 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007530 return (NULL);
7531 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007532 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007533
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007534 /*
7535 * try to handle in this routine the most common case where no
7536 * allocation of a new string is required and where content is
7537 * pure ASCII.
7538 */
7539 limit = *in++;
7540 end = ctxt->input->end;
7541 start = in;
7542 if (in >= end) {
7543 const xmlChar *oldbase = ctxt->input->base;
7544 GROW;
7545 if (oldbase != ctxt->input->base) {
7546 long delta = ctxt->input->base - oldbase;
7547 start = start + delta;
7548 in = in + delta;
7549 }
7550 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007551 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007552 if (normalize) {
7553 /*
7554 * Skip any leading spaces
7555 */
7556 while ((in < end) && (*in != limit) &&
7557 ((*in == 0x20) || (*in == 0x9) ||
7558 (*in == 0xA) || (*in == 0xD))) {
7559 in++;
7560 start = in;
7561 if (in >= end) {
7562 const xmlChar *oldbase = ctxt->input->base;
7563 GROW;
7564 if (oldbase != ctxt->input->base) {
7565 long delta = ctxt->input->base - oldbase;
7566 start = start + delta;
7567 in = in + delta;
7568 }
7569 end = ctxt->input->end;
7570 }
7571 }
7572 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7573 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7574 if ((*in++ == 0x20) && (*in == 0x20)) break;
7575 if (in >= end) {
7576 const xmlChar *oldbase = ctxt->input->base;
7577 GROW;
7578 if (oldbase != ctxt->input->base) {
7579 long delta = ctxt->input->base - oldbase;
7580 start = start + delta;
7581 in = in + delta;
7582 }
7583 end = ctxt->input->end;
7584 }
7585 }
7586 last = in;
7587 /*
7588 * skip the trailing blanks
7589 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007590 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007591 while ((in < end) && (*in != limit) &&
7592 ((*in == 0x20) || (*in == 0x9) ||
7593 (*in == 0xA) || (*in == 0xD))) {
7594 in++;
7595 if (in >= end) {
7596 const xmlChar *oldbase = ctxt->input->base;
7597 GROW;
7598 if (oldbase != ctxt->input->base) {
7599 long delta = ctxt->input->base - oldbase;
7600 start = start + delta;
7601 in = in + delta;
7602 last = last + delta;
7603 }
7604 end = ctxt->input->end;
7605 }
7606 }
7607 if (*in != limit) goto need_complex;
7608 } else {
7609 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7610 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7611 in++;
7612 if (in >= end) {
7613 const xmlChar *oldbase = ctxt->input->base;
7614 GROW;
7615 if (oldbase != ctxt->input->base) {
7616 long delta = ctxt->input->base - oldbase;
7617 start = start + delta;
7618 in = in + delta;
7619 }
7620 end = ctxt->input->end;
7621 }
7622 }
7623 last = in;
7624 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007625 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007626 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007628 *len = last - start;
7629 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007630 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007631 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007632 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633 }
7634 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007635 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007636 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007637need_complex:
7638 if (alloc) *alloc = 1;
7639 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007640}
7641
7642/**
7643 * xmlParseAttribute2:
7644 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007645 * @pref: the element prefix
7646 * @elem: the element name
7647 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007648 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007649 * @len: an int * to save the length of the attribute
7650 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007651 *
7652 * parse an attribute in the new SAX2 framework.
7653 *
7654 * Returns the attribute name, and the value in *value, .
7655 */
7656
7657static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007658xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7659 const xmlChar *pref, const xmlChar *elem,
7660 const xmlChar **prefix, xmlChar **value,
7661 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007662 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007663 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007664 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007665
7666 *value = NULL;
7667 GROW;
7668 name = xmlParseQName(ctxt, prefix);
7669 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007670 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7671 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007672 return(NULL);
7673 }
7674
7675 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007676 * get the type if needed
7677 */
7678 if (ctxt->attsSpecial != NULL) {
7679 int type;
7680
7681 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7682 pref, elem, *prefix, name);
7683 if (type != 0) normalize = 1;
7684 }
7685
7686 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007687 * read the value
7688 */
7689 SKIP_BLANKS;
7690 if (RAW == '=') {
7691 NEXT;
7692 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007693 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007694 ctxt->instate = XML_PARSER_CONTENT;
7695 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007696 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007697 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007698 return(NULL);
7699 }
7700
Daniel Veillardd8925572005-06-08 22:34:55 +00007701 if (*prefix == ctxt->str_xml) {
7702 /*
7703 * Check that xml:lang conforms to the specification
7704 * No more registered as an error, just generate a warning now
7705 * since this was deprecated in XML second edition
7706 */
7707 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7708 internal_val = xmlStrndup(val, *len);
7709 if (!xmlCheckLanguageID(internal_val)) {
7710 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7711 "Malformed value for xml:lang : %s\n",
7712 internal_val, NULL);
7713 }
7714 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007715
Daniel Veillardd8925572005-06-08 22:34:55 +00007716 /*
7717 * Check that xml:space conforms to the specification
7718 */
7719 if (xmlStrEqual(name, BAD_CAST "space")) {
7720 internal_val = xmlStrndup(val, *len);
7721 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7722 *(ctxt->space) = 0;
7723 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7724 *(ctxt->space) = 1;
7725 else {
7726 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007728 internal_val, NULL);
7729 }
7730 }
7731 if (internal_val) {
7732 xmlFree(internal_val);
7733 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007734 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735
7736 *value = val;
7737 return(name);
7738}
7739
7740/**
7741 * xmlParseStartTag2:
7742 * @ctxt: an XML parser context
7743 *
7744 * parse a start of tag either for rule element or
7745 * EmptyElement. In both case we don't parse the tag closing chars.
7746 * This routine is called when running SAX2 parsing
7747 *
7748 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7749 *
7750 * [ WFC: Unique Att Spec ]
7751 * No attribute name may appear more than once in the same start-tag or
7752 * empty-element tag.
7753 *
7754 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7755 *
7756 * [ WFC: Unique Att Spec ]
7757 * No attribute name may appear more than once in the same start-tag or
7758 * empty-element tag.
7759 *
7760 * With namespace:
7761 *
7762 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7763 *
7764 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7765 *
7766 * Returns the element name parsed
7767 */
7768
7769static const xmlChar *
7770xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007771 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007772 const xmlChar *localname;
7773 const xmlChar *prefix;
7774 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007775 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007776 const xmlChar *nsname;
7777 xmlChar *attvalue;
7778 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007779 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007780 int nratts, nbatts, nbdef;
7781 int i, j, nbNs, attval;
7782 const xmlChar *base;
7783 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007784 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785
7786 if (RAW != '<') return(NULL);
7787 NEXT1;
7788
7789 /*
7790 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7791 * point since the attribute values may be stored as pointers to
7792 * the buffer and calling SHRINK would destroy them !
7793 * The Shrinking is only possible once the full set of attribute
7794 * callbacks have been done.
7795 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007796reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007797 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007798 base = ctxt->input->base;
7799 cur = ctxt->input->cur - ctxt->input->base;
7800 nbatts = 0;
7801 nratts = 0;
7802 nbdef = 0;
7803 nbNs = 0;
7804 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007805 /* Forget any namespaces added during an earlier parse of this element. */
7806 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007807
7808 localname = xmlParseQName(ctxt, &prefix);
7809 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007810 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7811 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812 return(NULL);
7813 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007814 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815
7816 /*
7817 * Now parse the attributes, it ends up with the ending
7818 *
7819 * (S Attribute)* S?
7820 */
7821 SKIP_BLANKS;
7822 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007823 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007824
7825 while ((RAW != '>') &&
7826 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007827 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007828 const xmlChar *q = CUR_PTR;
7829 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007830 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007831
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007832 attname = xmlParseAttribute2(ctxt, prefix, localname,
7833 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007834 if ((attname != NULL) && (attvalue != NULL)) {
7835 if (len < 0) len = xmlStrlen(attvalue);
7836 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007837 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7838 xmlURIPtr uri;
7839
7840 if (*URL != 0) {
7841 uri = xmlParseURI((const char *) URL);
7842 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007843 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7844 "xmlns: %s not a valid URI\n",
7845 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007846 } else {
7847 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007848 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7849 "xmlns: URI %s is not absolute\n",
7850 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007851 }
7852 xmlFreeURI(uri);
7853 }
7854 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007855 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007856 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007857 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007858 for (j = 1;j <= nbNs;j++)
7859 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7860 break;
7861 if (j <= nbNs)
7862 xmlErrAttributeDup(ctxt, NULL, attname);
7863 else
7864 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007865 if (alloc != 0) xmlFree(attvalue);
7866 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007867 continue;
7868 }
7869 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007870 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7871 xmlURIPtr uri;
7872
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007873 if (attname == ctxt->str_xml) {
7874 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007875 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7876 "xml namespace prefix mapped to wrong URI\n",
7877 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007878 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007879 /*
7880 * Do not keep a namespace definition node
7881 */
7882 if (alloc != 0) xmlFree(attvalue);
7883 SKIP_BLANKS;
7884 continue;
7885 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007886 uri = xmlParseURI((const char *) URL);
7887 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007888 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7889 "xmlns:%s: '%s' is not a valid URI\n",
7890 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007891 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007892 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007893 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7894 "xmlns:%s: URI %s is not absolute\n",
7895 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007896 }
7897 xmlFreeURI(uri);
7898 }
7899
Daniel Veillard0fb18932003-09-07 09:14:37 +00007900 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007901 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007903 for (j = 1;j <= nbNs;j++)
7904 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7905 break;
7906 if (j <= nbNs)
7907 xmlErrAttributeDup(ctxt, aprefix, attname);
7908 else
7909 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007910 if (alloc != 0) xmlFree(attvalue);
7911 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007912 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007913 continue;
7914 }
7915
7916 /*
7917 * Add the pair to atts
7918 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007919 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7920 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007921 if (attvalue[len] == 0)
7922 xmlFree(attvalue);
7923 goto failed;
7924 }
7925 maxatts = ctxt->maxatts;
7926 atts = ctxt->atts;
7927 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007928 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 atts[nbatts++] = attname;
7930 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007931 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007932 atts[nbatts++] = attvalue;
7933 attvalue += len;
7934 atts[nbatts++] = attvalue;
7935 /*
7936 * tag if some deallocation is needed
7937 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007938 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007939 } else {
7940 if ((attvalue != NULL) && (attvalue[len] == 0))
7941 xmlFree(attvalue);
7942 }
7943
7944failed:
7945
7946 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007947 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007948 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7949 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007950 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7952 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007953 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007954 }
7955 SKIP_BLANKS;
7956 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7957 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007958 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007959 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007960 break;
7961 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007963 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007964 }
7965
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007967 * The attributes defaulting
7968 */
7969 if (ctxt->attsDefault != NULL) {
7970 xmlDefAttrsPtr defaults;
7971
7972 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7973 if (defaults != NULL) {
7974 for (i = 0;i < defaults->nbAttrs;i++) {
7975 attname = defaults->values[4 * i];
7976 aprefix = defaults->values[4 * i + 1];
7977
7978 /*
7979 * special work for namespaces defaulted defs
7980 */
7981 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7982 /*
7983 * check that it's not a defined namespace
7984 */
7985 for (j = 1;j <= nbNs;j++)
7986 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7987 break;
7988 if (j <= nbNs) continue;
7989
7990 nsname = xmlGetNamespace(ctxt, NULL);
7991 if (nsname != defaults->values[4 * i + 2]) {
7992 if (nsPush(ctxt, NULL,
7993 defaults->values[4 * i + 2]) > 0)
7994 nbNs++;
7995 }
7996 } else if (aprefix == ctxt->str_xmlns) {
7997 /*
7998 * check that it's not a defined namespace
7999 */
8000 for (j = 1;j <= nbNs;j++)
8001 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8002 break;
8003 if (j <= nbNs) continue;
8004
8005 nsname = xmlGetNamespace(ctxt, attname);
8006 if (nsname != defaults->values[2]) {
8007 if (nsPush(ctxt, attname,
8008 defaults->values[4 * i + 2]) > 0)
8009 nbNs++;
8010 }
8011 } else {
8012 /*
8013 * check that it's not a defined attribute
8014 */
8015 for (j = 0;j < nbatts;j+=5) {
8016 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8017 break;
8018 }
8019 if (j < nbatts) continue;
8020
8021 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8022 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008023 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008024 }
8025 maxatts = ctxt->maxatts;
8026 atts = ctxt->atts;
8027 }
8028 atts[nbatts++] = attname;
8029 atts[nbatts++] = aprefix;
8030 if (aprefix == NULL)
8031 atts[nbatts++] = NULL;
8032 else
8033 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8034 atts[nbatts++] = defaults->values[4 * i + 2];
8035 atts[nbatts++] = defaults->values[4 * i + 3];
8036 nbdef++;
8037 }
8038 }
8039 }
8040 }
8041
Daniel Veillarde70c8772003-11-25 07:21:18 +00008042 /*
8043 * The attributes checkings
8044 */
8045 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008046 /*
8047 * The default namespace does not apply to attribute names.
8048 */
8049 if (atts[i + 1] != NULL) {
8050 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8051 if (nsname == NULL) {
8052 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8053 "Namespace prefix %s for %s on %s is not defined\n",
8054 atts[i + 1], atts[i], localname);
8055 }
8056 atts[i + 2] = nsname;
8057 } else
8058 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008059 /*
8060 * [ WFC: Unique Att Spec ]
8061 * No attribute name may appear more than once in the same
8062 * start-tag or empty-element tag.
8063 * As extended by the Namespace in XML REC.
8064 */
8065 for (j = 0; j < i;j += 5) {
8066 if (atts[i] == atts[j]) {
8067 if (atts[i+1] == atts[j+1]) {
8068 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8069 break;
8070 }
8071 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8072 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8073 "Namespaced Attribute %s in '%s' redefined\n",
8074 atts[i], nsname, NULL);
8075 break;
8076 }
8077 }
8078 }
8079 }
8080
Daniel Veillarde57ec792003-09-10 10:50:59 +00008081 nsname = xmlGetNamespace(ctxt, prefix);
8082 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008083 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8084 "Namespace prefix %s on %s is not defined\n",
8085 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008086 }
8087 *pref = prefix;
8088 *URI = nsname;
8089
8090 /*
8091 * SAX: Start of Element !
8092 */
8093 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8094 (!ctxt->disableSAX)) {
8095 if (nbNs > 0)
8096 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8097 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8098 nbatts / 5, nbdef, atts);
8099 else
8100 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8101 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8102 }
8103
8104 /*
8105 * Free up attribute allocated strings if needed
8106 */
8107 if (attval != 0) {
8108 for (i = 3,j = 0; j < nratts;i += 5,j++)
8109 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8110 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008111 }
8112
8113 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008114
8115base_changed:
8116 /*
8117 * the attribute strings are valid iif the base didn't changed
8118 */
8119 if (attval != 0) {
8120 for (i = 3,j = 0; j < nratts;i += 5,j++)
8121 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8122 xmlFree((xmlChar *) atts[i]);
8123 }
8124 ctxt->input->cur = ctxt->input->base + cur;
8125 if (ctxt->wellFormed == 1) {
8126 goto reparse;
8127 }
8128 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008129}
8130
8131/**
8132 * xmlParseEndTag2:
8133 * @ctxt: an XML parser context
8134 * @line: line of the start tag
8135 * @nsNr: number of namespaces on the start tag
8136 *
8137 * parse an end of tag
8138 *
8139 * [42] ETag ::= '</' Name S? '>'
8140 *
8141 * With namespace
8142 *
8143 * [NS 9] ETag ::= '</' QName S? '>'
8144 */
8145
8146static void
8147xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008148 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008149 const xmlChar *name;
8150
8151 GROW;
8152 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008153 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008154 return;
8155 }
8156 SKIP(2);
8157
William M. Brack13dfa872004-09-18 04:52:08 +00008158 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008159 if (ctxt->input->cur[tlen] == '>') {
8160 ctxt->input->cur += tlen + 1;
8161 goto done;
8162 }
8163 ctxt->input->cur += tlen;
8164 name = (xmlChar*)1;
8165 } else {
8166 if (prefix == NULL)
8167 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8168 else
8169 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8170 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008171
8172 /*
8173 * We should definitely be at the ending "S? '>'" part
8174 */
8175 GROW;
8176 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008177 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008178 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008179 } else
8180 NEXT1;
8181
8182 /*
8183 * [ WFC: Element Type Match ]
8184 * The Name in an element's end-tag must match the element type in the
8185 * start-tag.
8186 *
8187 */
8188 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008189 if (name == NULL) name = BAD_CAST "unparseable";
8190 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008191 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008192 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008193 }
8194
8195 /*
8196 * SAX: End of Tag
8197 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008198done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008199 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8200 (!ctxt->disableSAX))
8201 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8202
Daniel Veillard0fb18932003-09-07 09:14:37 +00008203 spacePop(ctxt);
8204 if (nsNr != 0)
8205 nsPop(ctxt, nsNr);
8206 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008207}
8208
8209/**
Owen Taylor3473f882001-02-23 17:55:21 +00008210 * xmlParseCDSect:
8211 * @ctxt: an XML parser context
8212 *
8213 * Parse escaped pure raw content.
8214 *
8215 * [18] CDSect ::= CDStart CData CDEnd
8216 *
8217 * [19] CDStart ::= '<![CDATA['
8218 *
8219 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8220 *
8221 * [21] CDEnd ::= ']]>'
8222 */
8223void
8224xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8225 xmlChar *buf = NULL;
8226 int len = 0;
8227 int size = XML_PARSER_BUFFER_SIZE;
8228 int r, rl;
8229 int s, sl;
8230 int cur, l;
8231 int count = 0;
8232
Daniel Veillard8f597c32003-10-06 08:19:27 +00008233 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008234 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008235 SKIP(9);
8236 } else
8237 return;
8238
8239 ctxt->instate = XML_PARSER_CDATA_SECTION;
8240 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008241 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008242 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008243 ctxt->instate = XML_PARSER_CONTENT;
8244 return;
8245 }
8246 NEXTL(rl);
8247 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008248 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008249 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008250 ctxt->instate = XML_PARSER_CONTENT;
8251 return;
8252 }
8253 NEXTL(sl);
8254 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008255 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008256 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008257 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008258 return;
8259 }
William M. Brack871611b2003-10-18 04:53:14 +00008260 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008261 ((r != ']') || (s != ']') || (cur != '>'))) {
8262 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008263 xmlChar *tmp;
8264
Owen Taylor3473f882001-02-23 17:55:21 +00008265 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008266 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8267 if (tmp == NULL) {
8268 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008269 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008270 return;
8271 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008272 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008273 }
8274 COPY_BUF(rl,buf,len,r);
8275 r = s;
8276 rl = sl;
8277 s = cur;
8278 sl = l;
8279 count++;
8280 if (count > 50) {
8281 GROW;
8282 count = 0;
8283 }
8284 NEXTL(l);
8285 cur = CUR_CHAR(l);
8286 }
8287 buf[len] = 0;
8288 ctxt->instate = XML_PARSER_CONTENT;
8289 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008290 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008291 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008292 xmlFree(buf);
8293 return;
8294 }
8295 NEXTL(l);
8296
8297 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008298 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008299 */
8300 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8301 if (ctxt->sax->cdataBlock != NULL)
8302 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008303 else if (ctxt->sax->characters != NULL)
8304 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008305 }
8306 xmlFree(buf);
8307}
8308
8309/**
8310 * xmlParseContent:
8311 * @ctxt: an XML parser context
8312 *
8313 * Parse a content:
8314 *
8315 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8316 */
8317
8318void
8319xmlParseContent(xmlParserCtxtPtr ctxt) {
8320 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008321 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008322 ((RAW != '<') || (NXT(1) != '/'))) {
8323 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008324 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008325 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008326
8327 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008328 * First case : a Processing Instruction.
8329 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008330 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008331 xmlParsePI(ctxt);
8332 }
8333
8334 /*
8335 * Second case : a CDSection
8336 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008337 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008338 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008339 xmlParseCDSect(ctxt);
8340 }
8341
8342 /*
8343 * Third case : a comment
8344 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008345 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008346 (NXT(2) == '-') && (NXT(3) == '-')) {
8347 xmlParseComment(ctxt);
8348 ctxt->instate = XML_PARSER_CONTENT;
8349 }
8350
8351 /*
8352 * Fourth case : a sub-element.
8353 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008354 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008355 xmlParseElement(ctxt);
8356 }
8357
8358 /*
8359 * Fifth case : a reference. If if has not been resolved,
8360 * parsing returns it's Name, create the node
8361 */
8362
Daniel Veillard21a0f912001-02-25 19:54:14 +00008363 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008364 xmlParseReference(ctxt);
8365 }
8366
8367 /*
8368 * Last case, text. Note that References are handled directly.
8369 */
8370 else {
8371 xmlParseCharData(ctxt, 0);
8372 }
8373
8374 GROW;
8375 /*
8376 * Pop-up of finished entities.
8377 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008378 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008379 xmlPopInput(ctxt);
8380 SHRINK;
8381
Daniel Veillardfdc91562002-07-01 21:52:03 +00008382 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008383 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8384 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008385 ctxt->instate = XML_PARSER_EOF;
8386 break;
8387 }
8388 }
8389}
8390
8391/**
8392 * xmlParseElement:
8393 * @ctxt: an XML parser context
8394 *
8395 * parse an XML element, this is highly recursive
8396 *
8397 * [39] element ::= EmptyElemTag | STag content ETag
8398 *
8399 * [ WFC: Element Type Match ]
8400 * The Name in an element's end-tag must match the element type in the
8401 * start-tag.
8402 *
Owen Taylor3473f882001-02-23 17:55:21 +00008403 */
8404
8405void
8406xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008407 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008408 const xmlChar *prefix;
8409 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008410 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008411 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008412 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008413 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008414
8415 /* Capture start position */
8416 if (ctxt->record_info) {
8417 node_info.begin_pos = ctxt->input->consumed +
8418 (CUR_PTR - ctxt->input->base);
8419 node_info.begin_line = ctxt->input->line;
8420 }
8421
8422 if (ctxt->spaceNr == 0)
8423 spacePush(ctxt, -1);
8424 else
8425 spacePush(ctxt, *ctxt->space);
8426
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008427 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008428#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008429 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008430#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008431 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008432#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008433 else
8434 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008435#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008436 if (name == NULL) {
8437 spacePop(ctxt);
8438 return;
8439 }
8440 namePush(ctxt, name);
8441 ret = ctxt->node;
8442
Daniel Veillard4432df22003-09-28 18:58:27 +00008443#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008444 /*
8445 * [ VC: Root Element Type ]
8446 * The Name in the document type declaration must match the element
8447 * type of the root element.
8448 */
8449 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8450 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8451 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008452#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008453
8454 /*
8455 * Check for an Empty Element.
8456 */
8457 if ((RAW == '/') && (NXT(1) == '>')) {
8458 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008459 if (ctxt->sax2) {
8460 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8461 (!ctxt->disableSAX))
8462 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008463#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008464 } else {
8465 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8466 (!ctxt->disableSAX))
8467 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008468#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008469 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008470 namePop(ctxt);
8471 spacePop(ctxt);
8472 if (nsNr != ctxt->nsNr)
8473 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008474 if ( ret != NULL && ctxt->record_info ) {
8475 node_info.end_pos = ctxt->input->consumed +
8476 (CUR_PTR - ctxt->input->base);
8477 node_info.end_line = ctxt->input->line;
8478 node_info.node = ret;
8479 xmlParserAddNodeInfo(ctxt, &node_info);
8480 }
8481 return;
8482 }
8483 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008484 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008485 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008486 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8487 "Couldn't find end of Start Tag %s line %d\n",
8488 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008489
8490 /*
8491 * end of parsing of this node.
8492 */
8493 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008494 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008495 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008496 if (nsNr != ctxt->nsNr)
8497 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008498
8499 /*
8500 * Capture end position and add node
8501 */
8502 if ( ret != NULL && ctxt->record_info ) {
8503 node_info.end_pos = ctxt->input->consumed +
8504 (CUR_PTR - ctxt->input->base);
8505 node_info.end_line = ctxt->input->line;
8506 node_info.node = ret;
8507 xmlParserAddNodeInfo(ctxt, &node_info);
8508 }
8509 return;
8510 }
8511
8512 /*
8513 * Parse the content of the element:
8514 */
8515 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008516 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008517 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008518 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008519 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008520
8521 /*
8522 * end of parsing of this node.
8523 */
8524 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008525 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008526 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008527 if (nsNr != ctxt->nsNr)
8528 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008529 return;
8530 }
8531
8532 /*
8533 * parse the end of tag: '</' should be here.
8534 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008535 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008536 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008537 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008538 }
8539#ifdef LIBXML_SAX1_ENABLED
8540 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008542#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008543
8544 /*
8545 * Capture end position and add node
8546 */
8547 if ( ret != NULL && ctxt->record_info ) {
8548 node_info.end_pos = ctxt->input->consumed +
8549 (CUR_PTR - ctxt->input->base);
8550 node_info.end_line = ctxt->input->line;
8551 node_info.node = ret;
8552 xmlParserAddNodeInfo(ctxt, &node_info);
8553 }
8554}
8555
8556/**
8557 * xmlParseVersionNum:
8558 * @ctxt: an XML parser context
8559 *
8560 * parse the XML version value.
8561 *
8562 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8563 *
8564 * Returns the string giving the XML version number, or NULL
8565 */
8566xmlChar *
8567xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8568 xmlChar *buf = NULL;
8569 int len = 0;
8570 int size = 10;
8571 xmlChar cur;
8572
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008573 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008574 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008575 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008576 return(NULL);
8577 }
8578 cur = CUR;
8579 while (((cur >= 'a') && (cur <= 'z')) ||
8580 ((cur >= 'A') && (cur <= 'Z')) ||
8581 ((cur >= '0') && (cur <= '9')) ||
8582 (cur == '_') || (cur == '.') ||
8583 (cur == ':') || (cur == '-')) {
8584 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008585 xmlChar *tmp;
8586
Owen Taylor3473f882001-02-23 17:55:21 +00008587 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008588 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8589 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008590 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008591 return(NULL);
8592 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008593 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008594 }
8595 buf[len++] = cur;
8596 NEXT;
8597 cur=CUR;
8598 }
8599 buf[len] = 0;
8600 return(buf);
8601}
8602
8603/**
8604 * xmlParseVersionInfo:
8605 * @ctxt: an XML parser context
8606 *
8607 * parse the XML version.
8608 *
8609 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8610 *
8611 * [25] Eq ::= S? '=' S?
8612 *
8613 * Returns the version string, e.g. "1.0"
8614 */
8615
8616xmlChar *
8617xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8618 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008619
Daniel Veillarda07050d2003-10-19 14:46:32 +00008620 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008621 SKIP(7);
8622 SKIP_BLANKS;
8623 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008624 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 return(NULL);
8626 }
8627 NEXT;
8628 SKIP_BLANKS;
8629 if (RAW == '"') {
8630 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008631 version = xmlParseVersionNum(ctxt);
8632 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008633 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008634 } else
8635 NEXT;
8636 } else if (RAW == '\''){
8637 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008638 version = xmlParseVersionNum(ctxt);
8639 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008640 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008641 } else
8642 NEXT;
8643 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008644 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008645 }
8646 }
8647 return(version);
8648}
8649
8650/**
8651 * xmlParseEncName:
8652 * @ctxt: an XML parser context
8653 *
8654 * parse the XML encoding name
8655 *
8656 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8657 *
8658 * Returns the encoding name value or NULL
8659 */
8660xmlChar *
8661xmlParseEncName(xmlParserCtxtPtr ctxt) {
8662 xmlChar *buf = NULL;
8663 int len = 0;
8664 int size = 10;
8665 xmlChar cur;
8666
8667 cur = CUR;
8668 if (((cur >= 'a') && (cur <= 'z')) ||
8669 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008670 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008671 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008672 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008673 return(NULL);
8674 }
8675
8676 buf[len++] = cur;
8677 NEXT;
8678 cur = CUR;
8679 while (((cur >= 'a') && (cur <= 'z')) ||
8680 ((cur >= 'A') && (cur <= 'Z')) ||
8681 ((cur >= '0') && (cur <= '9')) ||
8682 (cur == '.') || (cur == '_') ||
8683 (cur == '-')) {
8684 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008685 xmlChar *tmp;
8686
Owen Taylor3473f882001-02-23 17:55:21 +00008687 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008688 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8689 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008690 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008691 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008692 return(NULL);
8693 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008694 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008695 }
8696 buf[len++] = cur;
8697 NEXT;
8698 cur = CUR;
8699 if (cur == 0) {
8700 SHRINK;
8701 GROW;
8702 cur = CUR;
8703 }
8704 }
8705 buf[len] = 0;
8706 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008707 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008708 }
8709 return(buf);
8710}
8711
8712/**
8713 * xmlParseEncodingDecl:
8714 * @ctxt: an XML parser context
8715 *
8716 * parse the XML encoding declaration
8717 *
8718 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8719 *
8720 * this setups the conversion filters.
8721 *
8722 * Returns the encoding value or NULL
8723 */
8724
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008725const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008726xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8727 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008728
8729 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008730 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008731 SKIP(8);
8732 SKIP_BLANKS;
8733 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008734 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008735 return(NULL);
8736 }
8737 NEXT;
8738 SKIP_BLANKS;
8739 if (RAW == '"') {
8740 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008741 encoding = xmlParseEncName(ctxt);
8742 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008743 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008744 } else
8745 NEXT;
8746 } else if (RAW == '\''){
8747 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008748 encoding = xmlParseEncName(ctxt);
8749 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008750 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008751 } else
8752 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008753 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008754 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008755 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008756 /*
8757 * UTF-16 encoding stwich has already taken place at this stage,
8758 * more over the little-endian/big-endian selection is already done
8759 */
8760 if ((encoding != NULL) &&
8761 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8762 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008763 if (ctxt->encoding != NULL)
8764 xmlFree((xmlChar *) ctxt->encoding);
8765 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008766 }
8767 /*
8768 * UTF-8 encoding is handled natively
8769 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008770 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008771 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8772 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008773 if (ctxt->encoding != NULL)
8774 xmlFree((xmlChar *) ctxt->encoding);
8775 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008776 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008777 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008778 xmlCharEncodingHandlerPtr handler;
8779
8780 if (ctxt->input->encoding != NULL)
8781 xmlFree((xmlChar *) ctxt->input->encoding);
8782 ctxt->input->encoding = encoding;
8783
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008784 handler = xmlFindCharEncodingHandler((const char *) encoding);
8785 if (handler != NULL) {
8786 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008787 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008788 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008789 "Unsupported encoding %s\n", encoding);
8790 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008791 }
8792 }
8793 }
8794 return(encoding);
8795}
8796
8797/**
8798 * xmlParseSDDecl:
8799 * @ctxt: an XML parser context
8800 *
8801 * parse the XML standalone declaration
8802 *
8803 * [32] SDDecl ::= S 'standalone' Eq
8804 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8805 *
8806 * [ VC: Standalone Document Declaration ]
8807 * TODO The standalone document declaration must have the value "no"
8808 * if any external markup declarations contain declarations of:
8809 * - attributes with default values, if elements to which these
8810 * attributes apply appear in the document without specifications
8811 * of values for these attributes, or
8812 * - entities (other than amp, lt, gt, apos, quot), if references
8813 * to those entities appear in the document, or
8814 * - attributes with values subject to normalization, where the
8815 * attribute appears in the document with a value which will change
8816 * as a result of normalization, or
8817 * - element types with element content, if white space occurs directly
8818 * within any instance of those types.
8819 *
8820 * Returns 1 if standalone, 0 otherwise
8821 */
8822
8823int
8824xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8825 int standalone = -1;
8826
8827 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008828 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008829 SKIP(10);
8830 SKIP_BLANKS;
8831 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008832 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008833 return(standalone);
8834 }
8835 NEXT;
8836 SKIP_BLANKS;
8837 if (RAW == '\''){
8838 NEXT;
8839 if ((RAW == 'n') && (NXT(1) == 'o')) {
8840 standalone = 0;
8841 SKIP(2);
8842 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8843 (NXT(2) == 's')) {
8844 standalone = 1;
8845 SKIP(3);
8846 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008847 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008848 }
8849 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008850 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008851 } else
8852 NEXT;
8853 } else if (RAW == '"'){
8854 NEXT;
8855 if ((RAW == 'n') && (NXT(1) == 'o')) {
8856 standalone = 0;
8857 SKIP(2);
8858 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8859 (NXT(2) == 's')) {
8860 standalone = 1;
8861 SKIP(3);
8862 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008863 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008864 }
8865 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008866 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008867 } else
8868 NEXT;
8869 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008870 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008871 }
8872 }
8873 return(standalone);
8874}
8875
8876/**
8877 * xmlParseXMLDecl:
8878 * @ctxt: an XML parser context
8879 *
8880 * parse an XML declaration header
8881 *
8882 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8883 */
8884
8885void
8886xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8887 xmlChar *version;
8888
8889 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008890 * This value for standalone indicates that the document has an
8891 * XML declaration but it does not have a standalone attribute.
8892 * It will be overwritten later if a standalone attribute is found.
8893 */
8894 ctxt->input->standalone = -2;
8895
8896 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008897 * We know that '<?xml' is here.
8898 */
8899 SKIP(5);
8900
William M. Brack76e95df2003-10-18 16:20:14 +00008901 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008902 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8903 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008904 }
8905 SKIP_BLANKS;
8906
8907 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008908 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008909 */
8910 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008911 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008912 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008913 } else {
8914 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8915 /*
8916 * TODO: Blueberry should be detected here
8917 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008918 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8919 "Unsupported version '%s'\n",
8920 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008921 }
8922 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008923 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008924 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008925 }
Owen Taylor3473f882001-02-23 17:55:21 +00008926
8927 /*
8928 * We may have the encoding declaration
8929 */
William M. Brack76e95df2003-10-18 16:20:14 +00008930 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008931 if ((RAW == '?') && (NXT(1) == '>')) {
8932 SKIP(2);
8933 return;
8934 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008936 }
8937 xmlParseEncodingDecl(ctxt);
8938 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8939 /*
8940 * The XML REC instructs us to stop parsing right here
8941 */
8942 return;
8943 }
8944
8945 /*
8946 * We may have the standalone status.
8947 */
William M. Brack76e95df2003-10-18 16:20:14 +00008948 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008949 if ((RAW == '?') && (NXT(1) == '>')) {
8950 SKIP(2);
8951 return;
8952 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008954 }
8955 SKIP_BLANKS;
8956 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8957
8958 SKIP_BLANKS;
8959 if ((RAW == '?') && (NXT(1) == '>')) {
8960 SKIP(2);
8961 } else if (RAW == '>') {
8962 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008963 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008964 NEXT;
8965 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008967 MOVETO_ENDTAG(CUR_PTR);
8968 NEXT;
8969 }
8970}
8971
8972/**
8973 * xmlParseMisc:
8974 * @ctxt: an XML parser context
8975 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008976 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008977 *
8978 * [27] Misc ::= Comment | PI | S
8979 */
8980
8981void
8982xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008983 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008984 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008985 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008986 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008987 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008988 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008989 NEXT;
8990 } else
8991 xmlParseComment(ctxt);
8992 }
8993}
8994
8995/**
8996 * xmlParseDocument:
8997 * @ctxt: an XML parser context
8998 *
8999 * parse an XML document (and build a tree if using the standard SAX
9000 * interface).
9001 *
9002 * [1] document ::= prolog element Misc*
9003 *
9004 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9005 *
9006 * Returns 0, -1 in case of error. the parser context is augmented
9007 * as a result of the parsing.
9008 */
9009
9010int
9011xmlParseDocument(xmlParserCtxtPtr ctxt) {
9012 xmlChar start[4];
9013 xmlCharEncoding enc;
9014
9015 xmlInitParser();
9016
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009017 if ((ctxt == NULL) || (ctxt->input == NULL))
9018 return(-1);
9019
Owen Taylor3473f882001-02-23 17:55:21 +00009020 GROW;
9021
9022 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009023 * SAX: detecting the level.
9024 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009025 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009026
9027 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009028 * SAX: beginning of the document processing.
9029 */
9030 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9031 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9032
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009033 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9034 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009035 /*
9036 * Get the 4 first bytes and decode the charset
9037 * if enc != XML_CHAR_ENCODING_NONE
9038 * plug some encoding conversion routines.
9039 */
9040 start[0] = RAW;
9041 start[1] = NXT(1);
9042 start[2] = NXT(2);
9043 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009044 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009045 if (enc != XML_CHAR_ENCODING_NONE) {
9046 xmlSwitchEncoding(ctxt, enc);
9047 }
Owen Taylor3473f882001-02-23 17:55:21 +00009048 }
9049
9050
9051 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009052 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009053 }
9054
9055 /*
9056 * Check for the XMLDecl in the Prolog.
9057 */
9058 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009059 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009060
9061 /*
9062 * Note that we will switch encoding on the fly.
9063 */
9064 xmlParseXMLDecl(ctxt);
9065 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9066 /*
9067 * The XML REC instructs us to stop parsing right here
9068 */
9069 return(-1);
9070 }
9071 ctxt->standalone = ctxt->input->standalone;
9072 SKIP_BLANKS;
9073 } else {
9074 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9075 }
9076 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9077 ctxt->sax->startDocument(ctxt->userData);
9078
9079 /*
9080 * The Misc part of the Prolog
9081 */
9082 GROW;
9083 xmlParseMisc(ctxt);
9084
9085 /*
9086 * Then possibly doc type declaration(s) and more Misc
9087 * (doctypedecl Misc*)?
9088 */
9089 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009090 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009091
9092 ctxt->inSubset = 1;
9093 xmlParseDocTypeDecl(ctxt);
9094 if (RAW == '[') {
9095 ctxt->instate = XML_PARSER_DTD;
9096 xmlParseInternalSubset(ctxt);
9097 }
9098
9099 /*
9100 * Create and update the external subset.
9101 */
9102 ctxt->inSubset = 2;
9103 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9104 (!ctxt->disableSAX))
9105 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9106 ctxt->extSubSystem, ctxt->extSubURI);
9107 ctxt->inSubset = 0;
9108
9109
9110 ctxt->instate = XML_PARSER_PROLOG;
9111 xmlParseMisc(ctxt);
9112 }
9113
9114 /*
9115 * Time to start parsing the tree itself
9116 */
9117 GROW;
9118 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009119 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9120 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009121 } else {
9122 ctxt->instate = XML_PARSER_CONTENT;
9123 xmlParseElement(ctxt);
9124 ctxt->instate = XML_PARSER_EPILOG;
9125
9126
9127 /*
9128 * The Misc part at the end
9129 */
9130 xmlParseMisc(ctxt);
9131
Daniel Veillard561b7f82002-03-20 21:55:57 +00009132 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009133 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009134 }
9135 ctxt->instate = XML_PARSER_EOF;
9136 }
9137
9138 /*
9139 * SAX: end of the document processing.
9140 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009141 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009142 ctxt->sax->endDocument(ctxt->userData);
9143
Daniel Veillard5997aca2002-03-18 18:36:20 +00009144 /*
9145 * Remove locally kept entity definitions if the tree was not built
9146 */
9147 if ((ctxt->myDoc != NULL) &&
9148 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9149 xmlFreeDoc(ctxt->myDoc);
9150 ctxt->myDoc = NULL;
9151 }
9152
Daniel Veillardc7612992002-02-17 22:47:37 +00009153 if (! ctxt->wellFormed) {
9154 ctxt->valid = 0;
9155 return(-1);
9156 }
Owen Taylor3473f882001-02-23 17:55:21 +00009157 return(0);
9158}
9159
9160/**
9161 * xmlParseExtParsedEnt:
9162 * @ctxt: an XML parser context
9163 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009164 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009165 * An external general parsed entity is well-formed if it matches the
9166 * production labeled extParsedEnt.
9167 *
9168 * [78] extParsedEnt ::= TextDecl? content
9169 *
9170 * Returns 0, -1 in case of error. the parser context is augmented
9171 * as a result of the parsing.
9172 */
9173
9174int
9175xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9176 xmlChar start[4];
9177 xmlCharEncoding enc;
9178
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009179 if ((ctxt == NULL) || (ctxt->input == NULL))
9180 return(-1);
9181
Owen Taylor3473f882001-02-23 17:55:21 +00009182 xmlDefaultSAXHandlerInit();
9183
Daniel Veillard309f81d2003-09-23 09:02:53 +00009184 xmlDetectSAX2(ctxt);
9185
Owen Taylor3473f882001-02-23 17:55:21 +00009186 GROW;
9187
9188 /*
9189 * SAX: beginning of the document processing.
9190 */
9191 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9192 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9193
9194 /*
9195 * Get the 4 first bytes and decode the charset
9196 * if enc != XML_CHAR_ENCODING_NONE
9197 * plug some encoding conversion routines.
9198 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009199 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9200 start[0] = RAW;
9201 start[1] = NXT(1);
9202 start[2] = NXT(2);
9203 start[3] = NXT(3);
9204 enc = xmlDetectCharEncoding(start, 4);
9205 if (enc != XML_CHAR_ENCODING_NONE) {
9206 xmlSwitchEncoding(ctxt, enc);
9207 }
Owen Taylor3473f882001-02-23 17:55:21 +00009208 }
9209
9210
9211 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009212 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009213 }
9214
9215 /*
9216 * Check for the XMLDecl in the Prolog.
9217 */
9218 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009219 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009220
9221 /*
9222 * Note that we will switch encoding on the fly.
9223 */
9224 xmlParseXMLDecl(ctxt);
9225 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9226 /*
9227 * The XML REC instructs us to stop parsing right here
9228 */
9229 return(-1);
9230 }
9231 SKIP_BLANKS;
9232 } else {
9233 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9234 }
9235 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9236 ctxt->sax->startDocument(ctxt->userData);
9237
9238 /*
9239 * Doing validity checking on chunk doesn't make sense
9240 */
9241 ctxt->instate = XML_PARSER_CONTENT;
9242 ctxt->validate = 0;
9243 ctxt->loadsubset = 0;
9244 ctxt->depth = 0;
9245
9246 xmlParseContent(ctxt);
9247
9248 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009249 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009250 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009251 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009252 }
9253
9254 /*
9255 * SAX: end of the document processing.
9256 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009257 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009258 ctxt->sax->endDocument(ctxt->userData);
9259
9260 if (! ctxt->wellFormed) return(-1);
9261 return(0);
9262}
9263
Daniel Veillard73b013f2003-09-30 12:36:01 +00009264#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009265/************************************************************************
9266 * *
9267 * Progressive parsing interfaces *
9268 * *
9269 ************************************************************************/
9270
9271/**
9272 * xmlParseLookupSequence:
9273 * @ctxt: an XML parser context
9274 * @first: the first char to lookup
9275 * @next: the next char to lookup or zero
9276 * @third: the next char to lookup or zero
9277 *
9278 * Try to find if a sequence (first, next, third) or just (first next) or
9279 * (first) is available in the input stream.
9280 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9281 * to avoid rescanning sequences of bytes, it DOES change the state of the
9282 * parser, do not use liberally.
9283 *
9284 * Returns the index to the current parsing point if the full sequence
9285 * is available, -1 otherwise.
9286 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009287static int
Owen Taylor3473f882001-02-23 17:55:21 +00009288xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9289 xmlChar next, xmlChar third) {
9290 int base, len;
9291 xmlParserInputPtr in;
9292 const xmlChar *buf;
9293
9294 in = ctxt->input;
9295 if (in == NULL) return(-1);
9296 base = in->cur - in->base;
9297 if (base < 0) return(-1);
9298 if (ctxt->checkIndex > base)
9299 base = ctxt->checkIndex;
9300 if (in->buf == NULL) {
9301 buf = in->base;
9302 len = in->length;
9303 } else {
9304 buf = in->buf->buffer->content;
9305 len = in->buf->buffer->use;
9306 }
9307 /* take into account the sequence length */
9308 if (third) len -= 2;
9309 else if (next) len --;
9310 for (;base < len;base++) {
9311 if (buf[base] == first) {
9312 if (third != 0) {
9313 if ((buf[base + 1] != next) ||
9314 (buf[base + 2] != third)) continue;
9315 } else if (next != 0) {
9316 if (buf[base + 1] != next) continue;
9317 }
9318 ctxt->checkIndex = 0;
9319#ifdef DEBUG_PUSH
9320 if (next == 0)
9321 xmlGenericError(xmlGenericErrorContext,
9322 "PP: lookup '%c' found at %d\n",
9323 first, base);
9324 else if (third == 0)
9325 xmlGenericError(xmlGenericErrorContext,
9326 "PP: lookup '%c%c' found at %d\n",
9327 first, next, base);
9328 else
9329 xmlGenericError(xmlGenericErrorContext,
9330 "PP: lookup '%c%c%c' found at %d\n",
9331 first, next, third, base);
9332#endif
9333 return(base - (in->cur - in->base));
9334 }
9335 }
9336 ctxt->checkIndex = base;
9337#ifdef DEBUG_PUSH
9338 if (next == 0)
9339 xmlGenericError(xmlGenericErrorContext,
9340 "PP: lookup '%c' failed\n", first);
9341 else if (third == 0)
9342 xmlGenericError(xmlGenericErrorContext,
9343 "PP: lookup '%c%c' failed\n", first, next);
9344 else
9345 xmlGenericError(xmlGenericErrorContext,
9346 "PP: lookup '%c%c%c' failed\n", first, next, third);
9347#endif
9348 return(-1);
9349}
9350
9351/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009352 * xmlParseGetLasts:
9353 * @ctxt: an XML parser context
9354 * @lastlt: pointer to store the last '<' from the input
9355 * @lastgt: pointer to store the last '>' from the input
9356 *
9357 * Lookup the last < and > in the current chunk
9358 */
9359static void
9360xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9361 const xmlChar **lastgt) {
9362 const xmlChar *tmp;
9363
9364 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9365 xmlGenericError(xmlGenericErrorContext,
9366 "Internal error: xmlParseGetLasts\n");
9367 return;
9368 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009369 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009370 tmp = ctxt->input->end;
9371 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009372 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009373 if (tmp < ctxt->input->base) {
9374 *lastlt = NULL;
9375 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009376 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009377 *lastlt = tmp;
9378 tmp++;
9379 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9380 if (*tmp == '\'') {
9381 tmp++;
9382 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9383 if (tmp < ctxt->input->end) tmp++;
9384 } else if (*tmp == '"') {
9385 tmp++;
9386 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9387 if (tmp < ctxt->input->end) tmp++;
9388 } else
9389 tmp++;
9390 }
9391 if (tmp < ctxt->input->end)
9392 *lastgt = tmp;
9393 else {
9394 tmp = *lastlt;
9395 tmp--;
9396 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9397 if (tmp >= ctxt->input->base)
9398 *lastgt = tmp;
9399 else
9400 *lastgt = NULL;
9401 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009402 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009403 } else {
9404 *lastlt = NULL;
9405 *lastgt = NULL;
9406 }
9407}
9408/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009409 * xmlCheckCdataPush:
9410 * @cur: pointer to the bock of characters
9411 * @len: length of the block in bytes
9412 *
9413 * Check that the block of characters is okay as SCdata content [20]
9414 *
9415 * Returns the number of bytes to pass if okay, a negative index where an
9416 * UTF-8 error occured otherwise
9417 */
9418static int
9419xmlCheckCdataPush(const xmlChar *utf, int len) {
9420 int ix;
9421 unsigned char c;
9422 int codepoint;
9423
9424 if ((utf == NULL) || (len <= 0))
9425 return(0);
9426
9427 for (ix = 0; ix < len;) { /* string is 0-terminated */
9428 c = utf[ix];
9429 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9430 if (c >= 0x20)
9431 ix++;
9432 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9433 ix++;
9434 else
9435 return(-ix);
9436 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9437 if (ix + 2 > len) return(ix);
9438 if ((utf[ix+1] & 0xc0 ) != 0x80)
9439 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009440 codepoint = (utf[ix] & 0x1f) << 6;
9441 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009442 if (!xmlIsCharQ(codepoint))
9443 return(-ix);
9444 ix += 2;
9445 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9446 if (ix + 3 > len) return(ix);
9447 if (((utf[ix+1] & 0xc0) != 0x80) ||
9448 ((utf[ix+2] & 0xc0) != 0x80))
9449 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009450 codepoint = (utf[ix] & 0xf) << 12;
9451 codepoint |= (utf[ix+1] & 0x3f) << 6;
9452 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009453 if (!xmlIsCharQ(codepoint))
9454 return(-ix);
9455 ix += 3;
9456 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9457 if (ix + 4 > len) return(ix);
9458 if (((utf[ix+1] & 0xc0) != 0x80) ||
9459 ((utf[ix+2] & 0xc0) != 0x80) ||
9460 ((utf[ix+3] & 0xc0) != 0x80))
9461 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009462 codepoint = (utf[ix] & 0x7) << 18;
9463 codepoint |= (utf[ix+1] & 0x3f) << 12;
9464 codepoint |= (utf[ix+2] & 0x3f) << 6;
9465 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009466 if (!xmlIsCharQ(codepoint))
9467 return(-ix);
9468 ix += 4;
9469 } else /* unknown encoding */
9470 return(-ix);
9471 }
9472 return(ix);
9473}
9474
9475/**
Owen Taylor3473f882001-02-23 17:55:21 +00009476 * xmlParseTryOrFinish:
9477 * @ctxt: an XML parser context
9478 * @terminate: last chunk indicator
9479 *
9480 * Try to progress on parsing
9481 *
9482 * Returns zero if no parsing was possible
9483 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009484static int
Owen Taylor3473f882001-02-23 17:55:21 +00009485xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9486 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009487 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009488 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009489 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009490
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009491 if (ctxt->input == NULL)
9492 return(0);
9493
Owen Taylor3473f882001-02-23 17:55:21 +00009494#ifdef DEBUG_PUSH
9495 switch (ctxt->instate) {
9496 case XML_PARSER_EOF:
9497 xmlGenericError(xmlGenericErrorContext,
9498 "PP: try EOF\n"); break;
9499 case XML_PARSER_START:
9500 xmlGenericError(xmlGenericErrorContext,
9501 "PP: try START\n"); break;
9502 case XML_PARSER_MISC:
9503 xmlGenericError(xmlGenericErrorContext,
9504 "PP: try MISC\n");break;
9505 case XML_PARSER_COMMENT:
9506 xmlGenericError(xmlGenericErrorContext,
9507 "PP: try COMMENT\n");break;
9508 case XML_PARSER_PROLOG:
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: try PROLOG\n");break;
9511 case XML_PARSER_START_TAG:
9512 xmlGenericError(xmlGenericErrorContext,
9513 "PP: try START_TAG\n");break;
9514 case XML_PARSER_CONTENT:
9515 xmlGenericError(xmlGenericErrorContext,
9516 "PP: try CONTENT\n");break;
9517 case XML_PARSER_CDATA_SECTION:
9518 xmlGenericError(xmlGenericErrorContext,
9519 "PP: try CDATA_SECTION\n");break;
9520 case XML_PARSER_END_TAG:
9521 xmlGenericError(xmlGenericErrorContext,
9522 "PP: try END_TAG\n");break;
9523 case XML_PARSER_ENTITY_DECL:
9524 xmlGenericError(xmlGenericErrorContext,
9525 "PP: try ENTITY_DECL\n");break;
9526 case XML_PARSER_ENTITY_VALUE:
9527 xmlGenericError(xmlGenericErrorContext,
9528 "PP: try ENTITY_VALUE\n");break;
9529 case XML_PARSER_ATTRIBUTE_VALUE:
9530 xmlGenericError(xmlGenericErrorContext,
9531 "PP: try ATTRIBUTE_VALUE\n");break;
9532 case XML_PARSER_DTD:
9533 xmlGenericError(xmlGenericErrorContext,
9534 "PP: try DTD\n");break;
9535 case XML_PARSER_EPILOG:
9536 xmlGenericError(xmlGenericErrorContext,
9537 "PP: try EPILOG\n");break;
9538 case XML_PARSER_PI:
9539 xmlGenericError(xmlGenericErrorContext,
9540 "PP: try PI\n");break;
9541 case XML_PARSER_IGNORE:
9542 xmlGenericError(xmlGenericErrorContext,
9543 "PP: try IGNORE\n");break;
9544 }
9545#endif
9546
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009547 if ((ctxt->input != NULL) &&
9548 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009549 xmlSHRINK(ctxt);
9550 ctxt->checkIndex = 0;
9551 }
9552 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009553
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009555 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009556 return(0);
9557
9558
Owen Taylor3473f882001-02-23 17:55:21 +00009559 /*
9560 * Pop-up of finished entities.
9561 */
9562 while ((RAW == 0) && (ctxt->inputNr > 1))
9563 xmlPopInput(ctxt);
9564
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009565 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009566 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009567 avail = ctxt->input->length -
9568 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009569 else {
9570 /*
9571 * If we are operating on converted input, try to flush
9572 * remainng chars to avoid them stalling in the non-converted
9573 * buffer.
9574 */
9575 if ((ctxt->input->buf->raw != NULL) &&
9576 (ctxt->input->buf->raw->use > 0)) {
9577 int base = ctxt->input->base -
9578 ctxt->input->buf->buffer->content;
9579 int current = ctxt->input->cur - ctxt->input->base;
9580
9581 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9582 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9583 ctxt->input->cur = ctxt->input->base + current;
9584 ctxt->input->end =
9585 &ctxt->input->buf->buffer->content[
9586 ctxt->input->buf->buffer->use];
9587 }
9588 avail = ctxt->input->buf->buffer->use -
9589 (ctxt->input->cur - ctxt->input->base);
9590 }
Owen Taylor3473f882001-02-23 17:55:21 +00009591 if (avail < 1)
9592 goto done;
9593 switch (ctxt->instate) {
9594 case XML_PARSER_EOF:
9595 /*
9596 * Document parsing is done !
9597 */
9598 goto done;
9599 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009600 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9601 xmlChar start[4];
9602 xmlCharEncoding enc;
9603
9604 /*
9605 * Very first chars read from the document flow.
9606 */
9607 if (avail < 4)
9608 goto done;
9609
9610 /*
9611 * Get the 4 first bytes and decode the charset
9612 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009613 * plug some encoding conversion routines,
9614 * else xmlSwitchEncoding will set to (default)
9615 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009616 */
9617 start[0] = RAW;
9618 start[1] = NXT(1);
9619 start[2] = NXT(2);
9620 start[3] = NXT(3);
9621 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009622 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009623 break;
9624 }
Owen Taylor3473f882001-02-23 17:55:21 +00009625
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009626 if (avail < 2)
9627 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009628 cur = ctxt->input->cur[0];
9629 next = ctxt->input->cur[1];
9630 if (cur == 0) {
9631 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9632 ctxt->sax->setDocumentLocator(ctxt->userData,
9633 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009634 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009635 ctxt->instate = XML_PARSER_EOF;
9636#ifdef DEBUG_PUSH
9637 xmlGenericError(xmlGenericErrorContext,
9638 "PP: entering EOF\n");
9639#endif
9640 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9641 ctxt->sax->endDocument(ctxt->userData);
9642 goto done;
9643 }
9644 if ((cur == '<') && (next == '?')) {
9645 /* PI or XML decl */
9646 if (avail < 5) return(ret);
9647 if ((!terminate) &&
9648 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9649 return(ret);
9650 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9651 ctxt->sax->setDocumentLocator(ctxt->userData,
9652 &xmlDefaultSAXLocator);
9653 if ((ctxt->input->cur[2] == 'x') &&
9654 (ctxt->input->cur[3] == 'm') &&
9655 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009656 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009657 ret += 5;
9658#ifdef DEBUG_PUSH
9659 xmlGenericError(xmlGenericErrorContext,
9660 "PP: Parsing XML Decl\n");
9661#endif
9662 xmlParseXMLDecl(ctxt);
9663 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9664 /*
9665 * The XML REC instructs us to stop parsing right
9666 * here
9667 */
9668 ctxt->instate = XML_PARSER_EOF;
9669 return(0);
9670 }
9671 ctxt->standalone = ctxt->input->standalone;
9672 if ((ctxt->encoding == NULL) &&
9673 (ctxt->input->encoding != NULL))
9674 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9675 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9676 (!ctxt->disableSAX))
9677 ctxt->sax->startDocument(ctxt->userData);
9678 ctxt->instate = XML_PARSER_MISC;
9679#ifdef DEBUG_PUSH
9680 xmlGenericError(xmlGenericErrorContext,
9681 "PP: entering MISC\n");
9682#endif
9683 } else {
9684 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9685 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9686 (!ctxt->disableSAX))
9687 ctxt->sax->startDocument(ctxt->userData);
9688 ctxt->instate = XML_PARSER_MISC;
9689#ifdef DEBUG_PUSH
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: entering MISC\n");
9692#endif
9693 }
9694 } else {
9695 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9696 ctxt->sax->setDocumentLocator(ctxt->userData,
9697 &xmlDefaultSAXLocator);
9698 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009699 if (ctxt->version == NULL) {
9700 xmlErrMemory(ctxt, NULL);
9701 break;
9702 }
Owen Taylor3473f882001-02-23 17:55:21 +00009703 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9704 (!ctxt->disableSAX))
9705 ctxt->sax->startDocument(ctxt->userData);
9706 ctxt->instate = XML_PARSER_MISC;
9707#ifdef DEBUG_PUSH
9708 xmlGenericError(xmlGenericErrorContext,
9709 "PP: entering MISC\n");
9710#endif
9711 }
9712 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009713 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009714 const xmlChar *name;
9715 const xmlChar *prefix;
9716 const xmlChar *URI;
9717 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009718
9719 if ((avail < 2) && (ctxt->inputNr == 1))
9720 goto done;
9721 cur = ctxt->input->cur[0];
9722 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009723 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009724 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009725 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9726 ctxt->sax->endDocument(ctxt->userData);
9727 goto done;
9728 }
9729 if (!terminate) {
9730 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009731 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009732 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009733 goto done;
9734 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9735 goto done;
9736 }
9737 }
9738 if (ctxt->spaceNr == 0)
9739 spacePush(ctxt, -1);
9740 else
9741 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009742#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009743 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009744#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009745 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009746#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009747 else
9748 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009749#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009750 if (name == NULL) {
9751 spacePop(ctxt);
9752 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009753 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9754 ctxt->sax->endDocument(ctxt->userData);
9755 goto done;
9756 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009757#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009758 /*
9759 * [ VC: Root Element Type ]
9760 * The Name in the document type declaration must match
9761 * the element type of the root element.
9762 */
9763 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9764 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9765 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009766#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009767
9768 /*
9769 * Check for an Empty Element.
9770 */
9771 if ((RAW == '/') && (NXT(1) == '>')) {
9772 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009773
9774 if (ctxt->sax2) {
9775 if ((ctxt->sax != NULL) &&
9776 (ctxt->sax->endElementNs != NULL) &&
9777 (!ctxt->disableSAX))
9778 ctxt->sax->endElementNs(ctxt->userData, name,
9779 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009780 if (ctxt->nsNr - nsNr > 0)
9781 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009782#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009783 } else {
9784 if ((ctxt->sax != NULL) &&
9785 (ctxt->sax->endElement != NULL) &&
9786 (!ctxt->disableSAX))
9787 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009788#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009789 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009790 spacePop(ctxt);
9791 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009792 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009793 } else {
9794 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009795 }
9796 break;
9797 }
9798 if (RAW == '>') {
9799 NEXT;
9800 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009801 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009802 "Couldn't find end of Start Tag %s\n",
9803 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009804 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009805 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009806 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009807 if (ctxt->sax2)
9808 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009809#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009810 else
9811 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009812#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009813
Daniel Veillarda880b122003-04-21 21:36:41 +00009814 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009815 break;
9816 }
9817 case XML_PARSER_CONTENT: {
9818 const xmlChar *test;
9819 unsigned int cons;
9820 if ((avail < 2) && (ctxt->inputNr == 1))
9821 goto done;
9822 cur = ctxt->input->cur[0];
9823 next = ctxt->input->cur[1];
9824
9825 test = CUR_PTR;
9826 cons = ctxt->input->consumed;
9827 if ((cur == '<') && (next == '/')) {
9828 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009829 break;
9830 } else if ((cur == '<') && (next == '?')) {
9831 if ((!terminate) &&
9832 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9833 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009834 xmlParsePI(ctxt);
9835 } else if ((cur == '<') && (next != '!')) {
9836 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009837 break;
9838 } else if ((cur == '<') && (next == '!') &&
9839 (ctxt->input->cur[2] == '-') &&
9840 (ctxt->input->cur[3] == '-')) {
9841 if ((!terminate) &&
9842 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9843 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009844 xmlParseComment(ctxt);
9845 ctxt->instate = XML_PARSER_CONTENT;
9846 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9847 (ctxt->input->cur[2] == '[') &&
9848 (ctxt->input->cur[3] == 'C') &&
9849 (ctxt->input->cur[4] == 'D') &&
9850 (ctxt->input->cur[5] == 'A') &&
9851 (ctxt->input->cur[6] == 'T') &&
9852 (ctxt->input->cur[7] == 'A') &&
9853 (ctxt->input->cur[8] == '[')) {
9854 SKIP(9);
9855 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009856 break;
9857 } else if ((cur == '<') && (next == '!') &&
9858 (avail < 9)) {
9859 goto done;
9860 } else if (cur == '&') {
9861 if ((!terminate) &&
9862 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9863 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009864 xmlParseReference(ctxt);
9865 } else {
9866 /* TODO Avoid the extra copy, handle directly !!! */
9867 /*
9868 * Goal of the following test is:
9869 * - minimize calls to the SAX 'character' callback
9870 * when they are mergeable
9871 * - handle an problem for isBlank when we only parse
9872 * a sequence of blank chars and the next one is
9873 * not available to check against '<' presence.
9874 * - tries to homogenize the differences in SAX
9875 * callbacks between the push and pull versions
9876 * of the parser.
9877 */
9878 if ((ctxt->inputNr == 1) &&
9879 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9880 if (!terminate) {
9881 if (ctxt->progressive) {
9882 if ((lastlt == NULL) ||
9883 (ctxt->input->cur > lastlt))
9884 goto done;
9885 } else if (xmlParseLookupSequence(ctxt,
9886 '<', 0, 0) < 0) {
9887 goto done;
9888 }
9889 }
9890 }
9891 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009892 xmlParseCharData(ctxt, 0);
9893 }
9894 /*
9895 * Pop-up of finished entities.
9896 */
9897 while ((RAW == 0) && (ctxt->inputNr > 1))
9898 xmlPopInput(ctxt);
9899 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009900 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9901 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009902 ctxt->instate = XML_PARSER_EOF;
9903 break;
9904 }
9905 break;
9906 }
9907 case XML_PARSER_END_TAG:
9908 if (avail < 2)
9909 goto done;
9910 if (!terminate) {
9911 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009912 /* > can be found unescaped in attribute values */
9913 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009914 goto done;
9915 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9916 goto done;
9917 }
9918 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009919 if (ctxt->sax2) {
9920 xmlParseEndTag2(ctxt,
9921 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9922 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009923 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009924 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009925 }
9926#ifdef LIBXML_SAX1_ENABLED
9927 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009928 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009929#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009930 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009931 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009932 } else {
9933 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009934 }
9935 break;
9936 case XML_PARSER_CDATA_SECTION: {
9937 /*
9938 * The Push mode need to have the SAX callback for
9939 * cdataBlock merge back contiguous callbacks.
9940 */
9941 int base;
9942
9943 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9944 if (base < 0) {
9945 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009946 int tmp;
9947
9948 tmp = xmlCheckCdataPush(ctxt->input->cur,
9949 XML_PARSER_BIG_BUFFER_SIZE);
9950 if (tmp < 0) {
9951 tmp = -tmp;
9952 ctxt->input->cur += tmp;
9953 goto encoding_error;
9954 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009955 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9956 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009957 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009958 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009959 else if (ctxt->sax->characters != NULL)
9960 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009961 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009962 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009963 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009964 ctxt->checkIndex = 0;
9965 }
9966 goto done;
9967 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009968 int tmp;
9969
9970 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9971 if ((tmp < 0) || (tmp != base)) {
9972 tmp = -tmp;
9973 ctxt->input->cur += tmp;
9974 goto encoding_error;
9975 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009976 if ((ctxt->sax != NULL) && (base > 0) &&
9977 (!ctxt->disableSAX)) {
9978 if (ctxt->sax->cdataBlock != NULL)
9979 ctxt->sax->cdataBlock(ctxt->userData,
9980 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009981 else if (ctxt->sax->characters != NULL)
9982 ctxt->sax->characters(ctxt->userData,
9983 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009984 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009985 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009986 ctxt->checkIndex = 0;
9987 ctxt->instate = XML_PARSER_CONTENT;
9988#ifdef DEBUG_PUSH
9989 xmlGenericError(xmlGenericErrorContext,
9990 "PP: entering CONTENT\n");
9991#endif
9992 }
9993 break;
9994 }
Owen Taylor3473f882001-02-23 17:55:21 +00009995 case XML_PARSER_MISC:
9996 SKIP_BLANKS;
9997 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009998 avail = ctxt->input->length -
9999 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010000 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010001 avail = ctxt->input->buf->buffer->use -
10002 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010003 if (avail < 2)
10004 goto done;
10005 cur = ctxt->input->cur[0];
10006 next = ctxt->input->cur[1];
10007 if ((cur == '<') && (next == '?')) {
10008 if ((!terminate) &&
10009 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10010 goto done;
10011#ifdef DEBUG_PUSH
10012 xmlGenericError(xmlGenericErrorContext,
10013 "PP: Parsing PI\n");
10014#endif
10015 xmlParsePI(ctxt);
10016 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010017 (ctxt->input->cur[2] == '-') &&
10018 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010019 if ((!terminate) &&
10020 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10021 goto done;
10022#ifdef DEBUG_PUSH
10023 xmlGenericError(xmlGenericErrorContext,
10024 "PP: Parsing Comment\n");
10025#endif
10026 xmlParseComment(ctxt);
10027 ctxt->instate = XML_PARSER_MISC;
10028 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010029 (ctxt->input->cur[2] == 'D') &&
10030 (ctxt->input->cur[3] == 'O') &&
10031 (ctxt->input->cur[4] == 'C') &&
10032 (ctxt->input->cur[5] == 'T') &&
10033 (ctxt->input->cur[6] == 'Y') &&
10034 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010035 (ctxt->input->cur[8] == 'E')) {
10036 if ((!terminate) &&
10037 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10038 goto done;
10039#ifdef DEBUG_PUSH
10040 xmlGenericError(xmlGenericErrorContext,
10041 "PP: Parsing internal subset\n");
10042#endif
10043 ctxt->inSubset = 1;
10044 xmlParseDocTypeDecl(ctxt);
10045 if (RAW == '[') {
10046 ctxt->instate = XML_PARSER_DTD;
10047#ifdef DEBUG_PUSH
10048 xmlGenericError(xmlGenericErrorContext,
10049 "PP: entering DTD\n");
10050#endif
10051 } else {
10052 /*
10053 * Create and update the external subset.
10054 */
10055 ctxt->inSubset = 2;
10056 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10057 (ctxt->sax->externalSubset != NULL))
10058 ctxt->sax->externalSubset(ctxt->userData,
10059 ctxt->intSubName, ctxt->extSubSystem,
10060 ctxt->extSubURI);
10061 ctxt->inSubset = 0;
10062 ctxt->instate = XML_PARSER_PROLOG;
10063#ifdef DEBUG_PUSH
10064 xmlGenericError(xmlGenericErrorContext,
10065 "PP: entering PROLOG\n");
10066#endif
10067 }
10068 } else if ((cur == '<') && (next == '!') &&
10069 (avail < 9)) {
10070 goto done;
10071 } else {
10072 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010073 ctxt->progressive = 1;
10074 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010075#ifdef DEBUG_PUSH
10076 xmlGenericError(xmlGenericErrorContext,
10077 "PP: entering START_TAG\n");
10078#endif
10079 }
10080 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010081 case XML_PARSER_PROLOG:
10082 SKIP_BLANKS;
10083 if (ctxt->input->buf == NULL)
10084 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10085 else
10086 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10087 if (avail < 2)
10088 goto done;
10089 cur = ctxt->input->cur[0];
10090 next = ctxt->input->cur[1];
10091 if ((cur == '<') && (next == '?')) {
10092 if ((!terminate) &&
10093 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10094 goto done;
10095#ifdef DEBUG_PUSH
10096 xmlGenericError(xmlGenericErrorContext,
10097 "PP: Parsing PI\n");
10098#endif
10099 xmlParsePI(ctxt);
10100 } else if ((cur == '<') && (next == '!') &&
10101 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10102 if ((!terminate) &&
10103 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10104 goto done;
10105#ifdef DEBUG_PUSH
10106 xmlGenericError(xmlGenericErrorContext,
10107 "PP: Parsing Comment\n");
10108#endif
10109 xmlParseComment(ctxt);
10110 ctxt->instate = XML_PARSER_PROLOG;
10111 } else if ((cur == '<') && (next == '!') &&
10112 (avail < 4)) {
10113 goto done;
10114 } else {
10115 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010116 if (ctxt->progressive == 0)
10117 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010118 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010119#ifdef DEBUG_PUSH
10120 xmlGenericError(xmlGenericErrorContext,
10121 "PP: entering START_TAG\n");
10122#endif
10123 }
10124 break;
10125 case XML_PARSER_EPILOG:
10126 SKIP_BLANKS;
10127 if (ctxt->input->buf == NULL)
10128 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10129 else
10130 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10131 if (avail < 2)
10132 goto done;
10133 cur = ctxt->input->cur[0];
10134 next = ctxt->input->cur[1];
10135 if ((cur == '<') && (next == '?')) {
10136 if ((!terminate) &&
10137 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10138 goto done;
10139#ifdef DEBUG_PUSH
10140 xmlGenericError(xmlGenericErrorContext,
10141 "PP: Parsing PI\n");
10142#endif
10143 xmlParsePI(ctxt);
10144 ctxt->instate = XML_PARSER_EPILOG;
10145 } else if ((cur == '<') && (next == '!') &&
10146 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10147 if ((!terminate) &&
10148 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10149 goto done;
10150#ifdef DEBUG_PUSH
10151 xmlGenericError(xmlGenericErrorContext,
10152 "PP: Parsing Comment\n");
10153#endif
10154 xmlParseComment(ctxt);
10155 ctxt->instate = XML_PARSER_EPILOG;
10156 } else if ((cur == '<') && (next == '!') &&
10157 (avail < 4)) {
10158 goto done;
10159 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010160 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010161 ctxt->instate = XML_PARSER_EOF;
10162#ifdef DEBUG_PUSH
10163 xmlGenericError(xmlGenericErrorContext,
10164 "PP: entering EOF\n");
10165#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010166 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010167 ctxt->sax->endDocument(ctxt->userData);
10168 goto done;
10169 }
10170 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010171 case XML_PARSER_DTD: {
10172 /*
10173 * Sorry but progressive parsing of the internal subset
10174 * is not expected to be supported. We first check that
10175 * the full content of the internal subset is available and
10176 * the parsing is launched only at that point.
10177 * Internal subset ends up with "']' S? '>'" in an unescaped
10178 * section and not in a ']]>' sequence which are conditional
10179 * sections (whoever argued to keep that crap in XML deserve
10180 * a place in hell !).
10181 */
10182 int base, i;
10183 xmlChar *buf;
10184 xmlChar quote = 0;
10185
10186 base = ctxt->input->cur - ctxt->input->base;
10187 if (base < 0) return(0);
10188 if (ctxt->checkIndex > base)
10189 base = ctxt->checkIndex;
10190 buf = ctxt->input->buf->buffer->content;
10191 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10192 base++) {
10193 if (quote != 0) {
10194 if (buf[base] == quote)
10195 quote = 0;
10196 continue;
10197 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010198 if ((quote == 0) && (buf[base] == '<')) {
10199 int found = 0;
10200 /* special handling of comments */
10201 if (((unsigned int) base + 4 <
10202 ctxt->input->buf->buffer->use) &&
10203 (buf[base + 1] == '!') &&
10204 (buf[base + 2] == '-') &&
10205 (buf[base + 3] == '-')) {
10206 for (;(unsigned int) base + 3 <
10207 ctxt->input->buf->buffer->use; base++) {
10208 if ((buf[base] == '-') &&
10209 (buf[base + 1] == '-') &&
10210 (buf[base + 2] == '>')) {
10211 found = 1;
10212 base += 2;
10213 break;
10214 }
10215 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010216 if (!found) {
10217#if 0
10218 fprintf(stderr, "unfinished comment\n");
10219#endif
10220 break; /* for */
10221 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010222 continue;
10223 }
10224 }
Owen Taylor3473f882001-02-23 17:55:21 +000010225 if (buf[base] == '"') {
10226 quote = '"';
10227 continue;
10228 }
10229 if (buf[base] == '\'') {
10230 quote = '\'';
10231 continue;
10232 }
10233 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010234#if 0
10235 fprintf(stderr, "%c%c%c%c: ", buf[base],
10236 buf[base + 1], buf[base + 2], buf[base + 3]);
10237#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010238 if ((unsigned int) base +1 >=
10239 ctxt->input->buf->buffer->use)
10240 break;
10241 if (buf[base + 1] == ']') {
10242 /* conditional crap, skip both ']' ! */
10243 base++;
10244 continue;
10245 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010246 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010247 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10248 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010249 if (buf[base + i] == '>') {
10250#if 0
10251 fprintf(stderr, "found\n");
10252#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010253 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010254 }
10255 if (!IS_BLANK_CH(buf[base + i])) {
10256#if 0
10257 fprintf(stderr, "not found\n");
10258#endif
10259 goto not_end_of_int_subset;
10260 }
Owen Taylor3473f882001-02-23 17:55:21 +000010261 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010262#if 0
10263 fprintf(stderr, "end of stream\n");
10264#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010265 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010266
Owen Taylor3473f882001-02-23 17:55:21 +000010267 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010268not_end_of_int_subset:
10269 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010270 }
10271 /*
10272 * We didn't found the end of the Internal subset
10273 */
Owen Taylor3473f882001-02-23 17:55:21 +000010274#ifdef DEBUG_PUSH
10275 if (next == 0)
10276 xmlGenericError(xmlGenericErrorContext,
10277 "PP: lookup of int subset end filed\n");
10278#endif
10279 goto done;
10280
10281found_end_int_subset:
10282 xmlParseInternalSubset(ctxt);
10283 ctxt->inSubset = 2;
10284 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10285 (ctxt->sax->externalSubset != NULL))
10286 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10287 ctxt->extSubSystem, ctxt->extSubURI);
10288 ctxt->inSubset = 0;
10289 ctxt->instate = XML_PARSER_PROLOG;
10290 ctxt->checkIndex = 0;
10291#ifdef DEBUG_PUSH
10292 xmlGenericError(xmlGenericErrorContext,
10293 "PP: entering PROLOG\n");
10294#endif
10295 break;
10296 }
10297 case XML_PARSER_COMMENT:
10298 xmlGenericError(xmlGenericErrorContext,
10299 "PP: internal error, state == COMMENT\n");
10300 ctxt->instate = XML_PARSER_CONTENT;
10301#ifdef DEBUG_PUSH
10302 xmlGenericError(xmlGenericErrorContext,
10303 "PP: entering CONTENT\n");
10304#endif
10305 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010306 case XML_PARSER_IGNORE:
10307 xmlGenericError(xmlGenericErrorContext,
10308 "PP: internal error, state == IGNORE");
10309 ctxt->instate = XML_PARSER_DTD;
10310#ifdef DEBUG_PUSH
10311 xmlGenericError(xmlGenericErrorContext,
10312 "PP: entering DTD\n");
10313#endif
10314 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010315 case XML_PARSER_PI:
10316 xmlGenericError(xmlGenericErrorContext,
10317 "PP: internal error, state == PI\n");
10318 ctxt->instate = XML_PARSER_CONTENT;
10319#ifdef DEBUG_PUSH
10320 xmlGenericError(xmlGenericErrorContext,
10321 "PP: entering CONTENT\n");
10322#endif
10323 break;
10324 case XML_PARSER_ENTITY_DECL:
10325 xmlGenericError(xmlGenericErrorContext,
10326 "PP: internal error, state == ENTITY_DECL\n");
10327 ctxt->instate = XML_PARSER_DTD;
10328#ifdef DEBUG_PUSH
10329 xmlGenericError(xmlGenericErrorContext,
10330 "PP: entering DTD\n");
10331#endif
10332 break;
10333 case XML_PARSER_ENTITY_VALUE:
10334 xmlGenericError(xmlGenericErrorContext,
10335 "PP: internal error, state == ENTITY_VALUE\n");
10336 ctxt->instate = XML_PARSER_CONTENT;
10337#ifdef DEBUG_PUSH
10338 xmlGenericError(xmlGenericErrorContext,
10339 "PP: entering DTD\n");
10340#endif
10341 break;
10342 case XML_PARSER_ATTRIBUTE_VALUE:
10343 xmlGenericError(xmlGenericErrorContext,
10344 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10345 ctxt->instate = XML_PARSER_START_TAG;
10346#ifdef DEBUG_PUSH
10347 xmlGenericError(xmlGenericErrorContext,
10348 "PP: entering START_TAG\n");
10349#endif
10350 break;
10351 case XML_PARSER_SYSTEM_LITERAL:
10352 xmlGenericError(xmlGenericErrorContext,
10353 "PP: internal error, state == SYSTEM_LITERAL\n");
10354 ctxt->instate = XML_PARSER_START_TAG;
10355#ifdef DEBUG_PUSH
10356 xmlGenericError(xmlGenericErrorContext,
10357 "PP: entering START_TAG\n");
10358#endif
10359 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010360 case XML_PARSER_PUBLIC_LITERAL:
10361 xmlGenericError(xmlGenericErrorContext,
10362 "PP: internal error, state == PUBLIC_LITERAL\n");
10363 ctxt->instate = XML_PARSER_START_TAG;
10364#ifdef DEBUG_PUSH
10365 xmlGenericError(xmlGenericErrorContext,
10366 "PP: entering START_TAG\n");
10367#endif
10368 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010369 }
10370 }
10371done:
10372#ifdef DEBUG_PUSH
10373 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10374#endif
10375 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010376encoding_error:
10377 {
10378 char buffer[150];
10379
10380 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10381 ctxt->input->cur[0], ctxt->input->cur[1],
10382 ctxt->input->cur[2], ctxt->input->cur[3]);
10383 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10384 "Input is not proper UTF-8, indicate encoding !\n%s",
10385 BAD_CAST buffer, NULL);
10386 }
10387 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010388}
10389
10390/**
Owen Taylor3473f882001-02-23 17:55:21 +000010391 * xmlParseChunk:
10392 * @ctxt: an XML parser context
10393 * @chunk: an char array
10394 * @size: the size in byte of the chunk
10395 * @terminate: last chunk indicator
10396 *
10397 * Parse a Chunk of memory
10398 *
10399 * Returns zero if no error, the xmlParserErrors otherwise.
10400 */
10401int
10402xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10403 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010404 int end_in_lf = 0;
10405
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010406 if (ctxt == NULL)
10407 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010408 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010409 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010410 if (ctxt->instate == XML_PARSER_START)
10411 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010412 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10413 (chunk[size - 1] == '\r')) {
10414 end_in_lf = 1;
10415 size--;
10416 }
Owen Taylor3473f882001-02-23 17:55:21 +000010417 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10418 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10419 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10420 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010421 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010422
William M. Bracka3215c72004-07-31 16:24:01 +000010423 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10424 if (res < 0) {
10425 ctxt->errNo = XML_PARSER_EOF;
10426 ctxt->disableSAX = 1;
10427 return (XML_PARSER_EOF);
10428 }
Owen Taylor3473f882001-02-23 17:55:21 +000010429 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10430 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010431 ctxt->input->end =
10432 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010433#ifdef DEBUG_PUSH
10434 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10435#endif
10436
Owen Taylor3473f882001-02-23 17:55:21 +000010437 } else if (ctxt->instate != XML_PARSER_EOF) {
10438 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10439 xmlParserInputBufferPtr in = ctxt->input->buf;
10440 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10441 (in->raw != NULL)) {
10442 int nbchars;
10443
10444 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10445 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010446 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010447 xmlGenericError(xmlGenericErrorContext,
10448 "xmlParseChunk: encoder error\n");
10449 return(XML_ERR_INVALID_ENCODING);
10450 }
10451 }
10452 }
10453 }
10454 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010455 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10456 (ctxt->input->buf != NULL)) {
10457 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10458 }
Daniel Veillard14412512005-01-21 23:53:26 +000010459 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010460 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010461 if (terminate) {
10462 /*
10463 * Check for termination
10464 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010465 int avail = 0;
10466
10467 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010468 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010469 avail = ctxt->input->length -
10470 (ctxt->input->cur - ctxt->input->base);
10471 else
10472 avail = ctxt->input->buf->buffer->use -
10473 (ctxt->input->cur - ctxt->input->base);
10474 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010475
Owen Taylor3473f882001-02-23 17:55:21 +000010476 if ((ctxt->instate != XML_PARSER_EOF) &&
10477 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010478 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010479 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010480 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010481 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010482 }
Owen Taylor3473f882001-02-23 17:55:21 +000010483 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010484 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010485 ctxt->sax->endDocument(ctxt->userData);
10486 }
10487 ctxt->instate = XML_PARSER_EOF;
10488 }
10489 return((xmlParserErrors) ctxt->errNo);
10490}
10491
10492/************************************************************************
10493 * *
10494 * I/O front end functions to the parser *
10495 * *
10496 ************************************************************************/
10497
10498/**
Owen Taylor3473f882001-02-23 17:55:21 +000010499 * xmlCreatePushParserCtxt:
10500 * @sax: a SAX handler
10501 * @user_data: The user data returned on SAX callbacks
10502 * @chunk: a pointer to an array of chars
10503 * @size: number of chars in the array
10504 * @filename: an optional file name or URI
10505 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010506 * Create a parser context for using the XML parser in push mode.
10507 * If @buffer and @size are non-NULL, the data is used to detect
10508 * the encoding. The remaining characters will be parsed so they
10509 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010510 * To allow content encoding detection, @size should be >= 4
10511 * The value of @filename is used for fetching external entities
10512 * and error/warning reports.
10513 *
10514 * Returns the new parser context or NULL
10515 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010516
Owen Taylor3473f882001-02-23 17:55:21 +000010517xmlParserCtxtPtr
10518xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10519 const char *chunk, int size, const char *filename) {
10520 xmlParserCtxtPtr ctxt;
10521 xmlParserInputPtr inputStream;
10522 xmlParserInputBufferPtr buf;
10523 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10524
10525 /*
10526 * plug some encoding conversion routines
10527 */
10528 if ((chunk != NULL) && (size >= 4))
10529 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10530
10531 buf = xmlAllocParserInputBuffer(enc);
10532 if (buf == NULL) return(NULL);
10533
10534 ctxt = xmlNewParserCtxt();
10535 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010536 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010537 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010538 return(NULL);
10539 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010540 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010541 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10542 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010543 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010544 xmlFreeParserInputBuffer(buf);
10545 xmlFreeParserCtxt(ctxt);
10546 return(NULL);
10547 }
Owen Taylor3473f882001-02-23 17:55:21 +000010548 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010549#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010550 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010551#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010552 xmlFree(ctxt->sax);
10553 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10554 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010555 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010556 xmlFreeParserInputBuffer(buf);
10557 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010558 return(NULL);
10559 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010560 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10561 if (sax->initialized == XML_SAX2_MAGIC)
10562 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10563 else
10564 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010565 if (user_data != NULL)
10566 ctxt->userData = user_data;
10567 }
10568 if (filename == NULL) {
10569 ctxt->directory = NULL;
10570 } else {
10571 ctxt->directory = xmlParserGetDirectory(filename);
10572 }
10573
10574 inputStream = xmlNewInputStream(ctxt);
10575 if (inputStream == NULL) {
10576 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010577 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010578 return(NULL);
10579 }
10580
10581 if (filename == NULL)
10582 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010583 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010584 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010585 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010586 if (inputStream->filename == NULL) {
10587 xmlFreeParserCtxt(ctxt);
10588 xmlFreeParserInputBuffer(buf);
10589 return(NULL);
10590 }
10591 }
Owen Taylor3473f882001-02-23 17:55:21 +000010592 inputStream->buf = buf;
10593 inputStream->base = inputStream->buf->buffer->content;
10594 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010595 inputStream->end =
10596 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010597
10598 inputPush(ctxt, inputStream);
10599
William M. Brack3a1cd212005-02-11 14:35:54 +000010600 /*
10601 * If the caller didn't provide an initial 'chunk' for determining
10602 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10603 * that it can be automatically determined later
10604 */
10605 if ((size == 0) || (chunk == NULL)) {
10606 ctxt->charset = XML_CHAR_ENCODING_NONE;
10607 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010608 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10609 int cur = ctxt->input->cur - ctxt->input->base;
10610
Owen Taylor3473f882001-02-23 17:55:21 +000010611 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010612
10613 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10614 ctxt->input->cur = ctxt->input->base + cur;
10615 ctxt->input->end =
10616 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010617#ifdef DEBUG_PUSH
10618 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10619#endif
10620 }
10621
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010622 if (enc != XML_CHAR_ENCODING_NONE) {
10623 xmlSwitchEncoding(ctxt, enc);
10624 }
10625
Owen Taylor3473f882001-02-23 17:55:21 +000010626 return(ctxt);
10627}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010628#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010629
10630/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010631 * xmlStopParser:
10632 * @ctxt: an XML parser context
10633 *
10634 * Blocks further parser processing
10635 */
10636void
10637xmlStopParser(xmlParserCtxtPtr ctxt) {
10638 if (ctxt == NULL)
10639 return;
10640 ctxt->instate = XML_PARSER_EOF;
10641 ctxt->disableSAX = 1;
10642 if (ctxt->input != NULL) {
10643 ctxt->input->cur = BAD_CAST"";
10644 ctxt->input->base = ctxt->input->cur;
10645 }
10646}
10647
10648/**
Owen Taylor3473f882001-02-23 17:55:21 +000010649 * xmlCreateIOParserCtxt:
10650 * @sax: a SAX handler
10651 * @user_data: The user data returned on SAX callbacks
10652 * @ioread: an I/O read function
10653 * @ioclose: an I/O close function
10654 * @ioctx: an I/O handler
10655 * @enc: the charset encoding if known
10656 *
10657 * Create a parser context for using the XML parser with an existing
10658 * I/O stream
10659 *
10660 * Returns the new parser context or NULL
10661 */
10662xmlParserCtxtPtr
10663xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10664 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10665 void *ioctx, xmlCharEncoding enc) {
10666 xmlParserCtxtPtr ctxt;
10667 xmlParserInputPtr inputStream;
10668 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010669
10670 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010671
10672 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10673 if (buf == NULL) return(NULL);
10674
10675 ctxt = xmlNewParserCtxt();
10676 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010677 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010678 return(NULL);
10679 }
10680 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010681#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010682 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010683#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010684 xmlFree(ctxt->sax);
10685 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10686 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010687 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010688 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010689 return(NULL);
10690 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010691 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10692 if (sax->initialized == XML_SAX2_MAGIC)
10693 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10694 else
10695 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010696 if (user_data != NULL)
10697 ctxt->userData = user_data;
10698 }
10699
10700 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10701 if (inputStream == NULL) {
10702 xmlFreeParserCtxt(ctxt);
10703 return(NULL);
10704 }
10705 inputPush(ctxt, inputStream);
10706
10707 return(ctxt);
10708}
10709
Daniel Veillard4432df22003-09-28 18:58:27 +000010710#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010711/************************************************************************
10712 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010713 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010714 * *
10715 ************************************************************************/
10716
10717/**
10718 * xmlIOParseDTD:
10719 * @sax: the SAX handler block or NULL
10720 * @input: an Input Buffer
10721 * @enc: the charset encoding if known
10722 *
10723 * Load and parse a DTD
10724 *
10725 * Returns the resulting xmlDtdPtr or NULL in case of error.
10726 * @input will be freed at parsing end.
10727 */
10728
10729xmlDtdPtr
10730xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10731 xmlCharEncoding enc) {
10732 xmlDtdPtr ret = NULL;
10733 xmlParserCtxtPtr ctxt;
10734 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010735 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010736
10737 if (input == NULL)
10738 return(NULL);
10739
10740 ctxt = xmlNewParserCtxt();
10741 if (ctxt == NULL) {
10742 return(NULL);
10743 }
10744
10745 /*
10746 * Set-up the SAX context
10747 */
10748 if (sax != NULL) {
10749 if (ctxt->sax != NULL)
10750 xmlFree(ctxt->sax);
10751 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010752 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010753 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010754 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010755
10756 /*
10757 * generate a parser input from the I/O handler
10758 */
10759
Daniel Veillard43caefb2003-12-07 19:32:22 +000010760 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010761 if (pinput == NULL) {
10762 if (sax != NULL) ctxt->sax = NULL;
10763 xmlFreeParserCtxt(ctxt);
10764 return(NULL);
10765 }
10766
10767 /*
10768 * plug some encoding conversion routines here.
10769 */
10770 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010771 if (enc != XML_CHAR_ENCODING_NONE) {
10772 xmlSwitchEncoding(ctxt, enc);
10773 }
Owen Taylor3473f882001-02-23 17:55:21 +000010774
10775 pinput->filename = NULL;
10776 pinput->line = 1;
10777 pinput->col = 1;
10778 pinput->base = ctxt->input->cur;
10779 pinput->cur = ctxt->input->cur;
10780 pinput->free = NULL;
10781
10782 /*
10783 * let's parse that entity knowing it's an external subset.
10784 */
10785 ctxt->inSubset = 2;
10786 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10787 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10788 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010789
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010790 if ((enc == XML_CHAR_ENCODING_NONE) &&
10791 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010792 /*
10793 * Get the 4 first bytes and decode the charset
10794 * if enc != XML_CHAR_ENCODING_NONE
10795 * plug some encoding conversion routines.
10796 */
10797 start[0] = RAW;
10798 start[1] = NXT(1);
10799 start[2] = NXT(2);
10800 start[3] = NXT(3);
10801 enc = xmlDetectCharEncoding(start, 4);
10802 if (enc != XML_CHAR_ENCODING_NONE) {
10803 xmlSwitchEncoding(ctxt, enc);
10804 }
10805 }
10806
Owen Taylor3473f882001-02-23 17:55:21 +000010807 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10808
10809 if (ctxt->myDoc != NULL) {
10810 if (ctxt->wellFormed) {
10811 ret = ctxt->myDoc->extSubset;
10812 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010813 if (ret != NULL) {
10814 xmlNodePtr tmp;
10815
10816 ret->doc = NULL;
10817 tmp = ret->children;
10818 while (tmp != NULL) {
10819 tmp->doc = NULL;
10820 tmp = tmp->next;
10821 }
10822 }
Owen Taylor3473f882001-02-23 17:55:21 +000010823 } else {
10824 ret = NULL;
10825 }
10826 xmlFreeDoc(ctxt->myDoc);
10827 ctxt->myDoc = NULL;
10828 }
10829 if (sax != NULL) ctxt->sax = NULL;
10830 xmlFreeParserCtxt(ctxt);
10831
10832 return(ret);
10833}
10834
10835/**
10836 * xmlSAXParseDTD:
10837 * @sax: the SAX handler block
10838 * @ExternalID: a NAME* containing the External ID of the DTD
10839 * @SystemID: a NAME* containing the URL to the DTD
10840 *
10841 * Load and parse an external subset.
10842 *
10843 * Returns the resulting xmlDtdPtr or NULL in case of error.
10844 */
10845
10846xmlDtdPtr
10847xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10848 const xmlChar *SystemID) {
10849 xmlDtdPtr ret = NULL;
10850 xmlParserCtxtPtr ctxt;
10851 xmlParserInputPtr input = NULL;
10852 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010853 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010854
10855 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10856
10857 ctxt = xmlNewParserCtxt();
10858 if (ctxt == NULL) {
10859 return(NULL);
10860 }
10861
10862 /*
10863 * Set-up the SAX context
10864 */
10865 if (sax != NULL) {
10866 if (ctxt->sax != NULL)
10867 xmlFree(ctxt->sax);
10868 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010869 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010870 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010871
10872 /*
10873 * Canonicalise the system ID
10874 */
10875 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010876 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010877 xmlFreeParserCtxt(ctxt);
10878 return(NULL);
10879 }
Owen Taylor3473f882001-02-23 17:55:21 +000010880
10881 /*
10882 * Ask the Entity resolver to load the damn thing
10883 */
10884
10885 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010886 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010887 if (input == NULL) {
10888 if (sax != NULL) ctxt->sax = NULL;
10889 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010890 if (systemIdCanonic != NULL)
10891 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010892 return(NULL);
10893 }
10894
10895 /*
10896 * plug some encoding conversion routines here.
10897 */
10898 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010899 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10900 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10901 xmlSwitchEncoding(ctxt, enc);
10902 }
Owen Taylor3473f882001-02-23 17:55:21 +000010903
10904 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010905 input->filename = (char *) systemIdCanonic;
10906 else
10907 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010908 input->line = 1;
10909 input->col = 1;
10910 input->base = ctxt->input->cur;
10911 input->cur = ctxt->input->cur;
10912 input->free = NULL;
10913
10914 /*
10915 * let's parse that entity knowing it's an external subset.
10916 */
10917 ctxt->inSubset = 2;
10918 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10919 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10920 ExternalID, SystemID);
10921 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10922
10923 if (ctxt->myDoc != NULL) {
10924 if (ctxt->wellFormed) {
10925 ret = ctxt->myDoc->extSubset;
10926 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010927 if (ret != NULL) {
10928 xmlNodePtr tmp;
10929
10930 ret->doc = NULL;
10931 tmp = ret->children;
10932 while (tmp != NULL) {
10933 tmp->doc = NULL;
10934 tmp = tmp->next;
10935 }
10936 }
Owen Taylor3473f882001-02-23 17:55:21 +000010937 } else {
10938 ret = NULL;
10939 }
10940 xmlFreeDoc(ctxt->myDoc);
10941 ctxt->myDoc = NULL;
10942 }
10943 if (sax != NULL) ctxt->sax = NULL;
10944 xmlFreeParserCtxt(ctxt);
10945
10946 return(ret);
10947}
10948
Daniel Veillard4432df22003-09-28 18:58:27 +000010949
Owen Taylor3473f882001-02-23 17:55:21 +000010950/**
10951 * xmlParseDTD:
10952 * @ExternalID: a NAME* containing the External ID of the DTD
10953 * @SystemID: a NAME* containing the URL to the DTD
10954 *
10955 * Load and parse an external subset.
10956 *
10957 * Returns the resulting xmlDtdPtr or NULL in case of error.
10958 */
10959
10960xmlDtdPtr
10961xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10962 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10963}
Daniel Veillard4432df22003-09-28 18:58:27 +000010964#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010965
10966/************************************************************************
10967 * *
10968 * Front ends when parsing an Entity *
10969 * *
10970 ************************************************************************/
10971
10972/**
Owen Taylor3473f882001-02-23 17:55:21 +000010973 * xmlParseCtxtExternalEntity:
10974 * @ctx: the existing parsing context
10975 * @URL: the URL for the entity to load
10976 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010977 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010978 *
10979 * Parse an external general entity within an existing parsing context
10980 * An external general parsed entity is well-formed if it matches the
10981 * production labeled extParsedEnt.
10982 *
10983 * [78] extParsedEnt ::= TextDecl? content
10984 *
10985 * Returns 0 if the entity is well formed, -1 in case of args problem and
10986 * the parser error code otherwise
10987 */
10988
10989int
10990xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010991 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010992 xmlParserCtxtPtr ctxt;
10993 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010994 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010995 xmlSAXHandlerPtr oldsax = NULL;
10996 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010997 xmlChar start[4];
10998 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010999
Daniel Veillardce682bc2004-11-05 17:22:25 +000011000 if (ctx == NULL) return(-1);
11001
Owen Taylor3473f882001-02-23 17:55:21 +000011002 if (ctx->depth > 40) {
11003 return(XML_ERR_ENTITY_LOOP);
11004 }
11005
Daniel Veillardcda96922001-08-21 10:56:31 +000011006 if (lst != NULL)
11007 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011008 if ((URL == NULL) && (ID == NULL))
11009 return(-1);
11010 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11011 return(-1);
11012
11013
11014 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11015 if (ctxt == NULL) return(-1);
11016 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011017 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011018 oldsax = ctxt->sax;
11019 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011020 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011021 newDoc = xmlNewDoc(BAD_CAST "1.0");
11022 if (newDoc == NULL) {
11023 xmlFreeParserCtxt(ctxt);
11024 return(-1);
11025 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011026 if (ctx->myDoc->dict) {
11027 newDoc->dict = ctx->myDoc->dict;
11028 xmlDictReference(newDoc->dict);
11029 }
Owen Taylor3473f882001-02-23 17:55:21 +000011030 if (ctx->myDoc != NULL) {
11031 newDoc->intSubset = ctx->myDoc->intSubset;
11032 newDoc->extSubset = ctx->myDoc->extSubset;
11033 }
11034 if (ctx->myDoc->URL != NULL) {
11035 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11036 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011037 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11038 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011039 ctxt->sax = oldsax;
11040 xmlFreeParserCtxt(ctxt);
11041 newDoc->intSubset = NULL;
11042 newDoc->extSubset = NULL;
11043 xmlFreeDoc(newDoc);
11044 return(-1);
11045 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011046 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011047 nodePush(ctxt, newDoc->children);
11048 if (ctx->myDoc == NULL) {
11049 ctxt->myDoc = newDoc;
11050 } else {
11051 ctxt->myDoc = ctx->myDoc;
11052 newDoc->children->doc = ctx->myDoc;
11053 }
11054
Daniel Veillard87a764e2001-06-20 17:41:10 +000011055 /*
11056 * Get the 4 first bytes and decode the charset
11057 * if enc != XML_CHAR_ENCODING_NONE
11058 * plug some encoding conversion routines.
11059 */
11060 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011061 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11062 start[0] = RAW;
11063 start[1] = NXT(1);
11064 start[2] = NXT(2);
11065 start[3] = NXT(3);
11066 enc = xmlDetectCharEncoding(start, 4);
11067 if (enc != XML_CHAR_ENCODING_NONE) {
11068 xmlSwitchEncoding(ctxt, enc);
11069 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011070 }
11071
Owen Taylor3473f882001-02-23 17:55:21 +000011072 /*
11073 * Parse a possible text declaration first
11074 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011075 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011076 xmlParseTextDecl(ctxt);
11077 }
11078
11079 /*
11080 * Doing validity checking on chunk doesn't make sense
11081 */
11082 ctxt->instate = XML_PARSER_CONTENT;
11083 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011084 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011085 ctxt->loadsubset = ctx->loadsubset;
11086 ctxt->depth = ctx->depth + 1;
11087 ctxt->replaceEntities = ctx->replaceEntities;
11088 if (ctxt->validate) {
11089 ctxt->vctxt.error = ctx->vctxt.error;
11090 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011091 } else {
11092 ctxt->vctxt.error = NULL;
11093 ctxt->vctxt.warning = NULL;
11094 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011095 ctxt->vctxt.nodeTab = NULL;
11096 ctxt->vctxt.nodeNr = 0;
11097 ctxt->vctxt.nodeMax = 0;
11098 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011099 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11100 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011101 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11102 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11103 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011104 ctxt->dictNames = ctx->dictNames;
11105 ctxt->attsDefault = ctx->attsDefault;
11106 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011107 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011108
11109 xmlParseContent(ctxt);
11110
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011111 ctx->validate = ctxt->validate;
11112 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011113 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011114 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011115 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011116 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011117 }
11118 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011119 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011120 }
11121
11122 if (!ctxt->wellFormed) {
11123 if (ctxt->errNo == 0)
11124 ret = 1;
11125 else
11126 ret = ctxt->errNo;
11127 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011128 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011129 xmlNodePtr cur;
11130
11131 /*
11132 * Return the newly created nodeset after unlinking it from
11133 * they pseudo parent.
11134 */
11135 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011136 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011137 while (cur != NULL) {
11138 cur->parent = NULL;
11139 cur = cur->next;
11140 }
11141 newDoc->children->children = NULL;
11142 }
11143 ret = 0;
11144 }
11145 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011146 ctxt->dict = NULL;
11147 ctxt->attsDefault = NULL;
11148 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011149 xmlFreeParserCtxt(ctxt);
11150 newDoc->intSubset = NULL;
11151 newDoc->extSubset = NULL;
11152 xmlFreeDoc(newDoc);
11153
11154 return(ret);
11155}
11156
11157/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011158 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011159 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011160 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011161 * @sax: the SAX handler bloc (possibly NULL)
11162 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11163 * @depth: Used for loop detection, use 0
11164 * @URL: the URL for the entity to load
11165 * @ID: the System ID for the entity to load
11166 * @list: the return value for the set of parsed nodes
11167 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011168 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011169 *
11170 * Returns 0 if the entity is well formed, -1 in case of args problem and
11171 * the parser error code otherwise
11172 */
11173
Daniel Veillard7d515752003-09-26 19:12:37 +000011174static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011175xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11176 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011177 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011178 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011179 xmlParserCtxtPtr ctxt;
11180 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011181 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011182 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011183 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011184 xmlChar start[4];
11185 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011186
11187 if (depth > 40) {
11188 return(XML_ERR_ENTITY_LOOP);
11189 }
11190
11191
11192
11193 if (list != NULL)
11194 *list = NULL;
11195 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011196 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011197 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000011198 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011199
11200
11201 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011202 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011203 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011204 if (oldctxt != NULL) {
11205 ctxt->_private = oldctxt->_private;
11206 ctxt->loadsubset = oldctxt->loadsubset;
11207 ctxt->validate = oldctxt->validate;
11208 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011209 ctxt->record_info = oldctxt->record_info;
11210 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11211 ctxt->node_seq.length = oldctxt->node_seq.length;
11212 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011213 } else {
11214 /*
11215 * Doing validity checking on chunk without context
11216 * doesn't make sense
11217 */
11218 ctxt->_private = NULL;
11219 ctxt->validate = 0;
11220 ctxt->external = 2;
11221 ctxt->loadsubset = 0;
11222 }
Owen Taylor3473f882001-02-23 17:55:21 +000011223 if (sax != NULL) {
11224 oldsax = ctxt->sax;
11225 ctxt->sax = sax;
11226 if (user_data != NULL)
11227 ctxt->userData = user_data;
11228 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011229 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011230 newDoc = xmlNewDoc(BAD_CAST "1.0");
11231 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011232 ctxt->node_seq.maximum = 0;
11233 ctxt->node_seq.length = 0;
11234 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011235 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011236 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011237 }
11238 if (doc != NULL) {
11239 newDoc->intSubset = doc->intSubset;
11240 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011241 newDoc->dict = doc->dict;
11242 } else if (oldctxt != NULL) {
11243 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011244 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011245 xmlDictReference(newDoc->dict);
11246
Owen Taylor3473f882001-02-23 17:55:21 +000011247 if (doc->URL != NULL) {
11248 newDoc->URL = xmlStrdup(doc->URL);
11249 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011250 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11251 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011252 if (sax != NULL)
11253 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011254 ctxt->node_seq.maximum = 0;
11255 ctxt->node_seq.length = 0;
11256 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011257 xmlFreeParserCtxt(ctxt);
11258 newDoc->intSubset = NULL;
11259 newDoc->extSubset = NULL;
11260 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011261 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011262 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011263 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 nodePush(ctxt, newDoc->children);
11265 if (doc == NULL) {
11266 ctxt->myDoc = newDoc;
11267 } else {
11268 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011269 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011270 }
11271
Daniel Veillard87a764e2001-06-20 17:41:10 +000011272 /*
11273 * Get the 4 first bytes and decode the charset
11274 * if enc != XML_CHAR_ENCODING_NONE
11275 * plug some encoding conversion routines.
11276 */
11277 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011278 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11279 start[0] = RAW;
11280 start[1] = NXT(1);
11281 start[2] = NXT(2);
11282 start[3] = NXT(3);
11283 enc = xmlDetectCharEncoding(start, 4);
11284 if (enc != XML_CHAR_ENCODING_NONE) {
11285 xmlSwitchEncoding(ctxt, enc);
11286 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011287 }
11288
Owen Taylor3473f882001-02-23 17:55:21 +000011289 /*
11290 * Parse a possible text declaration first
11291 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011292 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011293 xmlParseTextDecl(ctxt);
11294 }
11295
Owen Taylor3473f882001-02-23 17:55:21 +000011296 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011297 ctxt->depth = depth;
11298
11299 xmlParseContent(ctxt);
11300
Daniel Veillard561b7f82002-03-20 21:55:57 +000011301 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011302 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011303 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011304 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011305 }
11306 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011307 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011308 }
11309
11310 if (!ctxt->wellFormed) {
11311 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011312 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011313 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011314 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011315 } else {
11316 if (list != NULL) {
11317 xmlNodePtr cur;
11318
11319 /*
11320 * Return the newly created nodeset after unlinking it from
11321 * they pseudo parent.
11322 */
11323 cur = newDoc->children->children;
11324 *list = cur;
11325 while (cur != NULL) {
11326 cur->parent = NULL;
11327 cur = cur->next;
11328 }
11329 newDoc->children->children = NULL;
11330 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011331 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011332 }
11333 if (sax != NULL)
11334 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011335 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11336 oldctxt->node_seq.length = ctxt->node_seq.length;
11337 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011338 ctxt->node_seq.maximum = 0;
11339 ctxt->node_seq.length = 0;
11340 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011341 xmlFreeParserCtxt(ctxt);
11342 newDoc->intSubset = NULL;
11343 newDoc->extSubset = NULL;
11344 xmlFreeDoc(newDoc);
11345
11346 return(ret);
11347}
11348
Daniel Veillard81273902003-09-30 00:43:48 +000011349#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011350/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011351 * xmlParseExternalEntity:
11352 * @doc: the document the chunk pertains to
11353 * @sax: the SAX handler bloc (possibly NULL)
11354 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11355 * @depth: Used for loop detection, use 0
11356 * @URL: the URL for the entity to load
11357 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011358 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011359 *
11360 * Parse an external general entity
11361 * An external general parsed entity is well-formed if it matches the
11362 * production labeled extParsedEnt.
11363 *
11364 * [78] extParsedEnt ::= TextDecl? content
11365 *
11366 * Returns 0 if the entity is well formed, -1 in case of args problem and
11367 * the parser error code otherwise
11368 */
11369
11370int
11371xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011372 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011373 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011374 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011375}
11376
11377/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011378 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011379 * @doc: the document the chunk pertains to
11380 * @sax: the SAX handler bloc (possibly NULL)
11381 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11382 * @depth: Used for loop detection, use 0
11383 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011384 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011385 *
11386 * Parse a well-balanced chunk of an XML document
11387 * called by the parser
11388 * The allowed sequence for the Well Balanced Chunk is the one defined by
11389 * the content production in the XML grammar:
11390 *
11391 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11392 *
11393 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11394 * the parser error code otherwise
11395 */
11396
11397int
11398xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011399 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011400 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11401 depth, string, lst, 0 );
11402}
Daniel Veillard81273902003-09-30 00:43:48 +000011403#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011404
11405/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011406 * xmlParseBalancedChunkMemoryInternal:
11407 * @oldctxt: the existing parsing context
11408 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11409 * @user_data: the user data field for the parser context
11410 * @lst: the return value for the set of parsed nodes
11411 *
11412 *
11413 * Parse a well-balanced chunk of an XML document
11414 * called by the parser
11415 * The allowed sequence for the Well Balanced Chunk is the one defined by
11416 * the content production in the XML grammar:
11417 *
11418 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11419 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011420 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11421 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011422 *
11423 * In case recover is set to 1, the nodelist will not be empty even if
11424 * the parsed chunk is not well balanced.
11425 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011426static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011427xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11428 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11429 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011430 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011431 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011432 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011433 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011434 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011435 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011436 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011437
11438 if (oldctxt->depth > 40) {
11439 return(XML_ERR_ENTITY_LOOP);
11440 }
11441
11442
11443 if (lst != NULL)
11444 *lst = NULL;
11445 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011446 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011447
11448 size = xmlStrlen(string);
11449
11450 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011451 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011452 if (user_data != NULL)
11453 ctxt->userData = user_data;
11454 else
11455 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011456 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11457 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011458 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11459 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11460 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011461
11462 oldsax = ctxt->sax;
11463 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011464 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011465 ctxt->replaceEntities = oldctxt->replaceEntities;
11466 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011467
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011468 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011469 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011470 newDoc = xmlNewDoc(BAD_CAST "1.0");
11471 if (newDoc == NULL) {
11472 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011473 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011474 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011475 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011476 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011477 newDoc->dict = ctxt->dict;
11478 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011479 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011480 } else {
11481 ctxt->myDoc = oldctxt->myDoc;
11482 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011483 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011484 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011485 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11486 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011487 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011488 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011489 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011490 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011491 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011492 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011493 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011494 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011495 ctxt->myDoc->children = NULL;
11496 ctxt->myDoc->last = NULL;
11497 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011498 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011499 ctxt->instate = XML_PARSER_CONTENT;
11500 ctxt->depth = oldctxt->depth + 1;
11501
Daniel Veillard328f48c2002-11-15 15:24:34 +000011502 ctxt->validate = 0;
11503 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011504 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11505 /*
11506 * ID/IDREF registration will be done in xmlValidateElement below
11507 */
11508 ctxt->loadsubset |= XML_SKIP_IDS;
11509 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011510 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011511 ctxt->attsDefault = oldctxt->attsDefault;
11512 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011513
Daniel Veillard68e9e742002-11-16 15:35:11 +000011514 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011515 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011516 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011517 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011518 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011519 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011520 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011521 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011522 }
11523
11524 if (!ctxt->wellFormed) {
11525 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011526 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011527 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011528 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011529 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011530 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011531 }
11532
William M. Brack7b9154b2003-09-27 19:23:50 +000011533 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011534 xmlNodePtr cur;
11535
11536 /*
11537 * Return the newly created nodeset after unlinking it from
11538 * they pseudo parent.
11539 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011540 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011541 *lst = cur;
11542 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011543#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011544 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11545 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11546 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011547 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11548 oldctxt->myDoc, cur);
11549 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011550#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011551 cur->parent = NULL;
11552 cur = cur->next;
11553 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011554 ctxt->myDoc->children->children = NULL;
11555 }
11556 if (ctxt->myDoc != NULL) {
11557 xmlFreeNode(ctxt->myDoc->children);
11558 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011559 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011560 }
11561
11562 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011563 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011564 ctxt->attsDefault = NULL;
11565 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011566 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011567 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011568 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011569 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011570
11571 return(ret);
11572}
11573
Daniel Veillard29b17482004-08-16 00:39:03 +000011574/**
11575 * xmlParseInNodeContext:
11576 * @node: the context node
11577 * @data: the input string
11578 * @datalen: the input string length in bytes
11579 * @options: a combination of xmlParserOption
11580 * @lst: the return value for the set of parsed nodes
11581 *
11582 * Parse a well-balanced chunk of an XML document
11583 * within the context (DTD, namespaces, etc ...) of the given node.
11584 *
11585 * The allowed sequence for the data is a Well Balanced Chunk defined by
11586 * the content production in the XML grammar:
11587 *
11588 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11589 *
11590 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11591 * error code otherwise
11592 */
11593xmlParserErrors
11594xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11595 int options, xmlNodePtr *lst) {
11596#ifdef SAX2
11597 xmlParserCtxtPtr ctxt;
11598 xmlDocPtr doc = NULL;
11599 xmlNodePtr fake, cur;
11600 int nsnr = 0;
11601
11602 xmlParserErrors ret = XML_ERR_OK;
11603
11604 /*
11605 * check all input parameters, grab the document
11606 */
11607 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11608 return(XML_ERR_INTERNAL_ERROR);
11609 switch (node->type) {
11610 case XML_ELEMENT_NODE:
11611 case XML_ATTRIBUTE_NODE:
11612 case XML_TEXT_NODE:
11613 case XML_CDATA_SECTION_NODE:
11614 case XML_ENTITY_REF_NODE:
11615 case XML_PI_NODE:
11616 case XML_COMMENT_NODE:
11617 case XML_DOCUMENT_NODE:
11618 case XML_HTML_DOCUMENT_NODE:
11619 break;
11620 default:
11621 return(XML_ERR_INTERNAL_ERROR);
11622
11623 }
11624 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11625 (node->type != XML_DOCUMENT_NODE) &&
11626 (node->type != XML_HTML_DOCUMENT_NODE))
11627 node = node->parent;
11628 if (node == NULL)
11629 return(XML_ERR_INTERNAL_ERROR);
11630 if (node->type == XML_ELEMENT_NODE)
11631 doc = node->doc;
11632 else
11633 doc = (xmlDocPtr) node;
11634 if (doc == NULL)
11635 return(XML_ERR_INTERNAL_ERROR);
11636
11637 /*
11638 * allocate a context and set-up everything not related to the
11639 * node position in the tree
11640 */
11641 if (doc->type == XML_DOCUMENT_NODE)
11642 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11643#ifdef LIBXML_HTML_ENABLED
11644 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11645 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11646#endif
11647 else
11648 return(XML_ERR_INTERNAL_ERROR);
11649
11650 if (ctxt == NULL)
11651 return(XML_ERR_NO_MEMORY);
11652 fake = xmlNewComment(NULL);
11653 if (fake == NULL) {
11654 xmlFreeParserCtxt(ctxt);
11655 return(XML_ERR_NO_MEMORY);
11656 }
11657 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011658
11659 /*
11660 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11661 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11662 * we must wait until the last moment to free the original one.
11663 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011664 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011665 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011666 xmlDictFree(ctxt->dict);
11667 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011668 } else
11669 options |= XML_PARSE_NODICT;
11670
11671 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011672 xmlDetectSAX2(ctxt);
11673 ctxt->myDoc = doc;
11674
11675 if (node->type == XML_ELEMENT_NODE) {
11676 nodePush(ctxt, node);
11677 /*
11678 * initialize the SAX2 namespaces stack
11679 */
11680 cur = node;
11681 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11682 xmlNsPtr ns = cur->nsDef;
11683 const xmlChar *iprefix, *ihref;
11684
11685 while (ns != NULL) {
11686 if (ctxt->dict) {
11687 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11688 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11689 } else {
11690 iprefix = ns->prefix;
11691 ihref = ns->href;
11692 }
11693
11694 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11695 nsPush(ctxt, iprefix, ihref);
11696 nsnr++;
11697 }
11698 ns = ns->next;
11699 }
11700 cur = cur->parent;
11701 }
11702 ctxt->instate = XML_PARSER_CONTENT;
11703 }
11704
11705 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11706 /*
11707 * ID/IDREF registration will be done in xmlValidateElement below
11708 */
11709 ctxt->loadsubset |= XML_SKIP_IDS;
11710 }
11711
Daniel Veillard499cc922006-01-18 17:22:35 +000011712#ifdef LIBXML_HTML_ENABLED
11713 if (doc->type == XML_HTML_DOCUMENT_NODE)
11714 __htmlParseContent(ctxt);
11715 else
11716#endif
11717 xmlParseContent(ctxt);
11718
Daniel Veillard29b17482004-08-16 00:39:03 +000011719 nsPop(ctxt, nsnr);
11720 if ((RAW == '<') && (NXT(1) == '/')) {
11721 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11722 } else if (RAW != 0) {
11723 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11724 }
11725 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11726 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11727 ctxt->wellFormed = 0;
11728 }
11729
11730 if (!ctxt->wellFormed) {
11731 if (ctxt->errNo == 0)
11732 ret = XML_ERR_INTERNAL_ERROR;
11733 else
11734 ret = (xmlParserErrors)ctxt->errNo;
11735 } else {
11736 ret = XML_ERR_OK;
11737 }
11738
11739 /*
11740 * Return the newly created nodeset after unlinking it from
11741 * the pseudo sibling.
11742 */
11743
11744 cur = fake->next;
11745 fake->next = NULL;
11746 node->last = fake;
11747
11748 if (cur != NULL) {
11749 cur->prev = NULL;
11750 }
11751
11752 *lst = cur;
11753
11754 while (cur != NULL) {
11755 cur->parent = NULL;
11756 cur = cur->next;
11757 }
11758
11759 xmlUnlinkNode(fake);
11760 xmlFreeNode(fake);
11761
11762
11763 if (ret != XML_ERR_OK) {
11764 xmlFreeNodeList(*lst);
11765 *lst = NULL;
11766 }
William M. Brackc3f81342004-10-03 01:22:44 +000011767
William M. Brackb7b54de2004-10-06 16:38:01 +000011768 if (doc->dict != NULL)
11769 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011770 xmlFreeParserCtxt(ctxt);
11771
11772 return(ret);
11773#else /* !SAX2 */
11774 return(XML_ERR_INTERNAL_ERROR);
11775#endif
11776}
11777
Daniel Veillard81273902003-09-30 00:43:48 +000011778#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011779/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011780 * xmlParseBalancedChunkMemoryRecover:
11781 * @doc: the document the chunk pertains to
11782 * @sax: the SAX handler bloc (possibly NULL)
11783 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11784 * @depth: Used for loop detection, use 0
11785 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11786 * @lst: the return value for the set of parsed nodes
11787 * @recover: return nodes even if the data is broken (use 0)
11788 *
11789 *
11790 * Parse a well-balanced chunk of an XML document
11791 * called by the parser
11792 * The allowed sequence for the Well Balanced Chunk is the one defined by
11793 * the content production in the XML grammar:
11794 *
11795 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11796 *
11797 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11798 * the parser error code otherwise
11799 *
11800 * In case recover is set to 1, the nodelist will not be empty even if
11801 * the parsed chunk is not well balanced.
11802 */
11803int
11804xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11805 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11806 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011807 xmlParserCtxtPtr ctxt;
11808 xmlDocPtr newDoc;
11809 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011810 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011811 int size;
11812 int ret = 0;
11813
11814 if (depth > 40) {
11815 return(XML_ERR_ENTITY_LOOP);
11816 }
11817
11818
Daniel Veillardcda96922001-08-21 10:56:31 +000011819 if (lst != NULL)
11820 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011821 if (string == NULL)
11822 return(-1);
11823
11824 size = xmlStrlen(string);
11825
11826 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11827 if (ctxt == NULL) return(-1);
11828 ctxt->userData = ctxt;
11829 if (sax != NULL) {
11830 oldsax = ctxt->sax;
11831 ctxt->sax = sax;
11832 if (user_data != NULL)
11833 ctxt->userData = user_data;
11834 }
11835 newDoc = xmlNewDoc(BAD_CAST "1.0");
11836 if (newDoc == NULL) {
11837 xmlFreeParserCtxt(ctxt);
11838 return(-1);
11839 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011840 if ((doc != NULL) && (doc->dict != NULL)) {
11841 xmlDictFree(ctxt->dict);
11842 ctxt->dict = doc->dict;
11843 xmlDictReference(ctxt->dict);
11844 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11845 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11846 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11847 ctxt->dictNames = 1;
11848 } else {
11849 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11850 }
Owen Taylor3473f882001-02-23 17:55:21 +000011851 if (doc != NULL) {
11852 newDoc->intSubset = doc->intSubset;
11853 newDoc->extSubset = doc->extSubset;
11854 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011855 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11856 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011857 if (sax != NULL)
11858 ctxt->sax = oldsax;
11859 xmlFreeParserCtxt(ctxt);
11860 newDoc->intSubset = NULL;
11861 newDoc->extSubset = NULL;
11862 xmlFreeDoc(newDoc);
11863 return(-1);
11864 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011865 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11866 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011867 if (doc == NULL) {
11868 ctxt->myDoc = newDoc;
11869 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011870 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011871 newDoc->children->doc = doc;
11872 }
11873 ctxt->instate = XML_PARSER_CONTENT;
11874 ctxt->depth = depth;
11875
11876 /*
11877 * Doing validity checking on chunk doesn't make sense
11878 */
11879 ctxt->validate = 0;
11880 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011881 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011882
Daniel Veillardb39bc392002-10-26 19:29:51 +000011883 if ( doc != NULL ){
11884 content = doc->children;
11885 doc->children = NULL;
11886 xmlParseContent(ctxt);
11887 doc->children = content;
11888 }
11889 else {
11890 xmlParseContent(ctxt);
11891 }
Owen Taylor3473f882001-02-23 17:55:21 +000011892 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011893 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011894 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011895 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011896 }
11897 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011898 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011899 }
11900
11901 if (!ctxt->wellFormed) {
11902 if (ctxt->errNo == 0)
11903 ret = 1;
11904 else
11905 ret = ctxt->errNo;
11906 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011907 ret = 0;
11908 }
11909
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011910 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11911 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011912
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011913 /*
11914 * Return the newly created nodeset after unlinking it from
11915 * they pseudo parent.
11916 */
11917 cur = newDoc->children->children;
11918 *lst = cur;
11919 while (cur != NULL) {
11920 xmlSetTreeDoc(cur, doc);
11921 cur->parent = NULL;
11922 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011923 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011924 newDoc->children->children = NULL;
11925 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011926
Owen Taylor3473f882001-02-23 17:55:21 +000011927 if (sax != NULL)
11928 ctxt->sax = oldsax;
11929 xmlFreeParserCtxt(ctxt);
11930 newDoc->intSubset = NULL;
11931 newDoc->extSubset = NULL;
11932 xmlFreeDoc(newDoc);
11933
11934 return(ret);
11935}
11936
11937/**
11938 * xmlSAXParseEntity:
11939 * @sax: the SAX handler block
11940 * @filename: the filename
11941 *
11942 * parse an XML external entity out of context and build a tree.
11943 * It use the given SAX function block to handle the parsing callback.
11944 * If sax is NULL, fallback to the default DOM tree building routines.
11945 *
11946 * [78] extParsedEnt ::= TextDecl? content
11947 *
11948 * This correspond to a "Well Balanced" chunk
11949 *
11950 * Returns the resulting document tree
11951 */
11952
11953xmlDocPtr
11954xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11955 xmlDocPtr ret;
11956 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011957
11958 ctxt = xmlCreateFileParserCtxt(filename);
11959 if (ctxt == NULL) {
11960 return(NULL);
11961 }
11962 if (sax != NULL) {
11963 if (ctxt->sax != NULL)
11964 xmlFree(ctxt->sax);
11965 ctxt->sax = sax;
11966 ctxt->userData = NULL;
11967 }
11968
Owen Taylor3473f882001-02-23 17:55:21 +000011969 xmlParseExtParsedEnt(ctxt);
11970
11971 if (ctxt->wellFormed)
11972 ret = ctxt->myDoc;
11973 else {
11974 ret = NULL;
11975 xmlFreeDoc(ctxt->myDoc);
11976 ctxt->myDoc = NULL;
11977 }
11978 if (sax != NULL)
11979 ctxt->sax = NULL;
11980 xmlFreeParserCtxt(ctxt);
11981
11982 return(ret);
11983}
11984
11985/**
11986 * xmlParseEntity:
11987 * @filename: the filename
11988 *
11989 * parse an XML external entity out of context and build a tree.
11990 *
11991 * [78] extParsedEnt ::= TextDecl? content
11992 *
11993 * This correspond to a "Well Balanced" chunk
11994 *
11995 * Returns the resulting document tree
11996 */
11997
11998xmlDocPtr
11999xmlParseEntity(const char *filename) {
12000 return(xmlSAXParseEntity(NULL, filename));
12001}
Daniel Veillard81273902003-09-30 00:43:48 +000012002#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012003
12004/**
12005 * xmlCreateEntityParserCtxt:
12006 * @URL: the entity URL
12007 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012008 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012009 *
12010 * Create a parser context for an external entity
12011 * Automatic support for ZLIB/Compress compressed document is provided
12012 * by default if found at compile-time.
12013 *
12014 * Returns the new parser context or NULL
12015 */
12016xmlParserCtxtPtr
12017xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12018 const xmlChar *base) {
12019 xmlParserCtxtPtr ctxt;
12020 xmlParserInputPtr inputStream;
12021 char *directory = NULL;
12022 xmlChar *uri;
12023
12024 ctxt = xmlNewParserCtxt();
12025 if (ctxt == NULL) {
12026 return(NULL);
12027 }
12028
12029 uri = xmlBuildURI(URL, base);
12030
12031 if (uri == NULL) {
12032 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12033 if (inputStream == NULL) {
12034 xmlFreeParserCtxt(ctxt);
12035 return(NULL);
12036 }
12037
12038 inputPush(ctxt, inputStream);
12039
12040 if ((ctxt->directory == NULL) && (directory == NULL))
12041 directory = xmlParserGetDirectory((char *)URL);
12042 if ((ctxt->directory == NULL) && (directory != NULL))
12043 ctxt->directory = directory;
12044 } else {
12045 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12046 if (inputStream == NULL) {
12047 xmlFree(uri);
12048 xmlFreeParserCtxt(ctxt);
12049 return(NULL);
12050 }
12051
12052 inputPush(ctxt, inputStream);
12053
12054 if ((ctxt->directory == NULL) && (directory == NULL))
12055 directory = xmlParserGetDirectory((char *)uri);
12056 if ((ctxt->directory == NULL) && (directory != NULL))
12057 ctxt->directory = directory;
12058 xmlFree(uri);
12059 }
Owen Taylor3473f882001-02-23 17:55:21 +000012060 return(ctxt);
12061}
12062
12063/************************************************************************
12064 * *
12065 * Front ends when parsing from a file *
12066 * *
12067 ************************************************************************/
12068
12069/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012070 * xmlCreateURLParserCtxt:
12071 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012072 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012073 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012074 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012075 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012076 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012077 *
12078 * Returns the new parser context or NULL
12079 */
12080xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012081xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012082{
12083 xmlParserCtxtPtr ctxt;
12084 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012085 char *directory = NULL;
12086
Owen Taylor3473f882001-02-23 17:55:21 +000012087 ctxt = xmlNewParserCtxt();
12088 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012089 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012090 return(NULL);
12091 }
12092
Daniel Veillarddf292f72005-01-16 19:00:15 +000012093 if (options)
12094 xmlCtxtUseOptions(ctxt, options);
12095 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012096
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012097 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012098 if (inputStream == NULL) {
12099 xmlFreeParserCtxt(ctxt);
12100 return(NULL);
12101 }
12102
Owen Taylor3473f882001-02-23 17:55:21 +000012103 inputPush(ctxt, inputStream);
12104 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012105 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012106 if ((ctxt->directory == NULL) && (directory != NULL))
12107 ctxt->directory = directory;
12108
12109 return(ctxt);
12110}
12111
Daniel Veillard61b93382003-11-03 14:28:31 +000012112/**
12113 * xmlCreateFileParserCtxt:
12114 * @filename: the filename
12115 *
12116 * Create a parser context for a file content.
12117 * Automatic support for ZLIB/Compress compressed document is provided
12118 * by default if found at compile-time.
12119 *
12120 * Returns the new parser context or NULL
12121 */
12122xmlParserCtxtPtr
12123xmlCreateFileParserCtxt(const char *filename)
12124{
12125 return(xmlCreateURLParserCtxt(filename, 0));
12126}
12127
Daniel Veillard81273902003-09-30 00:43:48 +000012128#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012129/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012130 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012131 * @sax: the SAX handler block
12132 * @filename: the filename
12133 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12134 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012135 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012136 *
12137 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12138 * compressed document is provided by default if found at compile-time.
12139 * It use the given SAX function block to handle the parsing callback.
12140 * If sax is NULL, fallback to the default DOM tree building routines.
12141 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012142 * User data (void *) is stored within the parser context in the
12143 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012144 *
Owen Taylor3473f882001-02-23 17:55:21 +000012145 * Returns the resulting document tree
12146 */
12147
12148xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012149xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12150 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012151 xmlDocPtr ret;
12152 xmlParserCtxtPtr ctxt;
12153 char *directory = NULL;
12154
Daniel Veillard635ef722001-10-29 11:48:19 +000012155 xmlInitParser();
12156
Owen Taylor3473f882001-02-23 17:55:21 +000012157 ctxt = xmlCreateFileParserCtxt(filename);
12158 if (ctxt == NULL) {
12159 return(NULL);
12160 }
12161 if (sax != NULL) {
12162 if (ctxt->sax != NULL)
12163 xmlFree(ctxt->sax);
12164 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012165 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012166 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012167 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012168 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012169 }
Owen Taylor3473f882001-02-23 17:55:21 +000012170
12171 if ((ctxt->directory == NULL) && (directory == NULL))
12172 directory = xmlParserGetDirectory(filename);
12173 if ((ctxt->directory == NULL) && (directory != NULL))
12174 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12175
Daniel Veillarddad3f682002-11-17 16:47:27 +000012176 ctxt->recovery = recovery;
12177
Owen Taylor3473f882001-02-23 17:55:21 +000012178 xmlParseDocument(ctxt);
12179
William M. Brackc07329e2003-09-08 01:57:30 +000012180 if ((ctxt->wellFormed) || recovery) {
12181 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012182 if (ret != NULL) {
12183 if (ctxt->input->buf->compressed > 0)
12184 ret->compression = 9;
12185 else
12186 ret->compression = ctxt->input->buf->compressed;
12187 }
William M. Brackc07329e2003-09-08 01:57:30 +000012188 }
Owen Taylor3473f882001-02-23 17:55:21 +000012189 else {
12190 ret = NULL;
12191 xmlFreeDoc(ctxt->myDoc);
12192 ctxt->myDoc = NULL;
12193 }
12194 if (sax != NULL)
12195 ctxt->sax = NULL;
12196 xmlFreeParserCtxt(ctxt);
12197
12198 return(ret);
12199}
12200
12201/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012202 * xmlSAXParseFile:
12203 * @sax: the SAX handler block
12204 * @filename: the filename
12205 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12206 * documents
12207 *
12208 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12209 * compressed document is provided by default if found at compile-time.
12210 * It use the given SAX function block to handle the parsing callback.
12211 * If sax is NULL, fallback to the default DOM tree building routines.
12212 *
12213 * Returns the resulting document tree
12214 */
12215
12216xmlDocPtr
12217xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12218 int recovery) {
12219 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12220}
12221
12222/**
Owen Taylor3473f882001-02-23 17:55:21 +000012223 * xmlRecoverDoc:
12224 * @cur: a pointer to an array of xmlChar
12225 *
12226 * parse an XML in-memory document and build a tree.
12227 * In the case the document is not Well Formed, a tree is built anyway
12228 *
12229 * Returns the resulting document tree
12230 */
12231
12232xmlDocPtr
12233xmlRecoverDoc(xmlChar *cur) {
12234 return(xmlSAXParseDoc(NULL, cur, 1));
12235}
12236
12237/**
12238 * xmlParseFile:
12239 * @filename: the filename
12240 *
12241 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12242 * compressed document is provided by default if found at compile-time.
12243 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012244 * Returns the resulting document tree if the file was wellformed,
12245 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012246 */
12247
12248xmlDocPtr
12249xmlParseFile(const char *filename) {
12250 return(xmlSAXParseFile(NULL, filename, 0));
12251}
12252
12253/**
12254 * xmlRecoverFile:
12255 * @filename: the filename
12256 *
12257 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12258 * compressed document is provided by default if found at compile-time.
12259 * In the case the document is not Well Formed, a tree is built anyway
12260 *
12261 * Returns the resulting document tree
12262 */
12263
12264xmlDocPtr
12265xmlRecoverFile(const char *filename) {
12266 return(xmlSAXParseFile(NULL, filename, 1));
12267}
12268
12269
12270/**
12271 * xmlSetupParserForBuffer:
12272 * @ctxt: an XML parser context
12273 * @buffer: a xmlChar * buffer
12274 * @filename: a file name
12275 *
12276 * Setup the parser context to parse a new buffer; Clears any prior
12277 * contents from the parser context. The buffer parameter must not be
12278 * NULL, but the filename parameter can be
12279 */
12280void
12281xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12282 const char* filename)
12283{
12284 xmlParserInputPtr input;
12285
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012286 if ((ctxt == NULL) || (buffer == NULL))
12287 return;
12288
Owen Taylor3473f882001-02-23 17:55:21 +000012289 input = xmlNewInputStream(ctxt);
12290 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012291 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012292 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012293 return;
12294 }
12295
12296 xmlClearParserCtxt(ctxt);
12297 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012298 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012299 input->base = buffer;
12300 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012301 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012302 inputPush(ctxt, input);
12303}
12304
12305/**
12306 * xmlSAXUserParseFile:
12307 * @sax: a SAX handler
12308 * @user_data: The user data returned on SAX callbacks
12309 * @filename: a file name
12310 *
12311 * parse an XML file and call the given SAX handler routines.
12312 * Automatic support for ZLIB/Compress compressed document is provided
12313 *
12314 * Returns 0 in case of success or a error number otherwise
12315 */
12316int
12317xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12318 const char *filename) {
12319 int ret = 0;
12320 xmlParserCtxtPtr ctxt;
12321
12322 ctxt = xmlCreateFileParserCtxt(filename);
12323 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012324#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012325 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012326#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012327 xmlFree(ctxt->sax);
12328 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012329 xmlDetectSAX2(ctxt);
12330
Owen Taylor3473f882001-02-23 17:55:21 +000012331 if (user_data != NULL)
12332 ctxt->userData = user_data;
12333
12334 xmlParseDocument(ctxt);
12335
12336 if (ctxt->wellFormed)
12337 ret = 0;
12338 else {
12339 if (ctxt->errNo != 0)
12340 ret = ctxt->errNo;
12341 else
12342 ret = -1;
12343 }
12344 if (sax != NULL)
12345 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012346 if (ctxt->myDoc != NULL) {
12347 xmlFreeDoc(ctxt->myDoc);
12348 ctxt->myDoc = NULL;
12349 }
Owen Taylor3473f882001-02-23 17:55:21 +000012350 xmlFreeParserCtxt(ctxt);
12351
12352 return ret;
12353}
Daniel Veillard81273902003-09-30 00:43:48 +000012354#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012355
12356/************************************************************************
12357 * *
12358 * Front ends when parsing from memory *
12359 * *
12360 ************************************************************************/
12361
12362/**
12363 * xmlCreateMemoryParserCtxt:
12364 * @buffer: a pointer to a char array
12365 * @size: the size of the array
12366 *
12367 * Create a parser context for an XML in-memory document.
12368 *
12369 * Returns the new parser context or NULL
12370 */
12371xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012372xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012373 xmlParserCtxtPtr ctxt;
12374 xmlParserInputPtr input;
12375 xmlParserInputBufferPtr buf;
12376
12377 if (buffer == NULL)
12378 return(NULL);
12379 if (size <= 0)
12380 return(NULL);
12381
12382 ctxt = xmlNewParserCtxt();
12383 if (ctxt == NULL)
12384 return(NULL);
12385
Daniel Veillard53350552003-09-18 13:35:51 +000012386 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012387 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012388 if (buf == NULL) {
12389 xmlFreeParserCtxt(ctxt);
12390 return(NULL);
12391 }
Owen Taylor3473f882001-02-23 17:55:21 +000012392
12393 input = xmlNewInputStream(ctxt);
12394 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012395 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012396 xmlFreeParserCtxt(ctxt);
12397 return(NULL);
12398 }
12399
12400 input->filename = NULL;
12401 input->buf = buf;
12402 input->base = input->buf->buffer->content;
12403 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012404 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012405
12406 inputPush(ctxt, input);
12407 return(ctxt);
12408}
12409
Daniel Veillard81273902003-09-30 00:43:48 +000012410#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012411/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012412 * xmlSAXParseMemoryWithData:
12413 * @sax: the SAX handler block
12414 * @buffer: an pointer to a char array
12415 * @size: the size of the array
12416 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12417 * documents
12418 * @data: the userdata
12419 *
12420 * parse an XML in-memory block and use the given SAX function block
12421 * to handle the parsing callback. If sax is NULL, fallback to the default
12422 * DOM tree building routines.
12423 *
12424 * User data (void *) is stored within the parser context in the
12425 * context's _private member, so it is available nearly everywhere in libxml
12426 *
12427 * Returns the resulting document tree
12428 */
12429
12430xmlDocPtr
12431xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12432 int size, int recovery, void *data) {
12433 xmlDocPtr ret;
12434 xmlParserCtxtPtr ctxt;
12435
12436 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12437 if (ctxt == NULL) return(NULL);
12438 if (sax != NULL) {
12439 if (ctxt->sax != NULL)
12440 xmlFree(ctxt->sax);
12441 ctxt->sax = sax;
12442 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012443 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012444 if (data!=NULL) {
12445 ctxt->_private=data;
12446 }
12447
Daniel Veillardadba5f12003-04-04 16:09:01 +000012448 ctxt->recovery = recovery;
12449
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012450 xmlParseDocument(ctxt);
12451
12452 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12453 else {
12454 ret = NULL;
12455 xmlFreeDoc(ctxt->myDoc);
12456 ctxt->myDoc = NULL;
12457 }
12458 if (sax != NULL)
12459 ctxt->sax = NULL;
12460 xmlFreeParserCtxt(ctxt);
12461
12462 return(ret);
12463}
12464
12465/**
Owen Taylor3473f882001-02-23 17:55:21 +000012466 * xmlSAXParseMemory:
12467 * @sax: the SAX handler block
12468 * @buffer: an pointer to a char array
12469 * @size: the size of the array
12470 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12471 * documents
12472 *
12473 * parse an XML in-memory block and use the given SAX function block
12474 * to handle the parsing callback. If sax is NULL, fallback to the default
12475 * DOM tree building routines.
12476 *
12477 * Returns the resulting document tree
12478 */
12479xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012480xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12481 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012482 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012483}
12484
12485/**
12486 * xmlParseMemory:
12487 * @buffer: an pointer to a char array
12488 * @size: the size of the array
12489 *
12490 * parse an XML in-memory block and build a tree.
12491 *
12492 * Returns the resulting document tree
12493 */
12494
Daniel Veillard50822cb2001-07-26 20:05:51 +000012495xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012496 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12497}
12498
12499/**
12500 * xmlRecoverMemory:
12501 * @buffer: an pointer to a char array
12502 * @size: the size of the array
12503 *
12504 * parse an XML in-memory block and build a tree.
12505 * In the case the document is not Well Formed, a tree is built anyway
12506 *
12507 * Returns the resulting document tree
12508 */
12509
Daniel Veillard50822cb2001-07-26 20:05:51 +000012510xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012511 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12512}
12513
12514/**
12515 * xmlSAXUserParseMemory:
12516 * @sax: a SAX handler
12517 * @user_data: The user data returned on SAX callbacks
12518 * @buffer: an in-memory XML document input
12519 * @size: the length of the XML document in bytes
12520 *
12521 * A better SAX parsing routine.
12522 * parse an XML in-memory buffer and call the given SAX handler routines.
12523 *
12524 * Returns 0 in case of success or a error number otherwise
12525 */
12526int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012527 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012528 int ret = 0;
12529 xmlParserCtxtPtr ctxt;
12530 xmlSAXHandlerPtr oldsax = NULL;
12531
Daniel Veillard9e923512002-08-14 08:48:52 +000012532 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012533 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12534 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012535 oldsax = ctxt->sax;
12536 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012537 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012538 if (user_data != NULL)
12539 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012540
12541 xmlParseDocument(ctxt);
12542
12543 if (ctxt->wellFormed)
12544 ret = 0;
12545 else {
12546 if (ctxt->errNo != 0)
12547 ret = ctxt->errNo;
12548 else
12549 ret = -1;
12550 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012551 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012552 if (ctxt->myDoc != NULL) {
12553 xmlFreeDoc(ctxt->myDoc);
12554 ctxt->myDoc = NULL;
12555 }
Owen Taylor3473f882001-02-23 17:55:21 +000012556 xmlFreeParserCtxt(ctxt);
12557
12558 return ret;
12559}
Daniel Veillard81273902003-09-30 00:43:48 +000012560#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012561
12562/**
12563 * xmlCreateDocParserCtxt:
12564 * @cur: a pointer to an array of xmlChar
12565 *
12566 * Creates a parser context for an XML in-memory document.
12567 *
12568 * Returns the new parser context or NULL
12569 */
12570xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012571xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012572 int len;
12573
12574 if (cur == NULL)
12575 return(NULL);
12576 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012577 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012578}
12579
Daniel Veillard81273902003-09-30 00:43:48 +000012580#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012581/**
12582 * xmlSAXParseDoc:
12583 * @sax: the SAX handler block
12584 * @cur: a pointer to an array of xmlChar
12585 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12586 * documents
12587 *
12588 * parse an XML in-memory document and build a tree.
12589 * It use the given SAX function block to handle the parsing callback.
12590 * If sax is NULL, fallback to the default DOM tree building routines.
12591 *
12592 * Returns the resulting document tree
12593 */
12594
12595xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012596xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012597 xmlDocPtr ret;
12598 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012599 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012600
Daniel Veillard38936062004-11-04 17:45:11 +000012601 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012602
12603
12604 ctxt = xmlCreateDocParserCtxt(cur);
12605 if (ctxt == NULL) return(NULL);
12606 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012607 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012608 ctxt->sax = sax;
12609 ctxt->userData = NULL;
12610 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012611 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012612
12613 xmlParseDocument(ctxt);
12614 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12615 else {
12616 ret = NULL;
12617 xmlFreeDoc(ctxt->myDoc);
12618 ctxt->myDoc = NULL;
12619 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012620 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012621 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012622 xmlFreeParserCtxt(ctxt);
12623
12624 return(ret);
12625}
12626
12627/**
12628 * xmlParseDoc:
12629 * @cur: a pointer to an array of xmlChar
12630 *
12631 * parse an XML in-memory document and build a tree.
12632 *
12633 * Returns the resulting document tree
12634 */
12635
12636xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012637xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012638 return(xmlSAXParseDoc(NULL, cur, 0));
12639}
Daniel Veillard81273902003-09-30 00:43:48 +000012640#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012641
Daniel Veillard81273902003-09-30 00:43:48 +000012642#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012643/************************************************************************
12644 * *
12645 * Specific function to keep track of entities references *
12646 * and used by the XSLT debugger *
12647 * *
12648 ************************************************************************/
12649
12650static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12651
12652/**
12653 * xmlAddEntityReference:
12654 * @ent : A valid entity
12655 * @firstNode : A valid first node for children of entity
12656 * @lastNode : A valid last node of children entity
12657 *
12658 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12659 */
12660static void
12661xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12662 xmlNodePtr lastNode)
12663{
12664 if (xmlEntityRefFunc != NULL) {
12665 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12666 }
12667}
12668
12669
12670/**
12671 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012672 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012673 *
12674 * Set the function to call call back when a xml reference has been made
12675 */
12676void
12677xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12678{
12679 xmlEntityRefFunc = func;
12680}
Daniel Veillard81273902003-09-30 00:43:48 +000012681#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012682
12683/************************************************************************
12684 * *
12685 * Miscellaneous *
12686 * *
12687 ************************************************************************/
12688
12689#ifdef LIBXML_XPATH_ENABLED
12690#include <libxml/xpath.h>
12691#endif
12692
Daniel Veillardffa3c742005-07-21 13:24:09 +000012693extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012694static int xmlParserInitialized = 0;
12695
12696/**
12697 * xmlInitParser:
12698 *
12699 * Initialization function for the XML parser.
12700 * This is not reentrant. Call once before processing in case of
12701 * use in multithreaded programs.
12702 */
12703
12704void
12705xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012706 if (xmlParserInitialized != 0)
12707 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012708
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012709 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12710 (xmlGenericError == NULL))
12711 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012712 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012713 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012714 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012715 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012716 xmlDefaultSAXHandlerInit();
12717 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012718#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012719 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012720#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012721#ifdef LIBXML_HTML_ENABLED
12722 htmlInitAutoClose();
12723 htmlDefaultSAXHandlerInit();
12724#endif
12725#ifdef LIBXML_XPATH_ENABLED
12726 xmlXPathInit();
12727#endif
12728 xmlParserInitialized = 1;
12729}
12730
12731/**
12732 * xmlCleanupParser:
12733 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012734 * Cleanup function for the XML library. It tries to reclaim all
12735 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012736 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012737 * function should not prevent reusing the library but one should
12738 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012739 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012740 */
12741
12742void
12743xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012744 if (!xmlParserInitialized)
12745 return;
12746
Owen Taylor3473f882001-02-23 17:55:21 +000012747 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012748#ifdef LIBXML_CATALOG_ENABLED
12749 xmlCatalogCleanup();
12750#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012751 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012752 xmlCleanupInputCallbacks();
12753#ifdef LIBXML_OUTPUT_ENABLED
12754 xmlCleanupOutputCallbacks();
12755#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012756#ifdef LIBXML_SCHEMAS_ENABLED
12757 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012758 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012759#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012760 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012761 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012762 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012763 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012764 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012765}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012766
12767/************************************************************************
12768 * *
12769 * New set (2.6.0) of simpler and more flexible APIs *
12770 * *
12771 ************************************************************************/
12772
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string with xmlFree() unless it is NULL or owned by the "dict"
 * dictionary in the current scope. When "dict" is NULL any non-NULL
 * string is freed.
 *
 * The expansion is a bare if statement which already carries its own
 * trailing semicolon, so it must not be used as the body of an if/else.
 */
#define DICT_FREE(str)                                                  \
        if (((str) != NULL) &&                                          \
            ((dict == NULL) ||                                          \
             (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))         \
            xmlFree((char *)(str));
12784
12785/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012786 * xmlCtxtReset:
12787 * @ctxt: an XML parser context
12788 *
12789 * Reset a parser context
12790 */
12791void
12792xmlCtxtReset(xmlParserCtxtPtr ctxt)
12793{
12794 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012795 xmlDictPtr dict;
12796
12797 if (ctxt == NULL)
12798 return;
12799
12800 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012801
12802 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12803 xmlFreeInputStream(input);
12804 }
12805 ctxt->inputNr = 0;
12806 ctxt->input = NULL;
12807
12808 ctxt->spaceNr = 0;
12809 ctxt->spaceTab[0] = -1;
12810 ctxt->space = &ctxt->spaceTab[0];
12811
12812
12813 ctxt->nodeNr = 0;
12814 ctxt->node = NULL;
12815
12816 ctxt->nameNr = 0;
12817 ctxt->name = NULL;
12818
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012819 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012820 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012821 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012822 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012823 DICT_FREE(ctxt->directory);
12824 ctxt->directory = NULL;
12825 DICT_FREE(ctxt->extSubURI);
12826 ctxt->extSubURI = NULL;
12827 DICT_FREE(ctxt->extSubSystem);
12828 ctxt->extSubSystem = NULL;
12829 if (ctxt->myDoc != NULL)
12830 xmlFreeDoc(ctxt->myDoc);
12831 ctxt->myDoc = NULL;
12832
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012833 ctxt->standalone = -1;
12834 ctxt->hasExternalSubset = 0;
12835 ctxt->hasPErefs = 0;
12836 ctxt->html = 0;
12837 ctxt->external = 0;
12838 ctxt->instate = XML_PARSER_START;
12839 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012840
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012841 ctxt->wellFormed = 1;
12842 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012843 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012844 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012845#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012846 ctxt->vctxt.userData = ctxt;
12847 ctxt->vctxt.error = xmlParserValidityError;
12848 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012849#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012850 ctxt->record_info = 0;
12851 ctxt->nbChars = 0;
12852 ctxt->checkIndex = 0;
12853 ctxt->inSubset = 0;
12854 ctxt->errNo = XML_ERR_OK;
12855 ctxt->depth = 0;
12856 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12857 ctxt->catalogs = NULL;
12858 xmlInitNodeInfoSeq(&ctxt->node_seq);
12859
12860 if (ctxt->attsDefault != NULL) {
12861 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12862 ctxt->attsDefault = NULL;
12863 }
12864 if (ctxt->attsSpecial != NULL) {
12865 xmlHashFree(ctxt->attsSpecial, NULL);
12866 ctxt->attsSpecial = NULL;
12867 }
12868
Daniel Veillard4432df22003-09-28 18:58:27 +000012869#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012870 if (ctxt->catalogs != NULL)
12871 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012872#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012873 if (ctxt->lastError.code != XML_ERR_OK)
12874 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012875}
12876
12877/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012878 * xmlCtxtResetPush:
12879 * @ctxt: an XML parser context
12880 * @chunk: a pointer to an array of chars
12881 * @size: number of chars in the array
12882 * @filename: an optional file name or URI
12883 * @encoding: the document encoding, or NULL
12884 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012885 * Reset a push parser context
12886 *
12887 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012888 */
12889int
12890xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12891 int size, const char *filename, const char *encoding)
12892{
12893 xmlParserInputPtr inputStream;
12894 xmlParserInputBufferPtr buf;
12895 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12896
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012897 if (ctxt == NULL)
12898 return(1);
12899
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012900 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12901 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12902
12903 buf = xmlAllocParserInputBuffer(enc);
12904 if (buf == NULL)
12905 return(1);
12906
12907 if (ctxt == NULL) {
12908 xmlFreeParserInputBuffer(buf);
12909 return(1);
12910 }
12911
12912 xmlCtxtReset(ctxt);
12913
12914 if (ctxt->pushTab == NULL) {
12915 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12916 sizeof(xmlChar *));
12917 if (ctxt->pushTab == NULL) {
12918 xmlErrMemory(ctxt, NULL);
12919 xmlFreeParserInputBuffer(buf);
12920 return(1);
12921 }
12922 }
12923
12924 if (filename == NULL) {
12925 ctxt->directory = NULL;
12926 } else {
12927 ctxt->directory = xmlParserGetDirectory(filename);
12928 }
12929
12930 inputStream = xmlNewInputStream(ctxt);
12931 if (inputStream == NULL) {
12932 xmlFreeParserInputBuffer(buf);
12933 return(1);
12934 }
12935
12936 if (filename == NULL)
12937 inputStream->filename = NULL;
12938 else
12939 inputStream->filename = (char *)
12940 xmlCanonicPath((const xmlChar *) filename);
12941 inputStream->buf = buf;
12942 inputStream->base = inputStream->buf->buffer->content;
12943 inputStream->cur = inputStream->buf->buffer->content;
12944 inputStream->end =
12945 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12946
12947 inputPush(ctxt, inputStream);
12948
12949 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12950 (ctxt->input->buf != NULL)) {
12951 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12952 int cur = ctxt->input->cur - ctxt->input->base;
12953
12954 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12955
12956 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12957 ctxt->input->cur = ctxt->input->base + cur;
12958 ctxt->input->end =
12959 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12960 use];
12961#ifdef DEBUG_PUSH
12962 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12963#endif
12964 }
12965
12966 if (encoding != NULL) {
12967 xmlCharEncodingHandlerPtr hdlr;
12968
12969 hdlr = xmlFindCharEncodingHandler(encoding);
12970 if (hdlr != NULL) {
12971 xmlSwitchToEncoding(ctxt, hdlr);
12972 } else {
12973 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12974 "Unsupported encoding %s\n", BAD_CAST encoding);
12975 }
12976 } else if (enc != XML_CHAR_ENCODING_NONE) {
12977 xmlSwitchEncoding(ctxt, enc);
12978 }
12979
12980 return(0);
12981}
12982
12983/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012984 * xmlCtxtUseOptions:
12985 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012986 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012987 *
12988 * Applies the options to the parser context
12989 *
12990 * Returns 0 in case of success, the set of unknown or unimplemented options
12991 * in case of error.
12992 */
12993int
12994xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12995{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012996 if (ctxt == NULL)
12997 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012998 if (options & XML_PARSE_RECOVER) {
12999 ctxt->recovery = 1;
13000 options -= XML_PARSE_RECOVER;
13001 } else
13002 ctxt->recovery = 0;
13003 if (options & XML_PARSE_DTDLOAD) {
13004 ctxt->loadsubset = XML_DETECT_IDS;
13005 options -= XML_PARSE_DTDLOAD;
13006 } else
13007 ctxt->loadsubset = 0;
13008 if (options & XML_PARSE_DTDATTR) {
13009 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13010 options -= XML_PARSE_DTDATTR;
13011 }
13012 if (options & XML_PARSE_NOENT) {
13013 ctxt->replaceEntities = 1;
13014 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13015 options -= XML_PARSE_NOENT;
13016 } else
13017 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013018 if (options & XML_PARSE_PEDANTIC) {
13019 ctxt->pedantic = 1;
13020 options -= XML_PARSE_PEDANTIC;
13021 } else
13022 ctxt->pedantic = 0;
13023 if (options & XML_PARSE_NOBLANKS) {
13024 ctxt->keepBlanks = 0;
13025 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13026 options -= XML_PARSE_NOBLANKS;
13027 } else
13028 ctxt->keepBlanks = 1;
13029 if (options & XML_PARSE_DTDVALID) {
13030 ctxt->validate = 1;
13031 if (options & XML_PARSE_NOWARNING)
13032 ctxt->vctxt.warning = NULL;
13033 if (options & XML_PARSE_NOERROR)
13034 ctxt->vctxt.error = NULL;
13035 options -= XML_PARSE_DTDVALID;
13036 } else
13037 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013038 if (options & XML_PARSE_NOWARNING) {
13039 ctxt->sax->warning = NULL;
13040 options -= XML_PARSE_NOWARNING;
13041 }
13042 if (options & XML_PARSE_NOERROR) {
13043 ctxt->sax->error = NULL;
13044 ctxt->sax->fatalError = NULL;
13045 options -= XML_PARSE_NOERROR;
13046 }
Daniel Veillard81273902003-09-30 00:43:48 +000013047#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013048 if (options & XML_PARSE_SAX1) {
13049 ctxt->sax->startElement = xmlSAX2StartElement;
13050 ctxt->sax->endElement = xmlSAX2EndElement;
13051 ctxt->sax->startElementNs = NULL;
13052 ctxt->sax->endElementNs = NULL;
13053 ctxt->sax->initialized = 1;
13054 options -= XML_PARSE_SAX1;
13055 }
Daniel Veillard81273902003-09-30 00:43:48 +000013056#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013057 if (options & XML_PARSE_NODICT) {
13058 ctxt->dictNames = 0;
13059 options -= XML_PARSE_NODICT;
13060 } else {
13061 ctxt->dictNames = 1;
13062 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013063 if (options & XML_PARSE_NOCDATA) {
13064 ctxt->sax->cdataBlock = NULL;
13065 options -= XML_PARSE_NOCDATA;
13066 }
13067 if (options & XML_PARSE_NSCLEAN) {
13068 ctxt->options |= XML_PARSE_NSCLEAN;
13069 options -= XML_PARSE_NSCLEAN;
13070 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013071 if (options & XML_PARSE_NONET) {
13072 ctxt->options |= XML_PARSE_NONET;
13073 options -= XML_PARSE_NONET;
13074 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013075 if (options & XML_PARSE_COMPACT) {
13076 ctxt->options |= XML_PARSE_COMPACT;
13077 options -= XML_PARSE_COMPACT;
13078 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013079 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013080 return (options);
13081}
13082
13083/**
13084 * xmlDoRead:
13085 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013086 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013087 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013088 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013089 * @reuse: keep the context for reuse
13090 *
13091 * Common front-end for the xmlRead functions
13092 *
13093 * Returns the resulting document tree or NULL
13094 */
13095static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013096xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13097 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013098{
13099 xmlDocPtr ret;
13100
13101 xmlCtxtUseOptions(ctxt, options);
13102 if (encoding != NULL) {
13103 xmlCharEncodingHandlerPtr hdlr;
13104
13105 hdlr = xmlFindCharEncodingHandler(encoding);
13106 if (hdlr != NULL)
13107 xmlSwitchToEncoding(ctxt, hdlr);
13108 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013109 if ((URL != NULL) && (ctxt->input != NULL) &&
13110 (ctxt->input->filename == NULL))
13111 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013112 xmlParseDocument(ctxt);
13113 if ((ctxt->wellFormed) || ctxt->recovery)
13114 ret = ctxt->myDoc;
13115 else {
13116 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013117 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013118 xmlFreeDoc(ctxt->myDoc);
13119 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013120 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013121 ctxt->myDoc = NULL;
13122 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013123 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013124 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013125
13126 return (ret);
13127}
13128
13129/**
13130 * xmlReadDoc:
13131 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013132 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013133 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013134 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013135 *
13136 * parse an XML in-memory document and build a tree.
13137 *
13138 * Returns the resulting document tree
13139 */
13140xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013141xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013142{
13143 xmlParserCtxtPtr ctxt;
13144
13145 if (cur == NULL)
13146 return (NULL);
13147
13148 ctxt = xmlCreateDocParserCtxt(cur);
13149 if (ctxt == NULL)
13150 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013151 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013152}
13153
13154/**
13155 * xmlReadFile:
13156 * @filename: a file or URL
13157 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013158 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013159 *
13160 * parse an XML file from the filesystem or the network.
13161 *
13162 * Returns the resulting document tree
13163 */
13164xmlDocPtr
13165xmlReadFile(const char *filename, const char *encoding, int options)
13166{
13167 xmlParserCtxtPtr ctxt;
13168
Daniel Veillard61b93382003-11-03 14:28:31 +000013169 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013170 if (ctxt == NULL)
13171 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013172 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013173}
13174
13175/**
13176 * xmlReadMemory:
13177 * @buffer: a pointer to a char array
13178 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013179 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013180 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013181 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013182 *
13183 * parse an XML in-memory document and build a tree.
13184 *
13185 * Returns the resulting document tree
13186 */
13187xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013188xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013189{
13190 xmlParserCtxtPtr ctxt;
13191
13192 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13193 if (ctxt == NULL)
13194 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013195 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013196}
13197
13198/**
13199 * xmlReadFd:
13200 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013201 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013202 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013203 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013204 *
13205 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013206 * NOTE that the file descriptor will not be closed when the
13207 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013208 *
13209 * Returns the resulting document tree
13210 */
13211xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013212xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013213{
13214 xmlParserCtxtPtr ctxt;
13215 xmlParserInputBufferPtr input;
13216 xmlParserInputPtr stream;
13217
13218 if (fd < 0)
13219 return (NULL);
13220
13221 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13222 if (input == NULL)
13223 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013224 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013225 ctxt = xmlNewParserCtxt();
13226 if (ctxt == NULL) {
13227 xmlFreeParserInputBuffer(input);
13228 return (NULL);
13229 }
13230 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13231 if (stream == NULL) {
13232 xmlFreeParserInputBuffer(input);
13233 xmlFreeParserCtxt(ctxt);
13234 return (NULL);
13235 }
13236 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013237 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013238}
13239
13240/**
13241 * xmlReadIO:
13242 * @ioread: an I/O read function
13243 * @ioclose: an I/O close function
13244 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013245 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013247 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013248 *
13249 * parse an XML document from I/O functions and source and build a tree.
13250 *
13251 * Returns the resulting document tree
13252 */
13253xmlDocPtr
13254xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013255 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013256{
13257 xmlParserCtxtPtr ctxt;
13258 xmlParserInputBufferPtr input;
13259 xmlParserInputPtr stream;
13260
13261 if (ioread == NULL)
13262 return (NULL);
13263
13264 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13265 XML_CHAR_ENCODING_NONE);
13266 if (input == NULL)
13267 return (NULL);
13268 ctxt = xmlNewParserCtxt();
13269 if (ctxt == NULL) {
13270 xmlFreeParserInputBuffer(input);
13271 return (NULL);
13272 }
13273 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13274 if (stream == NULL) {
13275 xmlFreeParserInputBuffer(input);
13276 xmlFreeParserCtxt(ctxt);
13277 return (NULL);
13278 }
13279 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013280 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013281}
13282
13283/**
13284 * xmlCtxtReadDoc:
13285 * @ctxt: an XML parser context
13286 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013287 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013288 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013289 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013290 *
13291 * parse an XML in-memory document and build a tree.
13292 * This reuses the existing @ctxt parser context
13293 *
13294 * Returns the resulting document tree
13295 */
13296xmlDocPtr
13297xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013298 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013299{
13300 xmlParserInputPtr stream;
13301
13302 if (cur == NULL)
13303 return (NULL);
13304 if (ctxt == NULL)
13305 return (NULL);
13306
13307 xmlCtxtReset(ctxt);
13308
13309 stream = xmlNewStringInputStream(ctxt, cur);
13310 if (stream == NULL) {
13311 return (NULL);
13312 }
13313 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013314 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013315}
13316
13317/**
13318 * xmlCtxtReadFile:
13319 * @ctxt: an XML parser context
13320 * @filename: a file or URL
13321 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013322 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013323 *
13324 * parse an XML file from the filesystem or the network.
13325 * This reuses the existing @ctxt parser context
13326 *
13327 * Returns the resulting document tree
13328 */
13329xmlDocPtr
13330xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13331 const char *encoding, int options)
13332{
13333 xmlParserInputPtr stream;
13334
13335 if (filename == NULL)
13336 return (NULL);
13337 if (ctxt == NULL)
13338 return (NULL);
13339
13340 xmlCtxtReset(ctxt);
13341
Daniel Veillard29614c72004-11-26 10:47:26 +000013342 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013343 if (stream == NULL) {
13344 return (NULL);
13345 }
13346 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013347 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013348}
13349
13350/**
13351 * xmlCtxtReadMemory:
13352 * @ctxt: an XML parser context
13353 * @buffer: a pointer to a char array
13354 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013355 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013356 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013357 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013358 *
13359 * parse an XML in-memory document and build a tree.
13360 * This reuses the existing @ctxt parser context
13361 *
13362 * Returns the resulting document tree
13363 */
13364xmlDocPtr
13365xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013366 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013367{
13368 xmlParserInputBufferPtr input;
13369 xmlParserInputPtr stream;
13370
13371 if (ctxt == NULL)
13372 return (NULL);
13373 if (buffer == NULL)
13374 return (NULL);
13375
13376 xmlCtxtReset(ctxt);
13377
13378 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13379 if (input == NULL) {
13380 return(NULL);
13381 }
13382
13383 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13384 if (stream == NULL) {
13385 xmlFreeParserInputBuffer(input);
13386 return(NULL);
13387 }
13388
13389 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013390 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013391}
13392
13393/**
13394 * xmlCtxtReadFd:
13395 * @ctxt: an XML parser context
13396 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013397 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013398 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013399 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013400 *
13401 * parse an XML from a file descriptor and build a tree.
13402 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013403 * NOTE that the file descriptor will not be closed when the
13404 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013405 *
13406 * Returns the resulting document tree
13407 */
13408xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013409xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13410 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013411{
13412 xmlParserInputBufferPtr input;
13413 xmlParserInputPtr stream;
13414
13415 if (fd < 0)
13416 return (NULL);
13417 if (ctxt == NULL)
13418 return (NULL);
13419
13420 xmlCtxtReset(ctxt);
13421
13422
13423 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13424 if (input == NULL)
13425 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013426 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013427 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13428 if (stream == NULL) {
13429 xmlFreeParserInputBuffer(input);
13430 return (NULL);
13431 }
13432 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013433 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013434}
13435
13436/**
13437 * xmlCtxtReadIO:
13438 * @ctxt: an XML parser context
13439 * @ioread: an I/O read function
13440 * @ioclose: an I/O close function
13441 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013442 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013443 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013444 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013445 *
13446 * parse an XML document from I/O functions and source and build a tree.
13447 * This reuses the existing @ctxt parser context
13448 *
13449 * Returns the resulting document tree
13450 */
13451xmlDocPtr
13452xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13453 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013454 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013455 const char *encoding, int options)
13456{
13457 xmlParserInputBufferPtr input;
13458 xmlParserInputPtr stream;
13459
13460 if (ioread == NULL)
13461 return (NULL);
13462 if (ctxt == NULL)
13463 return (NULL);
13464
13465 xmlCtxtReset(ctxt);
13466
13467 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13468 XML_CHAR_ENCODING_NONE);
13469 if (input == NULL)
13470 return (NULL);
13471 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13472 if (stream == NULL) {
13473 xmlFreeParserInputBuffer(input);
13474 return (NULL);
13475 }
13476 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013477 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013478}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013479
13480#define bottom_parser
13481#include "elfgcchack.h"