blob: 4c76bf952d39d1810f71c9a66a14054db8ca14f9 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser accepts
 * to process. It is a safety boundary, not a limitation of the parser
 * itself.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
857 xmlRealloc(ctxt->nsTab,
858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1231
/* Current xmlChar of the active input, read straight from the buffer. */
#define RAW (*ctxt->input->cur)
/* Current xmlChar of the active input; expands to the same read as RAW. */
#define CUR RAW
/* The xmlChar located val positions after the current one. */
#define NXT(val) (ctxt->input->cur[(val)])
/* Read pointer of the active input; may move when input is consumed. */
#define CUR_PTR ctxt->input->cur
1236
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn. The bytes are read as unsigned char and the comparison
 * stops at the first mismatch. Every argument is parenthesized so that
 * arbitrary expressions can be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
/*
 * SKIP(val): advance the current input by val xmlChars, keeping the
 * character and column counters in step. A '%' at the new position is
 * handed to xmlParserHandlePEReference(); if the input then stands on a
 * 0 byte and cannot be grown, it is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1262
Daniel Veillarda880b122003-04-21 21:36:41 +00001263#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001264 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1265 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001266 xmlSHRINK (ctxt);
1267
1268static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1269 xmlParserInputShrink(ctxt->input);
1270 if ((*ctxt->input->cur == 0) &&
1271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1272 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001273 }
Owen Taylor3473f882001-02-23 17:55:21 +00001274
Daniel Veillarda880b122003-04-21 21:36:41 +00001275#define GROW if ((ctxt->progressive == 0) && \
1276 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001277 xmlGROW (ctxt);
1278
1279static void xmlGROW (xmlParserCtxtPtr ctxt) {
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((*ctxt->input->cur == 0) &&
1282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1283 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001284}
Owen Taylor3473f882001-02-23 17:55:21 +00001285
/* Skip the blanks found at the current position of the context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character of the context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, updating the counters, and try
 * to grow the input when the new position holds a 0 byte.
 */
#define NEXT1 {								\
        ctxt->nbChars++;						\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating line and column, then hand a '%' found at the new position
 * to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
        ctxt->input->col++;						\
    } else {								\
        ctxt->input->line++; ctxt->input->col = 1;			\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context, its length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v, which is l xmlChars long,
 * to buffer b at index i and advance i past it.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);			\
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +00001312
1313/**
1314 * xmlSkipBlankChars:
1315 * @ctxt: the XML parser context
1316 *
1317 * skip all blanks character found at that point in the input streams.
1318 * It pops up finished entities in the process if allowable at that point.
1319 *
1320 * Returns the number of space chars skipped
1321 */
1322
1323int
1324xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001325 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001326
1327 /*
1328 * It's Okay to use CUR/NEXT here since all the blanks are on
1329 * the ASCII range.
1330 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001331 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1332 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001333 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001334 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001336 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001337 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001338 if (*cur == '\n') {
1339 ctxt->input->line++; ctxt->input->col = 1;
1340 }
1341 cur++;
1342 res++;
1343 if (*cur == 0) {
1344 ctxt->input->cur = cur;
1345 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1346 cur = ctxt->input->cur;
1347 }
1348 }
1349 ctxt->input->cur = cur;
1350 } else {
1351 int cur;
1352 do {
1353 cur = CUR;
1354 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1355 NEXT;
1356 cur = CUR;
1357 res++;
1358 }
1359 while ((cur == 0) && (ctxt->inputNr > 1) &&
1360 (ctxt->instate != XML_PARSER_COMMENT)) {
1361 xmlPopInput(ctxt);
1362 cur = CUR;
1363 }
1364 /*
1365 * Need to handle support of entities branching here
1366 */
1367 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1368 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1369 }
Owen Taylor3473f882001-02-23 17:55:21 +00001370 return(res);
1371}
1372
1373/************************************************************************
1374 * *
1375 * Commodity functions to handle entities *
1376 * *
1377 ************************************************************************/
1378
1379/**
1380 * xmlPopInput:
1381 * @ctxt: an XML parser context
1382 *
1383 * xmlPopInput: the current input pointed by ctxt->input came to an end
1384 * pop it and return the next char.
1385 *
1386 * Returns the current xmlChar in the parser context
1387 */
1388xmlChar
1389xmlPopInput(xmlParserCtxtPtr ctxt) {
1390 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1391 if (xmlParserDebugEntities)
1392 xmlGenericError(xmlGenericErrorContext,
1393 "Popping input %d\n", ctxt->inputNr);
1394 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001395 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1397 return(xmlPopInput(ctxt));
1398 return(CUR);
1399}
1400
1401/**
1402 * xmlPushInput:
1403 * @ctxt: an XML parser context
1404 * @input: an XML parser input fragment (entity, XML fragment ...).
1405 *
1406 * xmlPushInput: switch to a new input stream which is stacked on top
1407 * of the previous one(s).
1408 */
1409void
1410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1411 if (input == NULL) return;
1412
1413 if (xmlParserDebugEntities) {
1414 if ((ctxt->input != NULL) && (ctxt->input->filename))
1415 xmlGenericError(xmlGenericErrorContext,
1416 "%s(%d): ", ctxt->input->filename,
1417 ctxt->input->line);
1418 xmlGenericError(xmlGenericErrorContext,
1419 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1420 }
1421 inputPush(ctxt, input);
1422 GROW;
1423}
1424
1425/**
1426 * xmlParseCharRef:
1427 * @ctxt: an XML parser context
1428 *
1429 * parse Reference declarations
1430 *
1431 * [66] CharRef ::= '&#' [0-9]+ ';' |
1432 * '&#x' [0-9a-fA-F]+ ';'
1433 *
1434 * [ WFC: Legal Character ]
1435 * Characters referred to using character references must match the
1436 * production for Char.
1437 *
1438 * Returns the value parsed (as an int), 0 in case of error
1439 */
1440int
1441xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001442 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 int count = 0;
1444
Owen Taylor3473f882001-02-23 17:55:21 +00001445 /*
1446 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1447 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001448 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001449 (NXT(2) == 'x')) {
1450 SKIP(3);
1451 GROW;
1452 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001453 if (count++ > 20) {
1454 count = 0;
1455 GROW;
1456 }
1457 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = val * 16 + (CUR - '0');
1459 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1460 val = val * 16 + (CUR - 'a') + 10;
1461 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1462 val = val * 16 + (CUR - 'A') + 10;
1463 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001465 val = 0;
1466 break;
1467 }
1468 NEXT;
1469 count++;
1470 }
1471 if (RAW == ';') {
1472 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001473 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001474 ctxt->nbChars ++;
1475 ctxt->input->cur++;
1476 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001477 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001478 SKIP(2);
1479 GROW;
1480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001481 if (count++ > 20) {
1482 count = 0;
1483 GROW;
1484 }
1485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001486 val = val * 10 + (CUR - '0');
1487 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001489 val = 0;
1490 break;
1491 }
1492 NEXT;
1493 count++;
1494 }
1495 if (RAW == ';') {
1496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001497 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001498 ctxt->nbChars ++;
1499 ctxt->input->cur++;
1500 }
1501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 }
1504
1505 /*
1506 * [ WFC: Legal Character ]
1507 * Characters referred to using character references must match the
1508 * production for Char.
1509 */
William M. Brack871611b2003-10-18 04:53:14 +00001510 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 return(val);
1512 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1514 "xmlParseCharRef: invalid xmlChar value %d\n",
1515 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 }
1517 return(0);
1518}
1519
1520/**
1521 * xmlParseStringCharRef:
1522 * @ctxt: an XML parser context
1523 * @str: a pointer to an index in the string
1524 *
1525 * parse Reference declarations, variant parsing from a string rather
1526 * than an an input flow.
1527 *
1528 * [66] CharRef ::= '&#' [0-9]+ ';' |
1529 * '&#x' [0-9a-fA-F]+ ';'
1530 *
1531 * [ WFC: Legal Character ]
1532 * Characters referred to using character references must match the
1533 * production for Char.
1534 *
1535 * Returns the value parsed (as an int), 0 in case of error, str will be
1536 * updated to the current value of the index
1537 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538static int
Owen Taylor3473f882001-02-23 17:55:21 +00001539xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1540 const xmlChar *ptr;
1541 xmlChar cur;
1542 int val = 0;
1543
1544 if ((str == NULL) || (*str == NULL)) return(0);
1545 ptr = *str;
1546 cur = *ptr;
1547 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1548 ptr += 3;
1549 cur = *ptr;
1550 while (cur != ';') { /* Non input consuming loop */
1551 if ((cur >= '0') && (cur <= '9'))
1552 val = val * 16 + (cur - '0');
1553 else if ((cur >= 'a') && (cur <= 'f'))
1554 val = val * 16 + (cur - 'a') + 10;
1555 else if ((cur >= 'A') && (cur <= 'F'))
1556 val = val * 16 + (cur - 'A') + 10;
1557 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001558 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001559 val = 0;
1560 break;
1561 }
1562 ptr++;
1563 cur = *ptr;
1564 }
1565 if (cur == ';')
1566 ptr++;
1567 } else if ((cur == '&') && (ptr[1] == '#')){
1568 ptr += 2;
1569 cur = *ptr;
1570 while (cur != ';') { /* Non input consuming loops */
1571 if ((cur >= '0') && (cur <= '9'))
1572 val = val * 10 + (cur - '0');
1573 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001574 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001575 val = 0;
1576 break;
1577 }
1578 ptr++;
1579 cur = *ptr;
1580 }
1581 if (cur == ';')
1582 ptr++;
1583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001584 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001585 return(0);
1586 }
1587 *str = ptr;
1588
1589 /*
1590 * [ WFC: Legal Character ]
1591 * Characters referred to using character references must match the
1592 * production for Char.
1593 */
William M. Brack871611b2003-10-18 04:53:14 +00001594 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 return(val);
1596 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001597 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1598 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1599 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 return(0);
1602}
1603
1604/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 * xmlNewBlanksWrapperInputStream:
1606 * @ctxt: an XML parser context
1607 * @entity: an Entity pointer
1608 *
1609 * Create a new input stream for wrapping
1610 * blanks around a PEReference
1611 *
1612 * Returns the new input stream or NULL
1613 */
1614
1615static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1616
Daniel Veillardf4862f02002-09-10 11:13:43 +00001617static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001618xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1619 xmlParserInputPtr input;
1620 xmlChar *buffer;
1621 size_t length;
1622 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1624 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001625 return(NULL);
1626 }
1627 if (xmlParserDebugEntities)
1628 xmlGenericError(xmlGenericErrorContext,
1629 "new blanks wrapper for entity: %s\n", entity->name);
1630 input = xmlNewInputStream(ctxt);
1631 if (input == NULL) {
1632 return(NULL);
1633 }
1634 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001635 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001636 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001637 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001638 return(NULL);
1639 }
1640 buffer [0] = ' ';
1641 buffer [1] = '%';
1642 buffer [length-3] = ';';
1643 buffer [length-2] = ' ';
1644 buffer [length-1] = 0;
1645 memcpy(buffer + 2, entity->name, length - 5);
1646 input->free = deallocblankswrapper;
1647 input->base = buffer;
1648 input->cur = buffer;
1649 input->length = length;
1650 input->end = &buffer[length];
1651 return(input);
1652}
1653
1654/**
Owen Taylor3473f882001-02-23 17:55:21 +00001655 * xmlParserHandlePEReference:
1656 * @ctxt: the parser context
1657 *
1658 * [69] PEReference ::= '%' Name ';'
1659 *
1660 * [ WFC: No Recursion ]
1661 * A parsed entity must not contain a recursive
1662 * reference to itself, either directly or indirectly.
1663 *
1664 * [ WFC: Entity Declared ]
1665 * In a document without any DTD, a document with only an internal DTD
1666 * subset which contains no parameter entity references, or a document
1667 * with "standalone='yes'", ... ... The declaration of a parameter
1668 * entity must precede any reference to it...
1669 *
1670 * [ VC: Entity Declared ]
1671 * In a document with an external subset or external parameter entities
1672 * with "standalone='no'", ... ... The declaration of a parameter entity
1673 * must precede any reference to it...
1674 *
1675 * [ WFC: In DTD ]
1676 * Parameter-entity references may only appear in the DTD.
1677 * NOTE: misleading but this is handled.
1678 *
1679 * A PEReference may have been detected in the current input stream
1680 * the handling is done accordingly to
1681 * http://www.w3.org/TR/REC-xml#entproc
1682 * i.e.
1683 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001684 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001685 */
1686void
1687xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 xmlEntityPtr entity = NULL;
1690 xmlParserInputPtr input;
1691
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (RAW != '%') return;
1693 switch(ctxt->instate) {
1694 case XML_PARSER_CDATA_SECTION:
1695 return;
1696 case XML_PARSER_COMMENT:
1697 return;
1698 case XML_PARSER_START_TAG:
1699 return;
1700 case XML_PARSER_END_TAG:
1701 return;
1702 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 return;
1705 case XML_PARSER_PROLOG:
1706 case XML_PARSER_START:
1707 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 return;
1710 case XML_PARSER_ENTITY_DECL:
1711 case XML_PARSER_CONTENT:
1712 case XML_PARSER_ATTRIBUTE_VALUE:
1713 case XML_PARSER_PI:
1714 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001715 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001716 /* we just ignore it there */
1717 return;
1718 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001719 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001720 return;
1721 case XML_PARSER_ENTITY_VALUE:
1722 /*
1723 * NOTE: in the case of entity values, we don't do the
1724 * substitution here since we need the literal
1725 * entity value to be able to save the internal
1726 * subset of the document.
1727 * This will be handled by xmlStringDecodeEntities
1728 */
1729 return;
1730 case XML_PARSER_DTD:
1731 /*
1732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1733 * In the internal DTD subset, parameter-entity references
1734 * can occur only where markup declarations can occur, not
1735 * within markup declarations.
1736 * In that case this is handled in xmlParseMarkupDecl
1737 */
1738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1739 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001740 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001741 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001742 break;
1743 case XML_PARSER_IGNORE:
1744 return;
1745 }
1746
1747 NEXT;
1748 name = xmlParseName(ctxt);
1749 if (xmlParserDebugEntities)
1750 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001751 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001753 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001754 } else {
1755 if (RAW == ';') {
1756 NEXT;
1757 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1758 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1759 if (entity == NULL) {
1760
1761 /*
1762 * [ WFC: Entity Declared ]
1763 * In a document without any DTD, a document with only an
1764 * internal DTD subset which contains no parameter entity
1765 * references, or a document with "standalone='yes'", ...
1766 * ... The declaration of a parameter entity must precede
1767 * any reference to it...
1768 */
1769 if ((ctxt->standalone == 1) ||
1770 ((ctxt->hasExternalSubset == 0) &&
1771 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001773 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001774 } else {
1775 /*
1776 * [ VC: Entity Declared ]
1777 * In a document with an external subset or external
1778 * parameter entities with "standalone='no'", ...
1779 * ... The declaration of a parameter entity must precede
1780 * any reference to it...
1781 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001782 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1783 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1784 "PEReference: %%%s; not found\n",
1785 name);
1786 } else
1787 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1788 "PEReference: %%%s; not found\n",
1789 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 ctxt->valid = 0;
1791 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 } else if (ctxt->input->free != deallocblankswrapper) {
1793 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1794 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 } else {
1796 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1797 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001798 xmlChar start[4];
1799 xmlCharEncoding enc;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 /*
1802 * handle the extra spaces added before and after
1803 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 */
1806 input = xmlNewEntityInputStream(ctxt, entity);
1807 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001808
1809 /*
1810 * Get the 4 first bytes and decode the charset
1811 * if enc != XML_CHAR_ENCODING_NONE
1812 * plug some encoding conversion routines.
1813 */
1814 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001815 if (entity->length >= 4) {
1816 start[0] = RAW;
1817 start[1] = NXT(1);
1818 start[2] = NXT(2);
1819 start[3] = NXT(3);
1820 enc = xmlDetectCharEncoding(start, 4);
1821 if (enc != XML_CHAR_ENCODING_NONE) {
1822 xmlSwitchEncoding(ctxt, enc);
1823 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001824 }
1825
Owen Taylor3473f882001-02-23 17:55:21 +00001826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001827 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1828 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlParseTextDecl(ctxt);
1830 }
Owen Taylor3473f882001-02-23 17:55:21 +00001831 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001832 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1833 "PEReference: %s is not a parameter entity\n",
1834 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001835 }
1836 }
1837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001838 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 }
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841}
1842
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the capacity of @buffer; the capacity is expected to live in a
 * companion variable named <buffer>_size. The enclosing function must
 * provide a "mem_error" label which is jumped to when the reallocation
 * fails. In that case @buffer and <buffer>_size are left untouched, so
 * the original allocation is still valid and can be released by the
 * error path instead of being lost.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_grown = (xmlChar *)					\
        xmlRealloc(buffer, (buffer##_size * 2) * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) goto mem_error;				\
    buffer = buffer##_grown;						\
    buffer##_size *= 2;							\
}
1852
1853/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001854 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001855 * @ctxt: the parser context
1856 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001857 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001858 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1859 * @end: an end marker xmlChar, 0 if none
1860 * @end2: an end marker xmlChar, 0 if none
1861 * @end3: an end marker xmlChar, 0 if none
1862 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001863 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001864 *
1865 * [67] Reference ::= EntityRef | CharRef
1866 *
1867 * [69] PEReference ::= '%' Name ';'
1868 *
1869 * Returns A newly allocated string with the substitution done. The caller
1870 * must deallocate it !
1871 */
1872xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1874 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001875 xmlChar *buffer = NULL;
1876 int buffer_size = 0;
1877
1878 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 xmlEntityPtr ent;
1881 int c,l;
1882 int nbchars = 0;
1883
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001885 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001886 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001887
1888 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001889 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 return(NULL);
1891 }
1892
1893 /*
1894 * allocate a translation buffer.
1895 */
1896 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001897 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001898 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001899
1900 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001901 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001902 * we are operating on already parsed values.
1903 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001904 if (str < last)
1905 c = CUR_SCHAR(str, l);
1906 else
1907 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001908 while ((c != 0) && (c != end) && /* non input consuming loop */
1909 (c != end2) && (c != end3)) {
1910
1911 if (c == 0) break;
1912 if ((c == '&') && (str[1] == '#')) {
1913 int val = xmlParseStringCharRef(ctxt, &str);
1914 if (val != 0) {
1915 COPY_BUF(0,buffer,nbchars,val);
1916 }
1917 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1918 if (xmlParserDebugEntities)
1919 xmlGenericError(xmlGenericErrorContext,
1920 "String decoding Entity Reference: %.30s\n",
1921 str);
1922 ent = xmlParseStringEntityRef(ctxt, &str);
1923 if ((ent != NULL) &&
1924 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1925 if (ent->content != NULL) {
1926 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1927 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001928 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1929 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001930 }
1931 } else if ((ent != NULL) && (ent->content != NULL)) {
1932 xmlChar *rep;
1933
1934 ctxt->depth++;
1935 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1936 0, 0, 0);
1937 ctxt->depth--;
1938 if (rep != NULL) {
1939 current = rep;
1940 while (*current != 0) { /* non input consuming loop */
1941 buffer[nbchars++] = *current++;
1942 if (nbchars >
1943 buffer_size - XML_PARSER_BUFFER_SIZE) {
1944 growBuffer(buffer);
1945 }
1946 }
1947 xmlFree(rep);
1948 }
1949 } else if (ent != NULL) {
1950 int i = xmlStrlen(ent->name);
1951 const xmlChar *cur = ent->name;
1952
1953 buffer[nbchars++] = '&';
1954 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1955 growBuffer(buffer);
1956 }
1957 for (;i > 0;i--)
1958 buffer[nbchars++] = *cur++;
1959 buffer[nbchars++] = ';';
1960 }
1961 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "String decoding PE Reference: %.30s\n", str);
1965 ent = xmlParseStringPEReference(ctxt, &str);
1966 if (ent != NULL) {
1967 xmlChar *rep;
1968
1969 ctxt->depth++;
1970 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1971 0, 0, 0);
1972 ctxt->depth--;
1973 if (rep != NULL) {
1974 current = rep;
1975 while (*current != 0) { /* non input consuming loop */
1976 buffer[nbchars++] = *current++;
1977 if (nbchars >
1978 buffer_size - XML_PARSER_BUFFER_SIZE) {
1979 growBuffer(buffer);
1980 }
1981 }
1982 xmlFree(rep);
1983 }
1984 }
1985 } else {
1986 COPY_BUF(l,buffer,nbchars,c);
1987 str += l;
1988 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1989 growBuffer(buffer);
1990 }
1991 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001992 if (str < last)
1993 c = CUR_SCHAR(str, l);
1994 else
1995 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001996 }
1997 buffer[nbchars++] = 0;
1998 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001999
2000mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002001 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002002 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003}
2004
Daniel Veillarde57ec792003-09-10 10:50:59 +00002005/**
2006 * xmlStringDecodeEntities:
2007 * @ctxt: the parser context
2008 * @str: the input string
2009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2010 * @end: an end marker xmlChar, 0 if none
2011 * @end2: an end marker xmlChar, 0 if none
2012 * @end3: an end marker xmlChar, 0 if none
2013 *
2014 * Takes a entity string content and process to do the adequate substitutions.
2015 *
2016 * [67] Reference ::= EntityRef | CharRef
2017 *
2018 * [69] PEReference ::= '%' Name ';'
2019 *
2020 * Returns A newly allocated string with the substitution done. The caller
2021 * must deallocate it !
2022 */
2023xmlChar *
2024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2025 xmlChar end, xmlChar end2, xmlChar end3) {
2026 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2027 end, end2, end3));
2028}
Owen Taylor3473f882001-02-23 17:55:21 +00002029
2030/************************************************************************
2031 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002032 * Commodity functions, cleanup needed ? *
2033 * *
2034 ************************************************************************/
2035
2036/**
2037 * areBlanks:
2038 * @ctxt: an XML parser context
2039 * @str: a xmlChar *
2040 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002041 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002042 *
2043 * Is this a sequence of blank chars that one can ignore ?
2044 *
2045 * Returns 1 if ignorable 0 otherwise.
2046 */
2047
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002048static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2049 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002050 int i, ret;
2051 xmlNodePtr lastChild;
2052
Daniel Veillard05c13a22001-09-09 08:38:09 +00002053 /*
2054 * Don't spend time trying to differentiate them, the same callback is
2055 * used !
2056 */
2057 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002058 return(0);
2059
Owen Taylor3473f882001-02-23 17:55:21 +00002060 /*
2061 * Check for xml:space value.
2062 */
2063 if (*(ctxt->space) == 1)
2064 return(0);
2065
2066 /*
2067 * Check that the string is made of blanks
2068 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002069 if (blank_chars == 0) {
2070 for (i = 0;i < len;i++)
2071 if (!(IS_BLANK_CH(str[i]))) return(0);
2072 }
Owen Taylor3473f882001-02-23 17:55:21 +00002073
2074 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002075 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002076 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002077 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002078 if (ctxt->myDoc != NULL) {
2079 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2080 if (ret == 0) return(1);
2081 if (ret == 1) return(0);
2082 }
2083
2084 /*
2085 * Otherwise, heuristic :-\
2086 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002087 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002088 if ((ctxt->node->children == NULL) &&
2089 (RAW == '<') && (NXT(1) == '/')) return(0);
2090
2091 lastChild = xmlGetLastChild(ctxt->node);
2092 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002093 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2094 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 } else if (xmlNodeIsText(lastChild))
2096 return(0);
2097 else if ((ctxt->node->children != NULL) &&
2098 (xmlNodeIsText(ctxt->node->children)))
2099 return(0);
2100 return(1);
2101}
2102
Owen Taylor3473f882001-02-23 17:55:21 +00002103/************************************************************************
2104 * *
2105 * Extra stuff for namespace support *
2106 * Relates to http://www.w3.org/TR/WD-xml-names *
2107 * *
2108 ************************************************************************/
2109
2110/**
2111 * xmlSplitQName:
2112 * @ctxt: an XML parser context
2113 * @name: an XML parser context
2114 * @prefix: a xmlChar **
2115 *
2116 * parse an UTF8 encoded XML qualified name string
2117 *
2118 * [NS 5] QName ::= (Prefix ':')? LocalPart
2119 *
2120 * [NS 6] Prefix ::= NCName
2121 *
2122 * [NS 7] LocalPart ::= NCName
2123 *
2124 * Returns the local part, and prefix is updated
2125 * to get the Prefix if any.
2126 */
2127
2128xmlChar *
2129xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2130 xmlChar buf[XML_MAX_NAMELEN + 5];
2131 xmlChar *buffer = NULL;
2132 int len = 0;
2133 int max = XML_MAX_NAMELEN;
2134 xmlChar *ret = NULL;
2135 const xmlChar *cur = name;
2136 int c;
2137
2138 *prefix = NULL;
2139
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002140 if (cur == NULL) return(NULL);
2141
Owen Taylor3473f882001-02-23 17:55:21 +00002142#ifndef XML_XML_NAMESPACE
2143 /* xml: prefix is not really a namespace */
2144 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2145 (cur[2] == 'l') && (cur[3] == ':'))
2146 return(xmlStrdup(name));
2147#endif
2148
Daniel Veillard597bc482003-07-24 16:08:28 +00002149 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002150 if (cur[0] == ':')
2151 return(xmlStrdup(name));
2152
2153 c = *cur++;
2154 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2155 buf[len++] = c;
2156 c = *cur++;
2157 }
2158 if (len >= max) {
2159 /*
2160 * Okay someone managed to make a huge name, so he's ready to pay
2161 * for the processing speed.
2162 */
2163 max = len * 2;
2164
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002165 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002166 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002167 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002168 return(NULL);
2169 }
2170 memcpy(buffer, buf, len);
2171 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2172 if (len + 10 > max) {
2173 max *= 2;
2174 buffer = (xmlChar *) xmlRealloc(buffer,
2175 max * sizeof(xmlChar));
2176 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002177 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002178 return(NULL);
2179 }
2180 }
2181 buffer[len++] = c;
2182 c = *cur++;
2183 }
2184 buffer[len] = 0;
2185 }
2186
Daniel Veillard597bc482003-07-24 16:08:28 +00002187 /* nasty but well=formed
2188 if ((c == ':') && (*cur == 0)) {
2189 return(xmlStrdup(name));
2190 } */
2191
Owen Taylor3473f882001-02-23 17:55:21 +00002192 if (buffer == NULL)
2193 ret = xmlStrndup(buf, len);
2194 else {
2195 ret = buffer;
2196 buffer = NULL;
2197 max = XML_MAX_NAMELEN;
2198 }
2199
2200
2201 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002202 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002204 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002205 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002206 }
Owen Taylor3473f882001-02-23 17:55:21 +00002207 len = 0;
2208
Daniel Veillardbb284f42002-10-16 18:02:47 +00002209 /*
2210 * Check that the first character is proper to start
2211 * a new name
2212 */
2213 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2214 ((c >= 0x41) && (c <= 0x5A)) ||
2215 (c == '_') || (c == ':'))) {
2216 int l;
2217 int first = CUR_SCHAR(cur, l);
2218
2219 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002220 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002221 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002222 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002223 }
2224 }
2225 cur++;
2226
Owen Taylor3473f882001-02-23 17:55:21 +00002227 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2228 buf[len++] = c;
2229 c = *cur++;
2230 }
2231 if (len >= max) {
2232 /*
2233 * Okay someone managed to make a huge name, so he's ready to pay
2234 * for the processing speed.
2235 */
2236 max = len * 2;
2237
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002238 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002239 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002241 return(NULL);
2242 }
2243 memcpy(buffer, buf, len);
2244 while (c != 0) { /* tested bigname2.xml */
2245 if (len + 10 > max) {
2246 max *= 2;
2247 buffer = (xmlChar *) xmlRealloc(buffer,
2248 max * sizeof(xmlChar));
2249 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002250 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002251 return(NULL);
2252 }
2253 }
2254 buffer[len++] = c;
2255 c = *cur++;
2256 }
2257 buffer[len] = 0;
2258 }
2259
2260 if (buffer == NULL)
2261 ret = xmlStrndup(buf, len);
2262 else {
2263 ret = buffer;
2264 }
2265 }
2266
2267 return(ret);
2268}
2269
2270/************************************************************************
2271 * *
2272 * The parser itself *
2273 * Relates to http://www.w3.org/TR/REC-xml *
2274 * *
2275 ************************************************************************/
2276
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002277static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002278static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002279 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002280
Owen Taylor3473f882001-02-23 17:55:21 +00002281/**
2282 * xmlParseName:
2283 * @ctxt: an XML parser context
2284 *
2285 * parse an XML name.
2286 *
2287 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2288 * CombiningChar | Extender
2289 *
2290 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2291 *
2292 * [6] Names ::= Name (S Name)*
2293 *
2294 * Returns the Name parsed or NULL
2295 */
2296
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002297const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002298xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002299 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002300 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002301 int count = 0;
2302
2303 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002304
2305 /*
2306 * Accelerator for simple ASCII names
2307 */
2308 in = ctxt->input->cur;
2309 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2310 ((*in >= 0x41) && (*in <= 0x5A)) ||
2311 (*in == '_') || (*in == ':')) {
2312 in++;
2313 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2314 ((*in >= 0x41) && (*in <= 0x5A)) ||
2315 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002316 (*in == '_') || (*in == '-') ||
2317 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002318 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002319 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002320 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002321 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002322 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002323 ctxt->nbChars += count;
2324 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002325 if (ret == NULL)
2326 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002327 return(ret);
2328 }
2329 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002330 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002331}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002332
Daniel Veillard46de64e2002-05-29 08:21:33 +00002333/**
2334 * xmlParseNameAndCompare:
2335 * @ctxt: an XML parser context
2336 *
2337 * parse an XML name and compares for match
2338 * (specialized for endtag parsing)
2339 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002340 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2341 * and the name for mismatch
2342 */
2343
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002345xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002346 register const xmlChar *cmp = other;
2347 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002348 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002349
2350 GROW;
2351
2352 in = ctxt->input->cur;
2353 while (*in != 0 && *in == *cmp) {
2354 ++in;
2355 ++cmp;
2356 }
William M. Brack76e95df2003-10-18 16:20:14 +00002357 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002358 /* success */
2359 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002360 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002361 }
2362 /* failure (or end of input buffer), check with full function */
2363 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002364 /* strings coming from the dictionnary direct compare possible */
2365 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002366 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002367 }
2368 return ret;
2369}
2370
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002371static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002372xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002373 int len = 0, l;
2374 int c;
2375 int count = 0;
2376
2377 /*
2378 * Handler for more complex cases
2379 */
2380 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002381 c = CUR_CHAR(l);
2382 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2383 (!IS_LETTER(c) && (c != '_') &&
2384 (c != ':'))) {
2385 return(NULL);
2386 }
2387
2388 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002389 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002390 (c == '.') || (c == '-') ||
2391 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002392 (IS_COMBINING(c)) ||
2393 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002394 if (count++ > 100) {
2395 count = 0;
2396 GROW;
2397 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002398 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002399 NEXTL(l);
2400 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002401 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002402 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002403}
2404
2405/**
2406 * xmlParseStringName:
2407 * @ctxt: an XML parser context
2408 * @str: a pointer to the string pointer (IN/OUT)
2409 *
2410 * parse an XML name.
2411 *
2412 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2413 * CombiningChar | Extender
2414 *
2415 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2416 *
2417 * [6] Names ::= Name (S Name)*
2418 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002419 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002420 * is updated to the current location in the string.
2421 */
2422
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002423static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002424xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2425 xmlChar buf[XML_MAX_NAMELEN + 5];
2426 const xmlChar *cur = *str;
2427 int len = 0, l;
2428 int c;
2429
2430 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002431 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002432 (c != ':')) {
2433 return(NULL);
2434 }
2435
William M. Brack871611b2003-10-18 04:53:14 +00002436 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002437 (c == '.') || (c == '-') ||
2438 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002439 (IS_COMBINING(c)) ||
2440 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002441 COPY_BUF(l,buf,len,c);
2442 cur += l;
2443 c = CUR_SCHAR(cur, l);
2444 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2445 /*
2446 * Okay someone managed to make a huge name, so he's ready to pay
2447 * for the processing speed.
2448 */
2449 xmlChar *buffer;
2450 int max = len * 2;
2451
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002452 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002454 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002455 return(NULL);
2456 }
2457 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002458 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002459 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002460 (c == '.') || (c == '-') ||
2461 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002462 (IS_COMBINING(c)) ||
2463 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002464 if (len + 10 > max) {
2465 max *= 2;
2466 buffer = (xmlChar *) xmlRealloc(buffer,
2467 max * sizeof(xmlChar));
2468 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002469 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002470 return(NULL);
2471 }
2472 }
2473 COPY_BUF(l,buffer,len,c);
2474 cur += l;
2475 c = CUR_SCHAR(cur, l);
2476 }
2477 buffer[len] = 0;
2478 *str = cur;
2479 return(buffer);
2480 }
2481 }
2482 *str = cur;
2483 return(xmlStrndup(buf, len));
2484}
2485
2486/**
2487 * xmlParseNmtoken:
2488 * @ctxt: an XML parser context
2489 *
2490 * parse an XML Nmtoken.
2491 *
2492 * [7] Nmtoken ::= (NameChar)+
2493 *
2494 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2495 *
2496 * Returns the Nmtoken parsed or NULL
2497 */
2498
2499xmlChar *
2500xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2501 xmlChar buf[XML_MAX_NAMELEN + 5];
2502 int len = 0, l;
2503 int c;
2504 int count = 0;
2505
2506 GROW;
2507 c = CUR_CHAR(l);
2508
William M. Brack871611b2003-10-18 04:53:14 +00002509 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002510 (c == '.') || (c == '-') ||
2511 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002512 (IS_COMBINING(c)) ||
2513 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (count++ > 100) {
2515 count = 0;
2516 GROW;
2517 }
2518 COPY_BUF(l,buf,len,c);
2519 NEXTL(l);
2520 c = CUR_CHAR(l);
2521 if (len >= XML_MAX_NAMELEN) {
2522 /*
2523 * Okay someone managed to make a huge token, so he's ready to pay
2524 * for the processing speed.
2525 */
2526 xmlChar *buffer;
2527 int max = len * 2;
2528
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002529 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002530 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002531 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002532 return(NULL);
2533 }
2534 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002535 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002536 (c == '.') || (c == '-') ||
2537 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002538 (IS_COMBINING(c)) ||
2539 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002540 if (count++ > 100) {
2541 count = 0;
2542 GROW;
2543 }
2544 if (len + 10 > max) {
2545 max *= 2;
2546 buffer = (xmlChar *) xmlRealloc(buffer,
2547 max * sizeof(xmlChar));
2548 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002549 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002550 return(NULL);
2551 }
2552 }
2553 COPY_BUF(l,buffer,len,c);
2554 NEXTL(l);
2555 c = CUR_CHAR(l);
2556 }
2557 buffer[len] = 0;
2558 return(buffer);
2559 }
2560 }
2561 if (len == 0)
2562 return(NULL);
2563 return(xmlStrndup(buf, len));
2564}
2565
2566/**
2567 * xmlParseEntityValue:
2568 * @ctxt: an XML parser context
2569 * @orig: if non-NULL store a copy of the original entity value
2570 *
2571 * parse a value for ENTITY declarations
2572 *
2573 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2574 * "'" ([^%&'] | PEReference | Reference)* "'"
2575 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002576 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002577 */
2578
2579xmlChar *
2580xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2581 xmlChar *buf = NULL;
2582 int len = 0;
2583 int size = XML_PARSER_BUFFER_SIZE;
2584 int c, l;
2585 xmlChar stop;
2586 xmlChar *ret = NULL;
2587 const xmlChar *cur = NULL;
2588 xmlParserInputPtr input;
2589
2590 if (RAW == '"') stop = '"';
2591 else if (RAW == '\'') stop = '\'';
2592 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002593 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002594 return(NULL);
2595 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002596 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002597 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002598 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002599 return(NULL);
2600 }
2601
2602 /*
2603 * The content of the entity definition is copied in a buffer.
2604 */
2605
2606 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2607 input = ctxt->input;
2608 GROW;
2609 NEXT;
2610 c = CUR_CHAR(l);
2611 /*
2612 * NOTE: 4.4.5 Included in Literal
2613 * When a parameter entity reference appears in a literal entity
2614 * value, ... a single or double quote character in the replacement
2615 * text is always treated as a normal data character and will not
2616 * terminate the literal.
2617 * In practice it means we stop the loop only when back at parsing
2618 * the initial entity and the quote is found
2619 */
William M. Brack871611b2003-10-18 04:53:14 +00002620 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002621 (ctxt->input != input))) {
2622 if (len + 5 >= size) {
2623 size *= 2;
2624 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2625 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002626 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002627 return(NULL);
2628 }
2629 }
2630 COPY_BUF(l,buf,len,c);
2631 NEXTL(l);
2632 /*
2633 * Pop-up of finished entities.
2634 */
2635 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2636 xmlPopInput(ctxt);
2637
2638 GROW;
2639 c = CUR_CHAR(l);
2640 if (c == 0) {
2641 GROW;
2642 c = CUR_CHAR(l);
2643 }
2644 }
2645 buf[len] = 0;
2646
2647 /*
2648 * Raise problem w.r.t. '&' and '%' being used in non-entities
2649 * reference constructs. Note Charref will be handled in
2650 * xmlStringDecodeEntities()
2651 */
2652 cur = buf;
2653 while (*cur != 0) { /* non input consuming */
2654 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2655 xmlChar *name;
2656 xmlChar tmp = *cur;
2657
2658 cur++;
2659 name = xmlParseStringName(ctxt, &cur);
2660 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002661 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002662 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002663 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002664 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002665 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2666 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002667 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002668 }
2669 if (name != NULL)
2670 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002671 if (*cur == 0)
2672 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002673 }
2674 cur++;
2675 }
2676
2677 /*
2678 * Then PEReference entities are substituted.
2679 */
2680 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002681 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002682 xmlFree(buf);
2683 } else {
2684 NEXT;
2685 /*
2686 * NOTE: 4.4.7 Bypassed
2687 * When a general entity reference appears in the EntityValue in
2688 * an entity declaration, it is bypassed and left as is.
2689 * so XML_SUBSTITUTE_REF is not set here.
2690 */
2691 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2692 0, 0, 0);
2693 if (orig != NULL)
2694 *orig = buf;
2695 else
2696 xmlFree(buf);
2697 }
2698
2699 return(ret);
2700}
2701
2702/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002703 * xmlParseAttValueComplex:
2704 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002705 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002706 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002707 *
2708 * parse a value for an attribute, this is the fallback function
2709 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002710 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002711 *
2712 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2713 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002714static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002715xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002716 xmlChar limit = 0;
2717 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002718 int len = 0;
2719 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002720 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002721 xmlChar *current = NULL;
2722 xmlEntityPtr ent;
2723
Owen Taylor3473f882001-02-23 17:55:21 +00002724 if (NXT(0) == '"') {
2725 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2726 limit = '"';
2727 NEXT;
2728 } else if (NXT(0) == '\'') {
2729 limit = '\'';
2730 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2731 NEXT;
2732 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002733 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002734 return(NULL);
2735 }
2736
2737 /*
2738 * allocate a translation buffer.
2739 */
2740 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002741 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002742 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002743
2744 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002745 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002746 */
2747 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002748 while ((NXT(0) != limit) && /* checked */
2749 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002750 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002751 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002752 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002753 if (NXT(1) == '#') {
2754 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002755
Owen Taylor3473f882001-02-23 17:55:21 +00002756 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002757 if (ctxt->replaceEntities) {
2758 if (len > buf_size - 10) {
2759 growBuffer(buf);
2760 }
2761 buf[len++] = '&';
2762 } else {
2763 /*
2764 * The reparsing will be done in xmlStringGetNodeList()
2765 * called by the attribute() function in SAX.c
2766 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002767 if (len > buf_size - 10) {
2768 growBuffer(buf);
2769 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002770 buf[len++] = '&';
2771 buf[len++] = '#';
2772 buf[len++] = '3';
2773 buf[len++] = '8';
2774 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002775 }
2776 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002777 if (len > buf_size - 10) {
2778 growBuffer(buf);
2779 }
Owen Taylor3473f882001-02-23 17:55:21 +00002780 len += xmlCopyChar(0, &buf[len], val);
2781 }
2782 } else {
2783 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002784 if ((ent != NULL) &&
2785 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2786 if (len > buf_size - 10) {
2787 growBuffer(buf);
2788 }
2789 if ((ctxt->replaceEntities == 0) &&
2790 (ent->content[0] == '&')) {
2791 buf[len++] = '&';
2792 buf[len++] = '#';
2793 buf[len++] = '3';
2794 buf[len++] = '8';
2795 buf[len++] = ';';
2796 } else {
2797 buf[len++] = ent->content[0];
2798 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002799 } else if ((ent != NULL) &&
2800 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002801 xmlChar *rep;
2802
2803 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2804 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002805 XML_SUBSTITUTE_REF,
2806 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002807 if (rep != NULL) {
2808 current = rep;
2809 while (*current != 0) { /* non input consuming */
2810 buf[len++] = *current++;
2811 if (len > buf_size - 10) {
2812 growBuffer(buf);
2813 }
2814 }
2815 xmlFree(rep);
2816 }
2817 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002818 if (len > buf_size - 10) {
2819 growBuffer(buf);
2820 }
Owen Taylor3473f882001-02-23 17:55:21 +00002821 if (ent->content != NULL)
2822 buf[len++] = ent->content[0];
2823 }
2824 } else if (ent != NULL) {
2825 int i = xmlStrlen(ent->name);
2826 const xmlChar *cur = ent->name;
2827
2828 /*
2829 * This may look absurd but is needed to detect
2830 * entities problems
2831 */
2832 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2833 (ent->content != NULL)) {
2834 xmlChar *rep;
2835 rep = xmlStringDecodeEntities(ctxt, ent->content,
2836 XML_SUBSTITUTE_REF, 0, 0, 0);
2837 if (rep != NULL)
2838 xmlFree(rep);
2839 }
2840
2841 /*
2842 * Just output the reference
2843 */
2844 buf[len++] = '&';
2845 if (len > buf_size - i - 10) {
2846 growBuffer(buf);
2847 }
2848 for (;i > 0;i--)
2849 buf[len++] = *cur++;
2850 buf[len++] = ';';
2851 }
2852 }
2853 } else {
2854 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002855 if ((len != 0) || (!normalize)) {
2856 if ((!normalize) || (!in_space)) {
2857 COPY_BUF(l,buf,len,0x20);
2858 if (len > buf_size - 10) {
2859 growBuffer(buf);
2860 }
2861 }
2862 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002863 }
2864 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002865 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002866 COPY_BUF(l,buf,len,c);
2867 if (len > buf_size - 10) {
2868 growBuffer(buf);
2869 }
2870 }
2871 NEXTL(l);
2872 }
2873 GROW;
2874 c = CUR_CHAR(l);
2875 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002876 if ((in_space) && (normalize)) {
2877 while (buf[len - 1] == 0x20) len--;
2878 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002879 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002880 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002881 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002882 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002883 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2884 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002885 } else
2886 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002887 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002889
2890mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002891 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002892 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002893}
2894
2895/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002896 * xmlParseAttValue:
2897 * @ctxt: an XML parser context
2898 *
2899 * parse a value for an attribute
2900 * Note: the parser won't do substitution of entities here, this
2901 * will be handled later in xmlStringGetNodeList
2902 *
2903 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2904 * "'" ([^<&'] | Reference)* "'"
2905 *
2906 * 3.3.3 Attribute-Value Normalization:
2907 * Before the value of an attribute is passed to the application or
2908 * checked for validity, the XML processor must normalize it as follows:
2909 * - a character reference is processed by appending the referenced
2910 * character to the attribute value
2911 * - an entity reference is processed by recursively processing the
2912 * replacement text of the entity
2913 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2914 * appending #x20 to the normalized value, except that only a single
2915 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2916 * parsed entity or the literal entity value of an internal parsed entity
2917 * - other characters are processed by appending them to the normalized value
2918 * If the declared value is not CDATA, then the XML processor must further
2919 * process the normalized attribute value by discarding any leading and
2920 * trailing space (#x20) characters, and by replacing sequences of space
2921 * (#x20) characters by a single space (#x20) character.
2922 * All attributes for which no declaration has been read should be treated
2923 * by a non-validating parser as if declared CDATA.
2924 *
2925 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2926 */
2927
2928
2929xmlChar *
2930xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002931 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002932}
2933
2934/**
Owen Taylor3473f882001-02-23 17:55:21 +00002935 * xmlParseSystemLiteral:
2936 * @ctxt: an XML parser context
2937 *
2938 * parse an XML Literal
2939 *
2940 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2941 *
2942 * Returns the SystemLiteral parsed or NULL
2943 */
2944
2945xmlChar *
2946xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2947 xmlChar *buf = NULL;
2948 int len = 0;
2949 int size = XML_PARSER_BUFFER_SIZE;
2950 int cur, l;
2951 xmlChar stop;
2952 int state = ctxt->instate;
2953 int count = 0;
2954
2955 SHRINK;
2956 if (RAW == '"') {
2957 NEXT;
2958 stop = '"';
2959 } else if (RAW == '\'') {
2960 NEXT;
2961 stop = '\'';
2962 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002963 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002964 return(NULL);
2965 }
2966
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002967 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002968 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002969 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002970 return(NULL);
2971 }
2972 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2973 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00002974 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002975 if (len + 5 >= size) {
2976 size *= 2;
2977 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2978 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002979 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002980 ctxt->instate = (xmlParserInputState) state;
2981 return(NULL);
2982 }
2983 }
2984 count++;
2985 if (count > 50) {
2986 GROW;
2987 count = 0;
2988 }
2989 COPY_BUF(l,buf,len,cur);
2990 NEXTL(l);
2991 cur = CUR_CHAR(l);
2992 if (cur == 0) {
2993 GROW;
2994 SHRINK;
2995 cur = CUR_CHAR(l);
2996 }
2997 }
2998 buf[len] = 0;
2999 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003000 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003001 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003002 } else {
3003 NEXT;
3004 }
3005 return(buf);
3006}
3007
3008/**
3009 * xmlParsePubidLiteral:
3010 * @ctxt: an XML parser context
3011 *
3012 * parse an XML public literal
3013 *
3014 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3015 *
3016 * Returns the PubidLiteral parsed or NULL.
3017 */
3018
3019xmlChar *
3020xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3021 xmlChar *buf = NULL;
3022 int len = 0;
3023 int size = XML_PARSER_BUFFER_SIZE;
3024 xmlChar cur;
3025 xmlChar stop;
3026 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003027 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003028
3029 SHRINK;
3030 if (RAW == '"') {
3031 NEXT;
3032 stop = '"';
3033 } else if (RAW == '\'') {
3034 NEXT;
3035 stop = '\'';
3036 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 return(NULL);
3039 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003040 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003041 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003042 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003043 return(NULL);
3044 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003045 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003047 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (len + 1 >= size) {
3049 size *= 2;
3050 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3051 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003052 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003053 return(NULL);
3054 }
3055 }
3056 buf[len++] = cur;
3057 count++;
3058 if (count > 50) {
3059 GROW;
3060 count = 0;
3061 }
3062 NEXT;
3063 cur = CUR;
3064 if (cur == 0) {
3065 GROW;
3066 SHRINK;
3067 cur = CUR;
3068 }
3069 }
3070 buf[len] = 0;
3071 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003073 } else {
3074 NEXT;
3075 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003076 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003077 return(buf);
3078}
3079
Daniel Veillard48b2f892001-02-25 16:11:03 +00003080void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003081/**
3082 * xmlParseCharData:
3083 * @ctxt: an XML parser context
3084 * @cdata: int indicating whether we are within a CDATA section
3085 *
3086 * parse a CharData section.
3087 * if we are within a CDATA section ']]>' marks an end of section.
3088 *
3089 * The right angle bracket (>) may be represented using the string "&gt;",
3090 * and must, for compatibility, be escaped using "&gt;" or a character
3091 * reference when it appears in the string "]]>" in content, when that
3092 * string is not marking the end of a CDATA section.
3093 *
3094 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3095 */
3096
3097void
3098xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003099 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003100 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003101 int line = ctxt->input->line;
3102 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003103
3104 SHRINK;
3105 GROW;
3106 /*
3107 * Accelerated common case where input don't need to be
3108 * modified before passing it to the handler.
3109 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003110 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003111 in = ctxt->input->cur;
3112 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003113get_more_space:
3114 while (*in == 0x20) in++;
3115 if (*in == 0xA) {
3116 ctxt->input->line++;
3117 in++;
3118 while (*in == 0xA) {
3119 ctxt->input->line++;
3120 in++;
3121 }
3122 goto get_more_space;
3123 }
3124 if (*in == '<') {
3125 nbchar = in - ctxt->input->cur;
3126 if (nbchar > 0) {
3127 const xmlChar *tmp = ctxt->input->cur;
3128 ctxt->input->cur = in;
3129
3130 if (ctxt->sax->ignorableWhitespace !=
3131 ctxt->sax->characters) {
3132 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3133 ctxt->sax->ignorableWhitespace(ctxt->userData,
3134 tmp, nbchar);
3135 } else if (ctxt->sax->characters != NULL)
3136 ctxt->sax->characters(ctxt->userData,
3137 tmp, nbchar);
3138 } else if (ctxt->sax->characters != NULL) {
3139 ctxt->sax->characters(ctxt->userData,
3140 tmp, nbchar);
3141 }
3142 }
3143 return;
3144 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003145get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003146#if 0
Daniel Veillard561b7f82002-03-20 21:55:57 +00003147 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3148 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003149 in++;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003150#endif
3151 while (((*in > ']') && (*in <= 0x7F)) ||
3152 ((*in > '&') && (*in < '<')) ||
3153 ((*in > '<') && (*in < ']')) ||
3154 ((*in >= 0x20) && (*in < '&')) ||
3155 (*in == 0x09))
3156 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003157 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003158 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003159 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003160 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003161 ctxt->input->line++;
3162 in++;
3163 }
3164 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003165 }
3166 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003167 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003168 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003169 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003170 return;
3171 }
3172 in++;
3173 goto get_more;
3174 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003175 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003176 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003177 if ((ctxt->sax->ignorableWhitespace !=
3178 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003179 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003180 const xmlChar *tmp = ctxt->input->cur;
3181 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003182
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003183 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003184 ctxt->sax->ignorableWhitespace(ctxt->userData,
3185 tmp, nbchar);
3186 } else if (ctxt->sax->characters != NULL)
3187 ctxt->sax->characters(ctxt->userData,
3188 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003189 line = ctxt->input->line;
3190 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003191 } else {
3192 if (ctxt->sax->characters != NULL)
3193 ctxt->sax->characters(ctxt->userData,
3194 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003195 line = ctxt->input->line;
3196 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003197 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003198 }
3199 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003200 if (*in == 0xD) {
3201 in++;
3202 if (*in == 0xA) {
3203 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003204 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003205 ctxt->input->line++;
3206 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003207 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003208 in--;
3209 }
3210 if (*in == '<') {
3211 return;
3212 }
3213 if (*in == '&') {
3214 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003215 }
3216 SHRINK;
3217 GROW;
3218 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003219 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003220 nbchar = 0;
3221 }
Daniel Veillard50582112001-03-26 22:52:16 +00003222 ctxt->input->line = line;
3223 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003224 xmlParseCharDataComplex(ctxt, cdata);
3225}
3226
Daniel Veillard01c13b52002-12-10 15:19:08 +00003227/**
3228 * xmlParseCharDataComplex:
3229 * @ctxt: an XML parser context
3230 * @cdata: int indicating whether we are within a CDATA section
3231 *
3232 * parse a CharData section.this is the fallback function
3233 * of xmlParseCharData() when the parsing requires handling
3234 * of non-ASCII characters.
3235 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003236void
3237xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003238 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3239 int nbchar = 0;
3240 int cur, l;
3241 int count = 0;
3242
3243 SHRINK;
3244 GROW;
3245 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003246 while ((cur != '<') && /* checked */
3247 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003248 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003249 if ((cur == ']') && (NXT(1) == ']') &&
3250 (NXT(2) == '>')) {
3251 if (cdata) break;
3252 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003253 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003254 }
3255 }
3256 COPY_BUF(l,buf,nbchar,cur);
3257 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003258 buf[nbchar] = 0;
3259
Owen Taylor3473f882001-02-23 17:55:21 +00003260 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003261 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003262 */
3263 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003264 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003265 if (ctxt->sax->ignorableWhitespace != NULL)
3266 ctxt->sax->ignorableWhitespace(ctxt->userData,
3267 buf, nbchar);
3268 } else {
3269 if (ctxt->sax->characters != NULL)
3270 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3271 }
3272 }
3273 nbchar = 0;
3274 }
3275 count++;
3276 if (count > 50) {
3277 GROW;
3278 count = 0;
3279 }
3280 NEXTL(l);
3281 cur = CUR_CHAR(l);
3282 }
3283 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003284 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003285 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003286 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003287 */
3288 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003289 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003290 if (ctxt->sax->ignorableWhitespace != NULL)
3291 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3292 } else {
3293 if (ctxt->sax->characters != NULL)
3294 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3295 }
3296 }
3297 }
3298}
3299
3300/**
3301 * xmlParseExternalID:
3302 * @ctxt: an XML parser context
3303 * @publicID: a xmlChar** receiving PubidLiteral
3304 * @strict: indicate whether we should restrict parsing to only
3305 * production [75], see NOTE below
3306 *
3307 * Parse an External ID or a Public ID
3308 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003309 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003310 * 'PUBLIC' S PubidLiteral S SystemLiteral
3311 *
3312 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3313 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3314 *
3315 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3316 *
3317 * Returns the function returns SystemLiteral and in the second
3318 * case publicID receives PubidLiteral, is strict is off
3319 * it is possible to return NULL and have publicID set.
3320 */
3321
3322xmlChar *
3323xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3324 xmlChar *URI = NULL;
3325
3326 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003327
3328 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003329 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003330 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003331 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003332 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3333 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003334 }
3335 SKIP_BLANKS;
3336 URI = xmlParseSystemLiteral(ctxt);
3337 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003338 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003339 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003340 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003341 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003342 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003343 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003344 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003345 }
3346 SKIP_BLANKS;
3347 *publicID = xmlParsePubidLiteral(ctxt);
3348 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003349 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003350 }
3351 if (strict) {
3352 /*
3353 * We don't handle [83] so "S SystemLiteral" is required.
3354 */
William M. Brack76e95df2003-10-18 16:20:14 +00003355 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003356 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003357 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003358 }
3359 } else {
3360 /*
3361 * We handle [83] so we return immediately, if
3362 * "S SystemLiteral" is not detected. From a purely parsing
3363 * point of view that's a nice mess.
3364 */
3365 const xmlChar *ptr;
3366 GROW;
3367
3368 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003369 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003370
William M. Brack76e95df2003-10-18 16:20:14 +00003371 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3373 }
3374 SKIP_BLANKS;
3375 URI = xmlParseSystemLiteral(ctxt);
3376 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003377 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003378 }
3379 }
3380 return(URI);
3381}
3382
3383/**
3384 * xmlParseComment:
3385 * @ctxt: an XML parser context
3386 *
3387 * Skip an XML (SGML) comment <!-- .... -->
3388 * The spec says that "For compatibility, the string "--" (double-hyphen)
3389 * must not occur within comments. "
3390 *
3391 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3392 */
3393void
3394xmlParseComment(xmlParserCtxtPtr ctxt) {
3395 xmlChar *buf = NULL;
3396 int len;
3397 int size = XML_PARSER_BUFFER_SIZE;
3398 int q, ql;
3399 int r, rl;
3400 int cur, l;
3401 xmlParserInputState state;
3402 xmlParserInputPtr input = ctxt->input;
3403 int count = 0;
3404
3405 /*
3406 * Check that there is a comment right here.
3407 */
3408 if ((RAW != '<') || (NXT(1) != '!') ||
3409 (NXT(2) != '-') || (NXT(3) != '-')) return;
3410
3411 state = ctxt->instate;
3412 ctxt->instate = XML_PARSER_COMMENT;
3413 SHRINK;
3414 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003415 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003416 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003417 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003418 ctxt->instate = state;
3419 return;
3420 }
3421 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003422 if (q == 0)
3423 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003424 NEXTL(ql);
3425 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003426 if (r == 0)
3427 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003428 NEXTL(rl);
3429 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003430 if (cur == 0)
3431 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003432 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003433 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003434 ((cur != '>') ||
3435 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003436 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003437 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003438 }
3439 if (len + 5 >= size) {
3440 size *= 2;
3441 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3442 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003443 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003444 ctxt->instate = state;
3445 return;
3446 }
3447 }
3448 COPY_BUF(ql,buf,len,q);
3449 q = r;
3450 ql = rl;
3451 r = cur;
3452 rl = l;
3453
3454 count++;
3455 if (count > 50) {
3456 GROW;
3457 count = 0;
3458 }
3459 NEXTL(l);
3460 cur = CUR_CHAR(l);
3461 if (cur == 0) {
3462 SHRINK;
3463 GROW;
3464 cur = CUR_CHAR(l);
3465 }
3466 }
3467 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003468 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003469 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003470 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003471 xmlFree(buf);
3472 } else {
3473 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003474 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3475 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003476 }
3477 NEXT;
3478 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3479 (!ctxt->disableSAX))
3480 ctxt->sax->comment(ctxt->userData, buf);
3481 xmlFree(buf);
3482 }
3483 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003484 return;
3485not_terminated:
3486 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3487 "Comment not terminated\n", NULL);
3488 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003489}
3490
3491/**
3492 * xmlParsePITarget:
3493 * @ctxt: an XML parser context
3494 *
3495 * parse the name of a PI
3496 *
3497 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3498 *
3499 * Returns the PITarget name or NULL
3500 */
3501
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003502const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003503xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003504 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003505
3506 name = xmlParseName(ctxt);
3507 if ((name != NULL) &&
3508 ((name[0] == 'x') || (name[0] == 'X')) &&
3509 ((name[1] == 'm') || (name[1] == 'M')) &&
3510 ((name[2] == 'l') || (name[2] == 'L'))) {
3511 int i;
3512 if ((name[0] == 'x') && (name[1] == 'm') &&
3513 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003514 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003515 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003516 return(name);
3517 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003518 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003519 return(name);
3520 }
3521 for (i = 0;;i++) {
3522 if (xmlW3CPIs[i] == NULL) break;
3523 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3524 return(name);
3525 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003526 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3527 "xmlParsePITarget: invalid name prefix 'xml'\n",
3528 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003529 }
3530 return(name);
3531}
3532
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form  catalog = "URL"  (single or double
 * quotes, optional blanks around each token, nothing after the closing
 * quote). A value where "catalog" is not followed by '=' is ignored
 * silently; any other deviation produces an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *scan = catalog;
    const xmlChar *start;
    xmlChar *location = NULL;
    xmlChar quote;

    /* leading blanks, then the pseudo-attribute name */
    for (; IS_BLANK_CH(*scan); scan++)
        ;
    if (xmlStrncmp(scan, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;

    for (scan += 7; IS_BLANK_CH(*scan); scan++)
        ;
    if (*scan != '=')
        return;

    /* opening quote */
    for (scan++; IS_BLANK_CH(*scan); scan++)
        ;
    quote = *scan;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;

    /* quoted value, up to the matching quote */
    start = ++scan;
    while ((*scan != 0) && (*scan != quote))
        scan++;
    if (*scan == 0)
        goto syntax_error;
    location = xmlStrndup(start, scan - start);

    /* only blanks may follow the closing quote */
    for (scan++; IS_BLANK_CH(*scan); scan++)
        ;
    if (*scan != 0)
        goto syntax_error;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (location != NULL)
        xmlFree(location);
}
#endif
3594
Owen Taylor3473f882001-02-23 17:55:21 +00003595/**
3596 * xmlParsePI:
3597 * @ctxt: an XML parser context
3598 *
3599 * parse an XML Processing Instruction.
3600 *
3601 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3602 *
3603 * The processing is transfered to SAX once parsed.
3604 */
3605
3606void
3607xmlParsePI(xmlParserCtxtPtr ctxt) {
3608 xmlChar *buf = NULL;
3609 int len = 0;
3610 int size = XML_PARSER_BUFFER_SIZE;
3611 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003612 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003613 xmlParserInputState state;
3614 int count = 0;
3615
3616 if ((RAW == '<') && (NXT(1) == '?')) {
3617 xmlParserInputPtr input = ctxt->input;
3618 state = ctxt->instate;
3619 ctxt->instate = XML_PARSER_PI;
3620 /*
3621 * this is a Processing Instruction.
3622 */
3623 SKIP(2);
3624 SHRINK;
3625
3626 /*
3627 * Parse the target name and check for special support like
3628 * namespace.
3629 */
3630 target = xmlParsePITarget(ctxt);
3631 if (target != NULL) {
3632 if ((RAW == '?') && (NXT(1) == '>')) {
3633 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003634 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3635 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003636 }
3637 SKIP(2);
3638
3639 /*
3640 * SAX: PI detected.
3641 */
3642 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3643 (ctxt->sax->processingInstruction != NULL))
3644 ctxt->sax->processingInstruction(ctxt->userData,
3645 target, NULL);
3646 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003647 return;
3648 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003649 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003651 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003652 ctxt->instate = state;
3653 return;
3654 }
3655 cur = CUR;
3656 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003657 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3658 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003659 }
3660 SKIP_BLANKS;
3661 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003662 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003663 ((cur != '?') || (NXT(1) != '>'))) {
3664 if (len + 5 >= size) {
3665 size *= 2;
3666 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3667 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003668 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003669 ctxt->instate = state;
3670 return;
3671 }
3672 }
3673 count++;
3674 if (count > 50) {
3675 GROW;
3676 count = 0;
3677 }
3678 COPY_BUF(l,buf,len,cur);
3679 NEXTL(l);
3680 cur = CUR_CHAR(l);
3681 if (cur == 0) {
3682 SHRINK;
3683 GROW;
3684 cur = CUR_CHAR(l);
3685 }
3686 }
3687 buf[len] = 0;
3688 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003689 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3690 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003691 } else {
3692 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003693 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3694 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003695 }
3696 SKIP(2);
3697
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003698#ifdef LIBXML_CATALOG_ENABLED
3699 if (((state == XML_PARSER_MISC) ||
3700 (state == XML_PARSER_START)) &&
3701 (xmlStrEqual(target, XML_CATALOG_PI))) {
3702 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3703 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3704 (allow == XML_CATA_ALLOW_ALL))
3705 xmlParseCatalogPI(ctxt, buf);
3706 }
3707#endif
3708
3709
Owen Taylor3473f882001-02-23 17:55:21 +00003710 /*
3711 * SAX: PI detected.
3712 */
3713 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3714 (ctxt->sax->processingInstruction != NULL))
3715 ctxt->sax->processingInstruction(ctxt->userData,
3716 target, buf);
3717 }
3718 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003719 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003720 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 ctxt->instate = state;
3723 }
3724}
3725
3726/**
3727 * xmlParseNotationDecl:
3728 * @ctxt: an XML parser context
3729 *
3730 * parse a notation declaration
3731 *
3732 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3733 *
3734 * Hence there is actually 3 choices:
3735 * 'PUBLIC' S PubidLiteral
3736 * 'PUBLIC' S PubidLiteral S SystemLiteral
3737 * and 'SYSTEM' S SystemLiteral
3738 *
3739 * See the NOTE on xmlParseExternalID().
3740 */
3741
3742void
3743xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003744 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003745 xmlChar *Pubid;
3746 xmlChar *Systemid;
3747
Daniel Veillarda07050d2003-10-19 14:46:32 +00003748 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003749 xmlParserInputPtr input = ctxt->input;
3750 SHRINK;
3751 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003752 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3754 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003755 return;
3756 }
3757 SKIP_BLANKS;
3758
Daniel Veillard76d66f42001-05-16 21:05:17 +00003759 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003760 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003761 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003762 return;
3763 }
William M. Brack76e95df2003-10-18 16:20:14 +00003764 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003766 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003767 return;
3768 }
3769 SKIP_BLANKS;
3770
3771 /*
3772 * Parse the IDs.
3773 */
3774 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3775 SKIP_BLANKS;
3776
3777 if (RAW == '>') {
3778 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003779 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3780 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003781 }
3782 NEXT;
3783 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3784 (ctxt->sax->notationDecl != NULL))
3785 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3786 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003787 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003788 }
Owen Taylor3473f882001-02-23 17:55:21 +00003789 if (Systemid != NULL) xmlFree(Systemid);
3790 if (Pubid != NULL) xmlFree(Pubid);
3791 }
3792}
3793
3794/**
3795 * xmlParseEntityDecl:
3796 * @ctxt: an XML parser context
3797 *
3798 * parse <!ENTITY declarations
3799 *
3800 * [70] EntityDecl ::= GEDecl | PEDecl
3801 *
3802 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3803 *
3804 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3805 *
3806 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3807 *
3808 * [74] PEDef ::= EntityValue | ExternalID
3809 *
3810 * [76] NDataDecl ::= S 'NDATA' S Name
3811 *
3812 * [ VC: Notation Declared ]
3813 * The Name must match the declared name of a notation.
3814 */
3815
3816void
3817xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003818 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003819 xmlChar *value = NULL;
3820 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003821 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003822 int isParameter = 0;
3823 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003824 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003825
3826 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003827 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003828 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003829 SHRINK;
3830 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003831 skipped = SKIP_BLANKS;
3832 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003833 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3834 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003835 }
Owen Taylor3473f882001-02-23 17:55:21 +00003836
3837 if (RAW == '%') {
3838 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003839 skipped = SKIP_BLANKS;
3840 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003841 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3842 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003843 }
Owen Taylor3473f882001-02-23 17:55:21 +00003844 isParameter = 1;
3845 }
3846
Daniel Veillard76d66f42001-05-16 21:05:17 +00003847 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003848 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003849 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3850 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003851 return;
3852 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003853 skipped = SKIP_BLANKS;
3854 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003855 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3856 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003857 }
Owen Taylor3473f882001-02-23 17:55:21 +00003858
Daniel Veillardf5582f12002-06-11 10:08:16 +00003859 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003860 /*
3861 * handle the various case of definitions...
3862 */
3863 if (isParameter) {
3864 if ((RAW == '"') || (RAW == '\'')) {
3865 value = xmlParseEntityValue(ctxt, &orig);
3866 if (value) {
3867 if ((ctxt->sax != NULL) &&
3868 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3869 ctxt->sax->entityDecl(ctxt->userData, name,
3870 XML_INTERNAL_PARAMETER_ENTITY,
3871 NULL, NULL, value);
3872 }
3873 } else {
3874 URI = xmlParseExternalID(ctxt, &literal, 1);
3875 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003876 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003877 }
3878 if (URI) {
3879 xmlURIPtr uri;
3880
3881 uri = xmlParseURI((const char *) URI);
3882 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003883 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3884 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003885 /*
3886 * This really ought to be a well formedness error
3887 * but the XML Core WG decided otherwise c.f. issue
3888 * E26 of the XML erratas.
3889 */
Owen Taylor3473f882001-02-23 17:55:21 +00003890 } else {
3891 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003892 /*
3893 * Okay this is foolish to block those but not
3894 * invalid URIs.
3895 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003896 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003897 } else {
3898 if ((ctxt->sax != NULL) &&
3899 (!ctxt->disableSAX) &&
3900 (ctxt->sax->entityDecl != NULL))
3901 ctxt->sax->entityDecl(ctxt->userData, name,
3902 XML_EXTERNAL_PARAMETER_ENTITY,
3903 literal, URI, NULL);
3904 }
3905 xmlFreeURI(uri);
3906 }
3907 }
3908 }
3909 } else {
3910 if ((RAW == '"') || (RAW == '\'')) {
3911 value = xmlParseEntityValue(ctxt, &orig);
3912 if ((ctxt->sax != NULL) &&
3913 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3914 ctxt->sax->entityDecl(ctxt->userData, name,
3915 XML_INTERNAL_GENERAL_ENTITY,
3916 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003917 /*
3918 * For expat compatibility in SAX mode.
3919 */
3920 if ((ctxt->myDoc == NULL) ||
3921 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3922 if (ctxt->myDoc == NULL) {
3923 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3924 }
3925 if (ctxt->myDoc->intSubset == NULL)
3926 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3927 BAD_CAST "fake", NULL, NULL);
3928
Daniel Veillard1af9a412003-08-20 22:54:39 +00003929 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3930 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003931 }
Owen Taylor3473f882001-02-23 17:55:21 +00003932 } else {
3933 URI = xmlParseExternalID(ctxt, &literal, 1);
3934 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003935 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 }
3937 if (URI) {
3938 xmlURIPtr uri;
3939
3940 uri = xmlParseURI((const char *)URI);
3941 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003942 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3943 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003944 /*
3945 * This really ought to be a well formedness error
3946 * but the XML Core WG decided otherwise c.f. issue
3947 * E26 of the XML erratas.
3948 */
Owen Taylor3473f882001-02-23 17:55:21 +00003949 } else {
3950 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003951 /*
3952 * Okay this is foolish to block those but not
3953 * invalid URIs.
3954 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003955 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003956 }
3957 xmlFreeURI(uri);
3958 }
3959 }
William M. Brack76e95df2003-10-18 16:20:14 +00003960 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003961 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3962 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003963 }
3964 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003965 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003966 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00003967 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003968 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3969 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003970 }
3971 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003972 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003973 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3974 (ctxt->sax->unparsedEntityDecl != NULL))
3975 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3976 literal, URI, ndata);
3977 } else {
3978 if ((ctxt->sax != NULL) &&
3979 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3980 ctxt->sax->entityDecl(ctxt->userData, name,
3981 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3982 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003983 /*
3984 * For expat compatibility in SAX mode.
3985 * assuming the entity repalcement was asked for
3986 */
3987 if ((ctxt->replaceEntities != 0) &&
3988 ((ctxt->myDoc == NULL) ||
3989 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3990 if (ctxt->myDoc == NULL) {
3991 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3992 }
3993
3994 if (ctxt->myDoc->intSubset == NULL)
3995 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3996 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00003997 xmlSAX2EntityDecl(ctxt, name,
3998 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3999 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004000 }
Owen Taylor3473f882001-02-23 17:55:21 +00004001 }
4002 }
4003 }
4004 SKIP_BLANKS;
4005 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004006 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004007 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004008 } else {
4009 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004010 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4011 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004012 }
4013 NEXT;
4014 }
4015 if (orig != NULL) {
4016 /*
4017 * Ugly mechanism to save the raw entity value.
4018 */
4019 xmlEntityPtr cur = NULL;
4020
4021 if (isParameter) {
4022 if ((ctxt->sax != NULL) &&
4023 (ctxt->sax->getParameterEntity != NULL))
4024 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4025 } else {
4026 if ((ctxt->sax != NULL) &&
4027 (ctxt->sax->getEntity != NULL))
4028 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004029 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004030 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004031 }
Owen Taylor3473f882001-02-23 17:55:21 +00004032 }
4033 if (cur != NULL) {
4034 if (cur->orig != NULL)
4035 xmlFree(orig);
4036 else
4037 cur->orig = orig;
4038 } else
4039 xmlFree(orig);
4040 }
Owen Taylor3473f882001-02-23 17:55:21 +00004041 if (value != NULL) xmlFree(value);
4042 if (URI != NULL) xmlFree(URI);
4043 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004044 }
4045}
4046
4047/**
4048 * xmlParseDefaultDecl:
4049 * @ctxt: an XML parser context
4050 * @value: Receive a possible fixed default value for the attribute
4051 *
4052 * Parse an attribute default declaration
4053 *
4054 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4055 *
4056 * [ VC: Required Attribute ]
4057 * if the default declaration is the keyword #REQUIRED, then the
4058 * attribute must be specified for all elements of the type in the
4059 * attribute-list declaration.
4060 *
4061 * [ VC: Attribute Default Legal ]
4062 * The declared default value must meet the lexical constraints of
4063 * the declared attribute type c.f. xmlValidateAttributeDecl()
4064 *
4065 * [ VC: Fixed Attribute Default ]
4066 * if an attribute has a default value declared with the #FIXED
4067 * keyword, instances of that attribute must match the default value.
4068 *
4069 * [ WFC: No < in Attribute Values ]
4070 * handled in xmlParseAttValue()
4071 *
4072 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4073 * or XML_ATTRIBUTE_FIXED.
4074 */
4075
4076int
4077xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4078 int val;
4079 xmlChar *ret;
4080
4081 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004082 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004083 SKIP(9);
4084 return(XML_ATTRIBUTE_REQUIRED);
4085 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004086 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004087 SKIP(8);
4088 return(XML_ATTRIBUTE_IMPLIED);
4089 }
4090 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004091 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004092 SKIP(6);
4093 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004094 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004095 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4096 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004097 }
4098 SKIP_BLANKS;
4099 }
4100 ret = xmlParseAttValue(ctxt);
4101 ctxt->instate = XML_PARSER_DTD;
4102 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004103 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004104 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004105 } else
4106 *value = ret;
4107 return(val);
4108}
4109
4110/**
4111 * xmlParseNotationType:
4112 * @ctxt: an XML parser context
4113 *
4114 * parse an Notation attribute type.
4115 *
4116 * Note: the leading 'NOTATION' S part has already being parsed...
4117 *
4118 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4119 *
4120 * [ VC: Notation Attributes ]
4121 * Values of this type must match one of the notation names included
4122 * in the declaration; all notation names in the declaration must be declared.
4123 *
4124 * Returns: the notation attribute tree built while parsing
4125 */
4126
4127xmlEnumerationPtr
4128xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004129 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4131
4132 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004133 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004134 return(NULL);
4135 }
4136 SHRINK;
4137 do {
4138 NEXT;
4139 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004140 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004141 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004142 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4143 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004144 return(ret);
4145 }
4146 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004147 if (cur == NULL) return(ret);
4148 if (last == NULL) ret = last = cur;
4149 else {
4150 last->next = cur;
4151 last = cur;
4152 }
4153 SKIP_BLANKS;
4154 } while (RAW == '|');
4155 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004156 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004157 if ((last != NULL) && (last != ret))
4158 xmlFreeEnumeration(last);
4159 return(ret);
4160 }
4161 NEXT;
4162 return(ret);
4163}
4164
4165/**
4166 * xmlParseEnumerationType:
4167 * @ctxt: an XML parser context
4168 *
4169 * parse an Enumeration attribute type.
4170 *
4171 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4172 *
4173 * [ VC: Enumeration ]
4174 * Values of this type must match one of the Nmtoken tokens in
4175 * the declaration
4176 *
4177 * Returns: the enumeration attribute tree built while parsing
4178 */
4179
4180xmlEnumerationPtr
4181xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4182 xmlChar *name;
4183 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4184
4185 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004186 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004187 return(NULL);
4188 }
4189 SHRINK;
4190 do {
4191 NEXT;
4192 SKIP_BLANKS;
4193 name = xmlParseNmtoken(ctxt);
4194 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004195 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004196 return(ret);
4197 }
4198 cur = xmlCreateEnumeration(name);
4199 xmlFree(name);
4200 if (cur == NULL) return(ret);
4201 if (last == NULL) ret = last = cur;
4202 else {
4203 last->next = cur;
4204 last = cur;
4205 }
4206 SKIP_BLANKS;
4207 } while (RAW == '|');
4208 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004209 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004210 return(ret);
4211 }
4212 NEXT;
4213 return(ret);
4214}
4215
4216/**
4217 * xmlParseEnumeratedType:
4218 * @ctxt: an XML parser context
4219 * @tree: the enumeration tree built while parsing
4220 *
4221 * parse an Enumerated attribute type.
4222 *
4223 * [57] EnumeratedType ::= NotationType | Enumeration
4224 *
4225 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4226 *
4227 *
4228 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4229 */
4230
4231int
4232xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004233 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004234 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004235 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004236 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4237 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004238 return(0);
4239 }
4240 SKIP_BLANKS;
4241 *tree = xmlParseNotationType(ctxt);
4242 if (*tree == NULL) return(0);
4243 return(XML_ATTRIBUTE_NOTATION);
4244 }
4245 *tree = xmlParseEnumerationType(ctxt);
4246 if (*tree == NULL) return(0);
4247 return(XML_ATTRIBUTE_ENUMERATION);
4248}
4249
4250/**
4251 * xmlParseAttributeType:
4252 * @ctxt: an XML parser context
4253 * @tree: the enumeration tree built while parsing
4254 *
4255 * parse the Attribute list def for an element
4256 *
4257 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4258 *
4259 * [55] StringType ::= 'CDATA'
4260 *
4261 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4262 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4263 *
4264 * Validity constraints for attribute values syntax are checked in
4265 * xmlValidateAttributeValue()
4266 *
4267 * [ VC: ID ]
4268 * Values of type ID must match the Name production. A name must not
4269 * appear more than once in an XML document as a value of this type;
4270 * i.e., ID values must uniquely identify the elements which bear them.
4271 *
4272 * [ VC: One ID per Element Type ]
4273 * No element type may have more than one ID attribute specified.
4274 *
4275 * [ VC: ID Attribute Default ]
4276 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4277 *
4278 * [ VC: IDREF ]
4279 * Values of type IDREF must match the Name production, and values
4280 * of type IDREFS must match Names; each IDREF Name must match the value
4281 * of an ID attribute on some element in the XML document; i.e. IDREF
4282 * values must match the value of some ID attribute.
4283 *
4284 * [ VC: Entity Name ]
4285 * Values of type ENTITY must match the Name production, values
4286 * of type ENTITIES must match Names; each Entity Name must match the
4287 * name of an unparsed entity declared in the DTD.
4288 *
4289 * [ VC: Name Token ]
4290 * Values of type NMTOKEN must match the Nmtoken production; values
4291 * of type NMTOKENS must match Nmtokens.
4292 *
4293 * Returns the attribute type
4294 */
4295int
4296xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4297 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004298 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004299 SKIP(5);
4300 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004301 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004302 SKIP(6);
4303 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004304 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004305 SKIP(5);
4306 return(XML_ATTRIBUTE_IDREF);
4307 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4308 SKIP(2);
4309 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004310 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004311 SKIP(6);
4312 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004313 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SKIP(8);
4315 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004316 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004317 SKIP(8);
4318 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004319 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004320 SKIP(7);
4321 return(XML_ATTRIBUTE_NMTOKEN);
4322 }
4323 return(xmlParseEnumeratedType(ctxt, tree));
4324}
4325
4326/**
4327 * xmlParseAttributeListDecl:
4328 * @ctxt: an XML parser context
4329 *
4330 * : parse the Attribute list def for an element
4331 *
4332 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4333 *
4334 * [53] AttDef ::= S Name S AttType S DefaultDecl
4335 *
4336 */
4337void
4338xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004339 const xmlChar *elemName;
4340 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004341 xmlEnumerationPtr tree;
4342
Daniel Veillarda07050d2003-10-19 14:46:32 +00004343 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004344 xmlParserInputPtr input = ctxt->input;
4345
4346 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004347 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004349 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004350 }
4351 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004352 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004353 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004354 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4355 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004356 return;
4357 }
4358 SKIP_BLANKS;
4359 GROW;
4360 while (RAW != '>') {
4361 const xmlChar *check = CUR_PTR;
4362 int type;
4363 int def;
4364 xmlChar *defaultValue = NULL;
4365
4366 GROW;
4367 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004368 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004369 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004370 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4371 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004372 break;
4373 }
4374 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004375 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004377 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004378 if (defaultValue != NULL)
4379 xmlFree(defaultValue);
4380 break;
4381 }
4382 SKIP_BLANKS;
4383
4384 type = xmlParseAttributeType(ctxt, &tree);
4385 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004386 if (defaultValue != NULL)
4387 xmlFree(defaultValue);
4388 break;
4389 }
4390
4391 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004392 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004393 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4394 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004395 if (defaultValue != NULL)
4396 xmlFree(defaultValue);
4397 if (tree != NULL)
4398 xmlFreeEnumeration(tree);
4399 break;
4400 }
4401 SKIP_BLANKS;
4402
4403 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4404 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004405 if (defaultValue != NULL)
4406 xmlFree(defaultValue);
4407 if (tree != NULL)
4408 xmlFreeEnumeration(tree);
4409 break;
4410 }
4411
4412 GROW;
4413 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004414 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004416 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004417 if (defaultValue != NULL)
4418 xmlFree(defaultValue);
4419 if (tree != NULL)
4420 xmlFreeEnumeration(tree);
4421 break;
4422 }
4423 SKIP_BLANKS;
4424 }
4425 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004426 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4427 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004428 if (defaultValue != NULL)
4429 xmlFree(defaultValue);
4430 if (tree != NULL)
4431 xmlFreeEnumeration(tree);
4432 break;
4433 }
4434 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4435 (ctxt->sax->attributeDecl != NULL))
4436 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4437 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004438 else if (tree != NULL)
4439 xmlFreeEnumeration(tree);
4440
4441 if ((ctxt->sax2) && (defaultValue != NULL) &&
4442 (def != XML_ATTRIBUTE_IMPLIED) &&
4443 (def != XML_ATTRIBUTE_REQUIRED)) {
4444 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4445 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004446 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4447 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4448 }
Owen Taylor3473f882001-02-23 17:55:21 +00004449 if (defaultValue != NULL)
4450 xmlFree(defaultValue);
4451 GROW;
4452 }
4453 if (RAW == '>') {
4454 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004455 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4456 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004457 }
4458 NEXT;
4459 }
Owen Taylor3473f882001-02-23 17:55:21 +00004460 }
4461}
4462
4463/**
4464 * xmlParseElementMixedContentDecl:
4465 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004466 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004467 *
4468 * parse the declaration for a Mixed Element content
4469 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4470 *
4471 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4472 * '(' S? '#PCDATA' S? ')'
4473 *
4474 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4475 *
4476 * [ VC: No Duplicate Types ]
4477 * The same name must not appear more than once in a single
4478 * mixed-content declaration.
4479 *
4480 * returns: the list of the xmlElementContentPtr describing the element choices
4481 */
4482xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004483xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004484 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004485 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004486
4487 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004488 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004489 SKIP(7);
4490 SKIP_BLANKS;
4491 SHRINK;
4492 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004493 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004494 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4495"Element content declaration doesn't start and stop in the same entity\n",
4496 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004497 }
Owen Taylor3473f882001-02-23 17:55:21 +00004498 NEXT;
4499 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4500 if (RAW == '*') {
4501 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4502 NEXT;
4503 }
4504 return(ret);
4505 }
4506 if ((RAW == '(') || (RAW == '|')) {
4507 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4508 if (ret == NULL) return(NULL);
4509 }
4510 while (RAW == '|') {
4511 NEXT;
4512 if (elem == NULL) {
4513 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4514 if (ret == NULL) return(NULL);
4515 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004516 if (cur != NULL)
4517 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004518 cur = ret;
4519 } else {
4520 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4521 if (n == NULL) return(NULL);
4522 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004523 if (n->c1 != NULL)
4524 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004525 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004526 if (n != NULL)
4527 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004528 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004529 }
4530 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004531 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004532 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004533 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004534 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004535 xmlFreeElementContent(cur);
4536 return(NULL);
4537 }
4538 SKIP_BLANKS;
4539 GROW;
4540 }
4541 if ((RAW == ')') && (NXT(1) == '*')) {
4542 if (elem != NULL) {
4543 cur->c2 = xmlNewElementContent(elem,
4544 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004545 if (cur->c2 != NULL)
4546 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004547 }
4548 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004549 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004550 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4551"Element content declaration doesn't start and stop in the same entity\n",
4552 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004553 }
Owen Taylor3473f882001-02-23 17:55:21 +00004554 SKIP(2);
4555 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004556 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004557 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004558 return(NULL);
4559 }
4560
4561 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004562 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004563 }
4564 return(ret);
4565}
4566
4567/**
4568 * xmlParseElementChildrenContentDecl:
4569 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004570 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004571 *
4572 * parse the declaration for a Mixed Element content
4573 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4574 *
4575 *
4576 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4577 *
4578 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4579 *
4580 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4581 *
4582 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4583 *
4584 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4585 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004586 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004587 * opening or closing parentheses in a choice, seq, or Mixed
4588 * construct is contained in the replacement text for a parameter
4589 * entity, both must be contained in the same replacement text. For
4590 * interoperability, if a parameter-entity reference appears in a
4591 * choice, seq, or Mixed construct, its replacement text should not
4592 * be empty, and neither the first nor last non-blank character of
4593 * the replacement text should be a connector (| or ,).
4594 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004595 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004596 * hierarchy.
4597 */
4598xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004599xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004600 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004601 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004602 xmlChar type = 0;
4603
4604 SKIP_BLANKS;
4605 GROW;
4606 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004607 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004608
Owen Taylor3473f882001-02-23 17:55:21 +00004609 /* Recurse on first child */
4610 NEXT;
4611 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004612 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004613 SKIP_BLANKS;
4614 GROW;
4615 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004616 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004617 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004618 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004619 return(NULL);
4620 }
4621 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004622 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004623 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004624 return(NULL);
4625 }
Owen Taylor3473f882001-02-23 17:55:21 +00004626 GROW;
4627 if (RAW == '?') {
4628 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4629 NEXT;
4630 } else if (RAW == '*') {
4631 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4632 NEXT;
4633 } else if (RAW == '+') {
4634 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4635 NEXT;
4636 } else {
4637 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4638 }
Owen Taylor3473f882001-02-23 17:55:21 +00004639 GROW;
4640 }
4641 SKIP_BLANKS;
4642 SHRINK;
4643 while (RAW != ')') {
4644 /*
4645 * Each loop we parse one separator and one element.
4646 */
4647 if (RAW == ',') {
4648 if (type == 0) type = CUR;
4649
4650 /*
4651 * Detect "Name | Name , Name" error
4652 */
4653 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004654 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004655 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004656 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004657 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004658 xmlFreeElementContent(last);
4659 if (ret != NULL)
4660 xmlFreeElementContent(ret);
4661 return(NULL);
4662 }
4663 NEXT;
4664
4665 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4666 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004667 if ((last != NULL) && (last != ret))
4668 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004669 xmlFreeElementContent(ret);
4670 return(NULL);
4671 }
4672 if (last == NULL) {
4673 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004674 if (ret != NULL)
4675 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004676 ret = cur = op;
4677 } else {
4678 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004679 if (op != NULL)
4680 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004681 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004682 if (last != NULL)
4683 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004684 cur =op;
4685 last = NULL;
4686 }
4687 } else if (RAW == '|') {
4688 if (type == 0) type = CUR;
4689
4690 /*
4691 * Detect "Name , Name | Name" error
4692 */
4693 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004694 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004695 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004696 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004697 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004698 xmlFreeElementContent(last);
4699 if (ret != NULL)
4700 xmlFreeElementContent(ret);
4701 return(NULL);
4702 }
4703 NEXT;
4704
4705 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4706 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004707 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004708 xmlFreeElementContent(last);
4709 if (ret != NULL)
4710 xmlFreeElementContent(ret);
4711 return(NULL);
4712 }
4713 if (last == NULL) {
4714 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004715 if (ret != NULL)
4716 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004717 ret = cur = op;
4718 } else {
4719 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004720 if (op != NULL)
4721 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004722 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004723 if (last != NULL)
4724 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004725 cur =op;
4726 last = NULL;
4727 }
4728 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004729 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004730 if (ret != NULL)
4731 xmlFreeElementContent(ret);
4732 return(NULL);
4733 }
4734 GROW;
4735 SKIP_BLANKS;
4736 GROW;
4737 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004738 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004739 /* Recurse on second child */
4740 NEXT;
4741 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004742 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 SKIP_BLANKS;
4744 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004745 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004746 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004747 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004748 if (ret != NULL)
4749 xmlFreeElementContent(ret);
4750 return(NULL);
4751 }
4752 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004753 if (RAW == '?') {
4754 last->ocur = XML_ELEMENT_CONTENT_OPT;
4755 NEXT;
4756 } else if (RAW == '*') {
4757 last->ocur = XML_ELEMENT_CONTENT_MULT;
4758 NEXT;
4759 } else if (RAW == '+') {
4760 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4761 NEXT;
4762 } else {
4763 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4764 }
4765 }
4766 SKIP_BLANKS;
4767 GROW;
4768 }
4769 if ((cur != NULL) && (last != NULL)) {
4770 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004771 if (last != NULL)
4772 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004773 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004774 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004775 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4776"Element content declaration doesn't start and stop in the same entity\n",
4777 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004778 }
Owen Taylor3473f882001-02-23 17:55:21 +00004779 NEXT;
4780 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004781 if (ret != NULL)
4782 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004783 NEXT;
4784 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004785 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004786 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004787 cur = ret;
4788 /*
4789 * Some normalization:
4790 * (a | b* | c?)* == (a | b | c)*
4791 */
4792 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4793 if ((cur->c1 != NULL) &&
4794 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4795 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4796 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4797 if ((cur->c2 != NULL) &&
4798 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4799 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4800 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4801 cur = cur->c2;
4802 }
4803 }
Owen Taylor3473f882001-02-23 17:55:21 +00004804 NEXT;
4805 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004806 if (ret != NULL) {
4807 int found = 0;
4808
Daniel Veillarde470df72001-04-18 21:41:07 +00004809 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004810 /*
4811 * Some normalization:
4812 * (a | b*)+ == (a | b)*
4813 * (a | b?)+ == (a | b)*
4814 */
4815 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4816 if ((cur->c1 != NULL) &&
4817 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4818 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4819 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4820 found = 1;
4821 }
4822 if ((cur->c2 != NULL) &&
4823 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4824 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4825 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4826 found = 1;
4827 }
4828 cur = cur->c2;
4829 }
4830 if (found)
4831 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4832 }
Owen Taylor3473f882001-02-23 17:55:21 +00004833 NEXT;
4834 }
4835 return(ret);
4836}
4837
4838/**
4839 * xmlParseElementContentDecl:
4840 * @ctxt: an XML parser context
4841 * @name: the name of the element being defined.
4842 * @result: the Element Content pointer will be stored here if any
4843 *
4844 * parse the declaration for an Element content either Mixed or Children,
4845 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4846 *
4847 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4848 *
4849 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4850 */
4851
4852int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004853xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004854 xmlElementContentPtr *result) {
4855
4856 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004857 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004858 int res;
4859
4860 *result = NULL;
4861
4862 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004863 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004864 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004865 return(-1);
4866 }
4867 NEXT;
4868 GROW;
4869 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004870 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004871 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004872 res = XML_ELEMENT_TYPE_MIXED;
4873 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004874 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004875 res = XML_ELEMENT_TYPE_ELEMENT;
4876 }
Owen Taylor3473f882001-02-23 17:55:21 +00004877 SKIP_BLANKS;
4878 *result = tree;
4879 return(res);
4880}
4881
4882/**
4883 * xmlParseElementDecl:
4884 * @ctxt: an XML parser context
4885 *
4886 * parse an Element declaration.
4887 *
4888 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4889 *
4890 * [ VC: Unique Element Type Declaration ]
4891 * No element type may be declared more than once
4892 *
4893 * Returns the type of the element, or -1 in case of error
4894 */
4895int
4896xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004897 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004898 int ret = -1;
4899 xmlElementContentPtr content = NULL;
4900
4901 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004902 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004903 xmlParserInputPtr input = ctxt->input;
4904
4905 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004906 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004907 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4908 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004909 }
4910 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004911 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004912 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004913 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4914 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004915 return(-1);
4916 }
4917 while ((RAW == 0) && (ctxt->inputNr > 1))
4918 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4921 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004922 }
4923 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004924 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004925 SKIP(5);
4926 /*
4927 * Element must always be empty.
4928 */
4929 ret = XML_ELEMENT_TYPE_EMPTY;
4930 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4931 (NXT(2) == 'Y')) {
4932 SKIP(3);
4933 /*
4934 * Element is a generic container.
4935 */
4936 ret = XML_ELEMENT_TYPE_ANY;
4937 } else if (RAW == '(') {
4938 ret = xmlParseElementContentDecl(ctxt, name, &content);
4939 } else {
4940 /*
4941 * [ WFC: PEs in Internal Subset ] error handling.
4942 */
4943 if ((RAW == '%') && (ctxt->external == 0) &&
4944 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004945 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004946 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004947 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004948 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004949 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4950 }
Owen Taylor3473f882001-02-23 17:55:21 +00004951 return(-1);
4952 }
4953
4954 SKIP_BLANKS;
4955 /*
4956 * Pop-up of finished entities.
4957 */
4958 while ((RAW == 0) && (ctxt->inputNr > 1))
4959 xmlPopInput(ctxt);
4960 SKIP_BLANKS;
4961
4962 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004963 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004964 } else {
4965 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004966 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4967 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004968 }
4969
4970 NEXT;
4971 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4972 (ctxt->sax->elementDecl != NULL))
4973 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4974 content);
4975 }
4976 if (content != NULL) {
4977 xmlFreeElementContent(content);
4978 }
Owen Taylor3473f882001-02-23 17:55:21 +00004979 }
4980 return(ret);
4981}
4982
4983/**
Owen Taylor3473f882001-02-23 17:55:21 +00004984 * xmlParseConditionalSections
4985 * @ctxt: an XML parser context
4986 *
4987 * [61] conditionalSect ::= includeSect | ignoreSect
4988 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4989 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4990 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4991 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4992 */
4993
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004994static void
Owen Taylor3473f882001-02-23 17:55:21 +00004995xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4996 SKIP(3);
4997 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004998 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004999 SKIP(7);
5000 SKIP_BLANKS;
5001 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005002 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005003 } else {
5004 NEXT;
5005 }
5006 if (xmlParserDebugEntities) {
5007 if ((ctxt->input != NULL) && (ctxt->input->filename))
5008 xmlGenericError(xmlGenericErrorContext,
5009 "%s(%d): ", ctxt->input->filename,
5010 ctxt->input->line);
5011 xmlGenericError(xmlGenericErrorContext,
5012 "Entering INCLUDE Conditional Section\n");
5013 }
5014
5015 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5016 (NXT(2) != '>'))) {
5017 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005018 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005019
5020 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5021 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005022 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005023 NEXT;
5024 } else if (RAW == '%') {
5025 xmlParsePEReference(ctxt);
5026 } else
5027 xmlParseMarkupDecl(ctxt);
5028
5029 /*
5030 * Pop-up of finished entities.
5031 */
5032 while ((RAW == 0) && (ctxt->inputNr > 1))
5033 xmlPopInput(ctxt);
5034
Daniel Veillardfdc91562002-07-01 21:52:03 +00005035 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005036 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005037 break;
5038 }
5039 }
5040 if (xmlParserDebugEntities) {
5041 if ((ctxt->input != NULL) && (ctxt->input->filename))
5042 xmlGenericError(xmlGenericErrorContext,
5043 "%s(%d): ", ctxt->input->filename,
5044 ctxt->input->line);
5045 xmlGenericError(xmlGenericErrorContext,
5046 "Leaving INCLUDE Conditional Section\n");
5047 }
5048
Daniel Veillarda07050d2003-10-19 14:46:32 +00005049 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005050 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005051 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005052 int depth = 0;
5053
5054 SKIP(6);
5055 SKIP_BLANKS;
5056 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005057 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005058 } else {
5059 NEXT;
5060 }
5061 if (xmlParserDebugEntities) {
5062 if ((ctxt->input != NULL) && (ctxt->input->filename))
5063 xmlGenericError(xmlGenericErrorContext,
5064 "%s(%d): ", ctxt->input->filename,
5065 ctxt->input->line);
5066 xmlGenericError(xmlGenericErrorContext,
5067 "Entering IGNORE Conditional Section\n");
5068 }
5069
5070 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005071 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005072 * But disable SAX event generating DTD building in the meantime
5073 */
5074 state = ctxt->disableSAX;
5075 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005076 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005077 ctxt->instate = XML_PARSER_IGNORE;
5078
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005079 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005080 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5081 depth++;
5082 SKIP(3);
5083 continue;
5084 }
5085 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5086 if (--depth >= 0) SKIP(3);
5087 continue;
5088 }
5089 NEXT;
5090 continue;
5091 }
5092
5093 ctxt->disableSAX = state;
5094 ctxt->instate = instate;
5095
5096 if (xmlParserDebugEntities) {
5097 if ((ctxt->input != NULL) && (ctxt->input->filename))
5098 xmlGenericError(xmlGenericErrorContext,
5099 "%s(%d): ", ctxt->input->filename,
5100 ctxt->input->line);
5101 xmlGenericError(xmlGenericErrorContext,
5102 "Leaving IGNORE Conditional Section\n");
5103 }
5104
5105 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005106 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005107 }
5108
5109 if (RAW == 0)
5110 SHRINK;
5111
5112 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005113 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005114 } else {
5115 SKIP(3);
5116 }
5117}
5118
5119/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005120 * xmlParseMarkupDecl:
5121 * @ctxt: an XML parser context
5122 *
5123 * parse Markup declarations
5124 *
5125 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5126 * NotationDecl | PI | Comment
5127 *
5128 * [ VC: Proper Declaration/PE Nesting ]
5129 * Parameter-entity replacement text must be properly nested with
5130 * markup declarations. That is to say, if either the first character
5131 * or the last character of a markup declaration (markupdecl above) is
5132 * contained in the replacement text for a parameter-entity reference,
5133 * both must be contained in the same replacement text.
5134 *
5135 * [ WFC: PEs in Internal Subset ]
5136 * In the internal DTD subset, parameter-entity references can occur
5137 * only where markup declarations can occur, not within markup declarations.
5138 * (This does not apply to references that occur in external parameter
5139 * entities or to the external subset.)
5140 */
5141void
5142xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5143 GROW;
5144 xmlParseElementDecl(ctxt);
5145 xmlParseAttributeListDecl(ctxt);
5146 xmlParseEntityDecl(ctxt);
5147 xmlParseNotationDecl(ctxt);
5148 xmlParsePI(ctxt);
5149 xmlParseComment(ctxt);
5150 /*
5151 * This is only for internal subset. On external entities,
5152 * the replacement is done before parsing stage
5153 */
5154 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5155 xmlParsePEReference(ctxt);
5156
5157 /*
5158 * Conditional sections are allowed from entities included
5159 * by PE References in the internal subset.
5160 */
5161 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5162 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5163 xmlParseConditionalSections(ctxt);
5164 }
5165 }
5166
5167 ctxt->instate = XML_PARSER_DTD;
5168}
5169
5170/**
5171 * xmlParseTextDecl:
5172 * @ctxt: an XML parser context
5173 *
5174 * parse an XML declaration header for external entities
5175 *
5176 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5177 *
5178 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5179 */
5180
5181void
5182xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5183 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005184 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005185
5186 /*
5187 * We know that '<?xml' is here.
5188 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005189 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005190 SKIP(5);
5191 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005192 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005193 return;
5194 }
5195
William M. Brack76e95df2003-10-18 16:20:14 +00005196 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005197 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5198 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005199 }
5200 SKIP_BLANKS;
5201
5202 /*
5203 * We may have the VersionInfo here.
5204 */
5205 version = xmlParseVersionInfo(ctxt);
5206 if (version == NULL)
5207 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005208 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005209 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005210 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5211 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005212 }
5213 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005214 ctxt->input->version = version;
5215
5216 /*
5217 * We must have the encoding declaration
5218 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005219 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005220 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5221 /*
5222 * The XML REC instructs us to stop parsing right here
5223 */
5224 return;
5225 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005226 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5227 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5228 "Missing encoding in text declaration\n");
5229 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005230
5231 SKIP_BLANKS;
5232 if ((RAW == '?') && (NXT(1) == '>')) {
5233 SKIP(2);
5234 } else if (RAW == '>') {
5235 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005236 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005237 NEXT;
5238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005239 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005240 MOVETO_ENDTAG(CUR_PTR);
5241 NEXT;
5242 }
5243}
5244
5245/**
Owen Taylor3473f882001-02-23 17:55:21 +00005246 * xmlParseExternalSubset:
5247 * @ctxt: an XML parser context
5248 * @ExternalID: the external identifier
5249 * @SystemID: the system identifier (or URL)
5250 *
5251 * parse Markup declarations from an external subset
5252 *
5253 * [30] extSubset ::= textDecl? extSubsetDecl
5254 *
5255 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5256 */
5257void
5258xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5259 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005260 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005261 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005262 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005263 xmlParseTextDecl(ctxt);
5264 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5265 /*
5266 * The XML REC instructs us to stop parsing right here
5267 */
5268 ctxt->instate = XML_PARSER_EOF;
5269 return;
5270 }
5271 }
5272 if (ctxt->myDoc == NULL) {
5273 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5274 }
5275 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5276 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5277
5278 ctxt->instate = XML_PARSER_DTD;
5279 ctxt->external = 1;
5280 while (((RAW == '<') && (NXT(1) == '?')) ||
5281 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005282 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005283 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005284 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005285
5286 GROW;
5287 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5288 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005289 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005290 NEXT;
5291 } else if (RAW == '%') {
5292 xmlParsePEReference(ctxt);
5293 } else
5294 xmlParseMarkupDecl(ctxt);
5295
5296 /*
5297 * Pop-up of finished entities.
5298 */
5299 while ((RAW == 0) && (ctxt->inputNr > 1))
5300 xmlPopInput(ctxt);
5301
Daniel Veillardfdc91562002-07-01 21:52:03 +00005302 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005303 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005304 break;
5305 }
5306 }
5307
5308 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005309 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005310 }
5311
5312}
5313
5314/**
5315 * xmlParseReference:
5316 * @ctxt: an XML parser context
5317 *
5318 * parse and handle entity references in content, depending on the SAX
5319 * interface, this may end-up in a call to character() if this is a
5320 * CharRef, a predefined entity, if there is no reference() callback.
5321 * or if the parser was asked to switch to that mode.
5322 *
5323 * [67] Reference ::= EntityRef | CharRef
5324 */
5325void
5326xmlParseReference(xmlParserCtxtPtr ctxt) {
5327 xmlEntityPtr ent;
5328 xmlChar *val;
5329 if (RAW != '&') return;
5330
5331 if (NXT(1) == '#') {
5332 int i = 0;
5333 xmlChar out[10];
5334 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005335 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005336
5337 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5338 /*
5339 * So we are using non-UTF-8 buffers
5340 * Check that the char fit on 8bits, if not
5341 * generate a CharRef.
5342 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005343 if (value <= 0xFF) {
5344 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005345 out[1] = 0;
5346 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5347 (!ctxt->disableSAX))
5348 ctxt->sax->characters(ctxt->userData, out, 1);
5349 } else {
5350 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005351 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005352 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005353 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005354 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5355 (!ctxt->disableSAX))
5356 ctxt->sax->reference(ctxt->userData, out);
5357 }
5358 } else {
5359 /*
5360 * Just encode the value in UTF-8
5361 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005362 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005363 out[i] = 0;
5364 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5365 (!ctxt->disableSAX))
5366 ctxt->sax->characters(ctxt->userData, out, i);
5367 }
5368 } else {
5369 ent = xmlParseEntityRef(ctxt);
5370 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005371 if (!ctxt->wellFormed)
5372 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005373 if ((ent->name != NULL) &&
5374 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5375 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005376 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005377
5378
5379 /*
5380 * The first reference to the entity trigger a parsing phase
5381 * where the ent->children is filled with the result from
5382 * the parsing.
5383 */
5384 if (ent->children == NULL) {
5385 xmlChar *value;
5386 value = ent->content;
5387
5388 /*
5389 * Check that this entity is well formed
5390 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005391 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005392 (value[1] == 0) && (value[0] == '<') &&
5393 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5394 /*
5395 * DONE: get definite answer on this !!!
5396 * Lots of entity decls are used to declare a single
5397 * char
5398 * <!ENTITY lt "<">
5399 * Which seems to be valid since
5400 * 2.4: The ampersand character (&) and the left angle
5401 * bracket (<) may appear in their literal form only
5402 * when used ... They are also legal within the literal
5403 * entity value of an internal entity declaration;i
5404 * see "4.3.2 Well-Formed Parsed Entities".
5405 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5406 * Looking at the OASIS test suite and James Clark
5407 * tests, this is broken. However the XML REC uses
5408 * it. Is the XML REC not well-formed ????
5409 * This is a hack to avoid this problem
5410 *
5411 * ANSWER: since lt gt amp .. are already defined,
5412 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005413 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005414 * is lousy but acceptable.
5415 */
5416 list = xmlNewDocText(ctxt->myDoc, value);
5417 if (list != NULL) {
5418 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5419 (ent->children == NULL)) {
5420 ent->children = list;
5421 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005422 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005423 list->parent = (xmlNodePtr) ent;
5424 } else {
5425 xmlFreeNodeList(list);
5426 }
5427 } else if (list != NULL) {
5428 xmlFreeNodeList(list);
5429 }
5430 } else {
5431 /*
5432 * 4.3.2: An internal general parsed entity is well-formed
5433 * if its replacement text matches the production labeled
5434 * content.
5435 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005436
5437 void *user_data;
5438 /*
5439 * This is a bit hackish but this seems the best
5440 * way to make sure both SAX and DOM entity support
5441 * behaves okay.
5442 */
5443 if (ctxt->userData == ctxt)
5444 user_data = NULL;
5445 else
5446 user_data = ctxt->userData;
5447
Owen Taylor3473f882001-02-23 17:55:21 +00005448 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5449 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005450 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5451 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005452 ctxt->depth--;
5453 } else if (ent->etype ==
5454 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5455 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005456 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005457 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005458 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 ctxt->depth--;
5460 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005461 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005462 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5463 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 }
5465 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005466 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005467 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005468 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005469 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5470 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005471 (ent->children == NULL)) {
5472 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005473 if (ctxt->replaceEntities) {
5474 /*
5475 * Prune it directly in the generated document
5476 * except for single text nodes.
5477 */
5478 if ((list->type == XML_TEXT_NODE) &&
5479 (list->next == NULL)) {
5480 list->parent = (xmlNodePtr) ent;
5481 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005482 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005483 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005484 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005485 while (list != NULL) {
5486 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005487 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005488 if (list->next == NULL)
5489 ent->last = list;
5490 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005491 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005492 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005493#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005494 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5495 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005496#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005497 }
5498 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005499 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005500 while (list != NULL) {
5501 list->parent = (xmlNodePtr) ent;
5502 if (list->next == NULL)
5503 ent->last = list;
5504 list = list->next;
5505 }
Owen Taylor3473f882001-02-23 17:55:21 +00005506 }
5507 } else {
5508 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005509 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005510 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005511 } else if ((ret != XML_ERR_OK) &&
5512 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005513 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005514 } else if (list != NULL) {
5515 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005516 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005517 }
5518 }
5519 }
5520 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5521 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5522 /*
5523 * Create a node.
5524 */
5525 ctxt->sax->reference(ctxt->userData, ent->name);
5526 return;
5527 } else if (ctxt->replaceEntities) {
5528 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5529 /*
5530 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005531 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005532 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005533 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005534 if ((list == NULL) && (ent->owner == 0)) {
5535 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005536 cur = ent->children;
5537 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005538 nw = xmlCopyNode(cur, 1);
5539 if (nw != NULL) {
5540 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005541 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005542 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005543 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005544 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005545 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005546 if (cur == ent->last)
5547 break;
5548 cur = cur->next;
5549 }
Daniel Veillard81273902003-09-30 00:43:48 +00005550#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005551 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005552 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005553#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005554 } else if (list == NULL) {
5555 xmlNodePtr nw = NULL, cur, next, last,
5556 firstChild = NULL;
5557 /*
5558 * Copy the entity child list and make it the new
5559 * entity child list. The goal is to make sure any
5560 * ID or REF referenced will be the one from the
5561 * document content and not the entity copy.
5562 */
5563 cur = ent->children;
5564 ent->children = NULL;
5565 last = ent->last;
5566 ent->last = NULL;
5567 while (cur != NULL) {
5568 next = cur->next;
5569 cur->next = NULL;
5570 cur->parent = NULL;
5571 nw = xmlCopyNode(cur, 1);
5572 if (nw != NULL) {
5573 nw->_private = cur->_private;
5574 if (firstChild == NULL){
5575 firstChild = cur;
5576 }
5577 xmlAddChild((xmlNodePtr) ent, nw);
5578 xmlAddChild(ctxt->node, cur);
5579 }
5580 if (cur == last)
5581 break;
5582 cur = next;
5583 }
5584 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005585#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005586 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5587 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005588#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005589 } else {
5590 /*
5591 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005592 * node with a possible previous text one which
5593 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005594 */
5595 if (ent->children->type == XML_TEXT_NODE)
5596 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5597 if ((ent->last != ent->children) &&
5598 (ent->last->type == XML_TEXT_NODE))
5599 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5600 xmlAddChildList(ctxt->node, ent->children);
5601 }
5602
Owen Taylor3473f882001-02-23 17:55:21 +00005603 /*
5604 * This is to avoid a nasty side effect, see
5605 * characters() in SAX.c
5606 */
5607 ctxt->nodemem = 0;
5608 ctxt->nodelen = 0;
5609 return;
5610 } else {
5611 /*
5612 * Probably running in SAX mode
5613 */
5614 xmlParserInputPtr input;
5615
5616 input = xmlNewEntityInputStream(ctxt, ent);
5617 xmlPushInput(ctxt, input);
5618 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005619 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5620 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005621 xmlParseTextDecl(ctxt);
5622 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5623 /*
5624 * The XML REC instructs us to stop parsing right here
5625 */
5626 ctxt->instate = XML_PARSER_EOF;
5627 return;
5628 }
5629 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005630 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5631 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005632 }
5633 }
5634 return;
5635 }
5636 }
5637 } else {
5638 val = ent->content;
5639 if (val == NULL) return;
5640 /*
5641 * inline the entity.
5642 */
5643 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5644 (!ctxt->disableSAX))
5645 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5646 }
5647 }
5648}
5649
5650/**
5651 * xmlParseEntityRef:
5652 * @ctxt: an XML parser context
5653 *
5654 * parse ENTITY references declarations
5655 *
5656 * [68] EntityRef ::= '&' Name ';'
5657 *
5658 * [ WFC: Entity Declared ]
5659 * In a document without any DTD, a document with only an internal DTD
5660 * subset which contains no parameter entity references, or a document
5661 * with "standalone='yes'", the Name given in the entity reference
5662 * must match that in an entity declaration, except that well-formed
5663 * documents need not declare any of the following entities: amp, lt,
5664 * gt, apos, quot. The declaration of a parameter entity must precede
5665 * any reference to it. Similarly, the declaration of a general entity
5666 * must precede any reference to it which appears in a default value in an
5667 * attribute-list declaration. Note that if entities are declared in the
5668 * external subset or in external parameter entities, a non-validating
5669 * processor is not obligated to read and process their declarations;
5670 * for such documents, the rule that an entity must be declared is a
5671 * well-formedness constraint only if standalone='yes'.
5672 *
5673 * [ WFC: Parsed Entity ]
5674 * An entity reference must not contain the name of an unparsed entity
5675 *
5676 * Returns the xmlEntityPtr if found, or NULL otherwise.
5677 */
5678xmlEntityPtr
5679xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005680 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005681 xmlEntityPtr ent = NULL;
5682
5683 GROW;
5684
5685 if (RAW == '&') {
5686 NEXT;
5687 name = xmlParseName(ctxt);
5688 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005689 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5690 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005691 } else {
5692 if (RAW == ';') {
5693 NEXT;
5694 /*
5695 * Ask first SAX for entity resolution, otherwise try the
5696 * predefined set.
5697 */
5698 if (ctxt->sax != NULL) {
5699 if (ctxt->sax->getEntity != NULL)
5700 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005701 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005702 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005703 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5704 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005705 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005706 }
Owen Taylor3473f882001-02-23 17:55:21 +00005707 }
5708 /*
5709 * [ WFC: Entity Declared ]
5710 * In a document without any DTD, a document with only an
5711 * internal DTD subset which contains no parameter entity
5712 * references, or a document with "standalone='yes'", the
5713 * Name given in the entity reference must match that in an
5714 * entity declaration, except that well-formed documents
5715 * need not declare any of the following entities: amp, lt,
5716 * gt, apos, quot.
5717 * The declaration of a parameter entity must precede any
5718 * reference to it.
5719 * Similarly, the declaration of a general entity must
5720 * precede any reference to it which appears in a default
5721 * value in an attribute-list declaration. Note that if
5722 * entities are declared in the external subset or in
5723 * external parameter entities, a non-validating processor
5724 * is not obligated to read and process their declarations;
5725 * for such documents, the rule that an entity must be
5726 * declared is a well-formedness constraint only if
5727 * standalone='yes'.
5728 */
5729 if (ent == NULL) {
5730 if ((ctxt->standalone == 1) ||
5731 ((ctxt->hasExternalSubset == 0) &&
5732 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005733 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005734 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005735 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005736 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005737 "Entity '%s' not defined\n", name);
5738 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005739 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005740 }
5741
5742 /*
5743 * [ WFC: Parsed Entity ]
5744 * An entity reference must not contain the name of an
5745 * unparsed entity
5746 */
5747 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005748 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005749 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751
5752 /*
5753 * [ WFC: No External Entity References ]
5754 * Attribute values cannot contain direct or indirect
5755 * entity references to external entities.
5756 */
5757 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5758 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005759 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5760 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005761 }
5762 /*
5763 * [ WFC: No < in Attribute Values ]
5764 * The replacement text of any entity referred to directly or
5765 * indirectly in an attribute value (other than "&lt;") must
5766 * not contain a <.
5767 */
5768 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5769 (ent != NULL) &&
5770 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5771 (ent->content != NULL) &&
5772 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005773 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005774 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776
5777 /*
5778 * Internal check, no parameter entities here ...
5779 */
5780 else {
5781 switch (ent->etype) {
5782 case XML_INTERNAL_PARAMETER_ENTITY:
5783 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005784 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5785 "Attempt to reference the parameter entity '%s'\n",
5786 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005787 break;
5788 default:
5789 break;
5790 }
5791 }
5792
5793 /*
5794 * [ WFC: No Recursion ]
5795 * A parsed entity must not contain a recursive reference
5796 * to itself, either directly or indirectly.
5797 * Done somewhere else
5798 */
5799
5800 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005801 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005802 }
Owen Taylor3473f882001-02-23 17:55:21 +00005803 }
5804 }
5805 return(ent);
5806}
5807
5808/**
5809 * xmlParseStringEntityRef:
5810 * @ctxt: an XML parser context
5811 * @str: a pointer to an index in the string
5812 *
5813 * parse ENTITY references declarations, but this version parses it from
5814 * a string value.
5815 *
5816 * [68] EntityRef ::= '&' Name ';'
5817 *
5818 * [ WFC: Entity Declared ]
5819 * In a document without any DTD, a document with only an internal DTD
5820 * subset which contains no parameter entity references, or a document
5821 * with "standalone='yes'", the Name given in the entity reference
5822 * must match that in an entity declaration, except that well-formed
5823 * documents need not declare any of the following entities: amp, lt,
5824 * gt, apos, quot. The declaration of a parameter entity must precede
5825 * any reference to it. Similarly, the declaration of a general entity
5826 * must precede any reference to it which appears in a default value in an
5827 * attribute-list declaration. Note that if entities are declared in the
5828 * external subset or in external parameter entities, a non-validating
5829 * processor is not obligated to read and process their declarations;
5830 * for such documents, the rule that an entity must be declared is a
5831 * well-formedness constraint only if standalone='yes'.
5832 *
5833 * [ WFC: Parsed Entity ]
5834 * An entity reference must not contain the name of an unparsed entity
5835 *
5836 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5837 * is updated to the current location in the string.
5838 */
5839xmlEntityPtr
5840xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5841 xmlChar *name;
5842 const xmlChar *ptr;
5843 xmlChar cur;
5844 xmlEntityPtr ent = NULL;
5845
5846 if ((str == NULL) || (*str == NULL))
5847 return(NULL);
5848 ptr = *str;
5849 cur = *ptr;
5850 if (cur == '&') {
5851 ptr++;
5852 cur = *ptr;
5853 name = xmlParseStringName(ctxt, &ptr);
5854 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005855 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5856 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005857 } else {
5858 if (*ptr == ';') {
5859 ptr++;
5860 /*
5861 * Ask first SAX for entity resolution, otherwise try the
5862 * predefined set.
5863 */
5864 if (ctxt->sax != NULL) {
5865 if (ctxt->sax->getEntity != NULL)
5866 ent = ctxt->sax->getEntity(ctxt->userData, name);
5867 if (ent == NULL)
5868 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005869 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005870 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005871 }
Owen Taylor3473f882001-02-23 17:55:21 +00005872 }
5873 /*
5874 * [ WFC: Entity Declared ]
5875 * In a document without any DTD, a document with only an
5876 * internal DTD subset which contains no parameter entity
5877 * references, or a document with "standalone='yes'", the
5878 * Name given in the entity reference must match that in an
5879 * entity declaration, except that well-formed documents
5880 * need not declare any of the following entities: amp, lt,
5881 * gt, apos, quot.
5882 * The declaration of a parameter entity must precede any
5883 * reference to it.
5884 * Similarly, the declaration of a general entity must
5885 * precede any reference to it which appears in a default
5886 * value in an attribute-list declaration. Note that if
5887 * entities are declared in the external subset or in
5888 * external parameter entities, a non-validating processor
5889 * is not obligated to read and process their declarations;
5890 * for such documents, the rule that an entity must be
5891 * declared is a well-formedness constraint only if
5892 * standalone='yes'.
5893 */
5894 if (ent == NULL) {
5895 if ((ctxt->standalone == 1) ||
5896 ((ctxt->hasExternalSubset == 0) &&
5897 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005898 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005899 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005900 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005901 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005902 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005903 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005904 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005905 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005906 }
5907
5908 /*
5909 * [ WFC: Parsed Entity ]
5910 * An entity reference must not contain the name of an
5911 * unparsed entity
5912 */
5913 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005914 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005915 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005916 }
5917
5918 /*
5919 * [ WFC: No External Entity References ]
5920 * Attribute values cannot contain direct or indirect
5921 * entity references to external entities.
5922 */
5923 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5924 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005925 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005926 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 }
5928 /*
5929 * [ WFC: No < in Attribute Values ]
5930 * The replacement text of any entity referred to directly or
5931 * indirectly in an attribute value (other than "&lt;") must
5932 * not contain a <.
5933 */
5934 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5935 (ent != NULL) &&
5936 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5937 (ent->content != NULL) &&
5938 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005939 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
5940 "'<' in entity '%s' is not allowed in attributes values\n",
5941 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 }
5943
5944 /*
5945 * Internal check, no parameter entities here ...
5946 */
5947 else {
5948 switch (ent->etype) {
5949 case XML_INTERNAL_PARAMETER_ENTITY:
5950 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00005951 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5952 "Attempt to reference the parameter entity '%s'\n",
5953 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005954 break;
5955 default:
5956 break;
5957 }
5958 }
5959
5960 /*
5961 * [ WFC: No Recursion ]
5962 * A parsed entity must not contain a recursive reference
5963 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005964 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005965 */
5966
5967 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005968 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005969 }
5970 xmlFree(name);
5971 }
5972 }
5973 *str = ptr;
5974 return(ent);
5975}
5976
5977/**
5978 * xmlParsePEReference:
5979 * @ctxt: an XML parser context
5980 *
5981 * parse PEReference declarations
5982 * The entity content is handled directly by pushing it's content as
5983 * a new input stream.
5984 *
5985 * [69] PEReference ::= '%' Name ';'
5986 *
5987 * [ WFC: No Recursion ]
5988 * A parsed entity must not contain a recursive
5989 * reference to itself, either directly or indirectly.
5990 *
5991 * [ WFC: Entity Declared ]
5992 * In a document without any DTD, a document with only an internal DTD
5993 * subset which contains no parameter entity references, or a document
5994 * with "standalone='yes'", ... ... The declaration of a parameter
5995 * entity must precede any reference to it...
5996 *
5997 * [ VC: Entity Declared ]
5998 * In a document with an external subset or external parameter entities
5999 * with "standalone='no'", ... ... The declaration of a parameter entity
6000 * must precede any reference to it...
6001 *
6002 * [ WFC: In DTD ]
6003 * Parameter-entity references may only appear in the DTD.
6004 * NOTE: misleading but this is handled.
6005 */
6006void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006007xmlParsePEReference(xmlParserCtxtPtr ctxt)
6008{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006009 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006010 xmlEntityPtr entity = NULL;
6011 xmlParserInputPtr input;
6012
6013 if (RAW == '%') {
6014 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006015 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006016 if (name == NULL) {
6017 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6018 "xmlParsePEReference: no name\n");
6019 } else {
6020 if (RAW == ';') {
6021 NEXT;
6022 if ((ctxt->sax != NULL) &&
6023 (ctxt->sax->getParameterEntity != NULL))
6024 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6025 name);
6026 if (entity == NULL) {
6027 /*
6028 * [ WFC: Entity Declared ]
6029 * In a document without any DTD, a document with only an
6030 * internal DTD subset which contains no parameter entity
6031 * references, or a document with "standalone='yes'", ...
6032 * ... The declaration of a parameter entity must precede
6033 * any reference to it...
6034 */
6035 if ((ctxt->standalone == 1) ||
6036 ((ctxt->hasExternalSubset == 0) &&
6037 (ctxt->hasPErefs == 0))) {
6038 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6039 "PEReference: %%%s; not found\n",
6040 name);
6041 } else {
6042 /*
6043 * [ VC: Entity Declared ]
6044 * In a document with an external subset or external
6045 * parameter entities with "standalone='no'", ...
6046 * ... The declaration of a parameter entity must
6047 * precede any reference to it...
6048 */
6049 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6050 "PEReference: %%%s; not found\n",
6051 name, NULL);
6052 ctxt->valid = 0;
6053 }
6054 } else {
6055 /*
6056 * Internal checking in case the entity quest barfed
6057 */
6058 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6059 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6060 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6061 "Internal: %%%s; is not a parameter entity\n",
6062 name, NULL);
6063 } else if (ctxt->input->free != deallocblankswrapper) {
6064 input =
6065 xmlNewBlanksWrapperInputStream(ctxt, entity);
6066 xmlPushInput(ctxt, input);
6067 } else {
6068 /*
6069 * TODO !!!
6070 * handle the extra spaces added before and after
6071 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6072 */
6073 input = xmlNewEntityInputStream(ctxt, entity);
6074 xmlPushInput(ctxt, input);
6075 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006076 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006077 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006078 xmlParseTextDecl(ctxt);
6079 if (ctxt->errNo ==
6080 XML_ERR_UNSUPPORTED_ENCODING) {
6081 /*
6082 * The XML REC instructs us to stop parsing
6083 * right here
6084 */
6085 ctxt->instate = XML_PARSER_EOF;
6086 return;
6087 }
6088 }
6089 }
6090 }
6091 ctxt->hasPErefs = 1;
6092 } else {
6093 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6094 }
6095 }
Owen Taylor3473f882001-02-23 17:55:21 +00006096 }
6097}
6098
6099/**
6100 * xmlParseStringPEReference:
6101 * @ctxt: an XML parser context
6102 * @str: a pointer to an index in the string
6103 *
6104 * parse PEReference declarations
6105 *
6106 * [69] PEReference ::= '%' Name ';'
6107 *
6108 * [ WFC: No Recursion ]
6109 * A parsed entity must not contain a recursive
6110 * reference to itself, either directly or indirectly.
6111 *
6112 * [ WFC: Entity Declared ]
6113 * In a document without any DTD, a document with only an internal DTD
6114 * subset which contains no parameter entity references, or a document
6115 * with "standalone='yes'", ... ... The declaration of a parameter
6116 * entity must precede any reference to it...
6117 *
6118 * [ VC: Entity Declared ]
6119 * In a document with an external subset or external parameter entities
6120 * with "standalone='no'", ... ... The declaration of a parameter entity
6121 * must precede any reference to it...
6122 *
6123 * [ WFC: In DTD ]
6124 * Parameter-entity references may only appear in the DTD.
6125 * NOTE: misleading but this is handled.
6126 *
6127 * Returns the string of the entity content.
6128 * str is updated to the current value of the index
6129 */
6130xmlEntityPtr
6131xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6132 const xmlChar *ptr;
6133 xmlChar cur;
6134 xmlChar *name;
6135 xmlEntityPtr entity = NULL;
6136
6137 if ((str == NULL) || (*str == NULL)) return(NULL);
6138 ptr = *str;
6139 cur = *ptr;
6140 if (cur == '%') {
6141 ptr++;
6142 cur = *ptr;
6143 name = xmlParseStringName(ctxt, &ptr);
6144 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006145 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6146 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006147 } else {
6148 cur = *ptr;
6149 if (cur == ';') {
6150 ptr++;
6151 cur = *ptr;
6152 if ((ctxt->sax != NULL) &&
6153 (ctxt->sax->getParameterEntity != NULL))
6154 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6155 name);
6156 if (entity == NULL) {
6157 /*
6158 * [ WFC: Entity Declared ]
6159 * In a document without any DTD, a document with only an
6160 * internal DTD subset which contains no parameter entity
6161 * references, or a document with "standalone='yes'", ...
6162 * ... The declaration of a parameter entity must precede
6163 * any reference to it...
6164 */
6165 if ((ctxt->standalone == 1) ||
6166 ((ctxt->hasExternalSubset == 0) &&
6167 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006168 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006169 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006170 } else {
6171 /*
6172 * [ VC: Entity Declared ]
6173 * In a document with an external subset or external
6174 * parameter entities with "standalone='no'", ...
6175 * ... The declaration of a parameter entity must
6176 * precede any reference to it...
6177 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006178 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6179 "PEReference: %%%s; not found\n",
6180 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006181 ctxt->valid = 0;
6182 }
6183 } else {
6184 /*
6185 * Internal checking in case the entity quest barfed
6186 */
6187 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6188 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006189 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6190 "%%%s; is not a parameter entity\n",
6191 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006192 }
6193 }
6194 ctxt->hasPErefs = 1;
6195 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006196 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006197 }
6198 xmlFree(name);
6199 }
6200 }
6201 *str = ptr;
6202 return(entity);
6203}
6204
6205/**
6206 * xmlParseDocTypeDecl:
6207 * @ctxt: an XML parser context
6208 *
6209 * parse a DOCTYPE declaration
6210 *
6211 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6212 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6213 *
6214 * [ VC: Root Element Type ]
6215 * The Name in the document type declaration must match the element
6216 * type of the root element.
6217 */
6218
6219void
6220xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006221 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006222 xmlChar *ExternalID = NULL;
6223 xmlChar *URI = NULL;
6224
6225 /*
6226 * We know that '<!DOCTYPE' has been detected.
6227 */
6228 SKIP(9);
6229
6230 SKIP_BLANKS;
6231
6232 /*
6233 * Parse the DOCTYPE name.
6234 */
6235 name = xmlParseName(ctxt);
6236 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006237 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6238 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006239 }
6240 ctxt->intSubName = name;
6241
6242 SKIP_BLANKS;
6243
6244 /*
6245 * Check for SystemID and ExternalID
6246 */
6247 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6248
6249 if ((URI != NULL) || (ExternalID != NULL)) {
6250 ctxt->hasExternalSubset = 1;
6251 }
6252 ctxt->extSubURI = URI;
6253 ctxt->extSubSystem = ExternalID;
6254
6255 SKIP_BLANKS;
6256
6257 /*
6258 * Create and update the internal subset.
6259 */
6260 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6261 (!ctxt->disableSAX))
6262 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6263
6264 /*
6265 * Is there any internal subset declarations ?
6266 * they are handled separately in xmlParseInternalSubset()
6267 */
6268 if (RAW == '[')
6269 return;
6270
6271 /*
6272 * We should be at the end of the DOCTYPE declaration.
6273 */
6274 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006275 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006276 }
6277 NEXT;
6278}
6279
6280/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006281 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006282 * @ctxt: an XML parser context
6283 *
6284 * parse the internal subset declaration
6285 *
6286 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6287 */
6288
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006289static void
Owen Taylor3473f882001-02-23 17:55:21 +00006290xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6291 /*
6292 * Is there any DTD definition ?
6293 */
6294 if (RAW == '[') {
6295 ctxt->instate = XML_PARSER_DTD;
6296 NEXT;
6297 /*
6298 * Parse the succession of Markup declarations and
6299 * PEReferences.
6300 * Subsequence (markupdecl | PEReference | S)*
6301 */
6302 while (RAW != ']') {
6303 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006304 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006305
6306 SKIP_BLANKS;
6307 xmlParseMarkupDecl(ctxt);
6308 xmlParsePEReference(ctxt);
6309
6310 /*
6311 * Pop-up of finished entities.
6312 */
6313 while ((RAW == 0) && (ctxt->inputNr > 1))
6314 xmlPopInput(ctxt);
6315
6316 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006317 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006318 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006319 break;
6320 }
6321 }
6322 if (RAW == ']') {
6323 NEXT;
6324 SKIP_BLANKS;
6325 }
6326 }
6327
6328 /*
6329 * We should be at the end of the DOCTYPE declaration.
6330 */
6331 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006332 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006333 }
6334 NEXT;
6335}
6336
Daniel Veillard81273902003-09-30 00:43:48 +00006337#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006338/**
6339 * xmlParseAttribute:
6340 * @ctxt: an XML parser context
6341 * @value: a xmlChar ** used to store the value of the attribute
6342 *
6343 * parse an attribute
6344 *
6345 * [41] Attribute ::= Name Eq AttValue
6346 *
6347 * [ WFC: No External Entity References ]
6348 * Attribute values cannot contain direct or indirect entity references
6349 * to external entities.
6350 *
6351 * [ WFC: No < in Attribute Values ]
6352 * The replacement text of any entity referred to directly or indirectly in
6353 * an attribute value (other than "&lt;") must not contain a <.
6354 *
6355 * [ VC: Attribute Value Type ]
6356 * The attribute must have been declared; the value must be of the type
6357 * declared for it.
6358 *
6359 * [25] Eq ::= S? '=' S?
6360 *
6361 * With namespace:
6362 *
6363 * [NS 11] Attribute ::= QName Eq AttValue
6364 *
6365 * Also the case QName == xmlns:??? is handled independently as a namespace
6366 * definition.
6367 *
6368 * Returns the attribute name, and the value in *value.
6369 */
6370
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006371const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006372xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006373 const xmlChar *name;
6374 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006375
6376 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006377 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006378 name = xmlParseName(ctxt);
6379 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006380 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006381 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006382 return(NULL);
6383 }
6384
6385 /*
6386 * read the value
6387 */
6388 SKIP_BLANKS;
6389 if (RAW == '=') {
6390 NEXT;
6391 SKIP_BLANKS;
6392 val = xmlParseAttValue(ctxt);
6393 ctxt->instate = XML_PARSER_CONTENT;
6394 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006395 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006396 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006397 return(NULL);
6398 }
6399
6400 /*
6401 * Check that xml:lang conforms to the specification
6402 * No more registered as an error, just generate a warning now
6403 * since this was deprecated in XML second edition
6404 */
6405 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6406 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006407 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6408 "Malformed value for xml:lang : %s\n",
6409 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006410 }
6411 }
6412
6413 /*
6414 * Check that xml:space conforms to the specification
6415 */
6416 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6417 if (xmlStrEqual(val, BAD_CAST "default"))
6418 *(ctxt->space) = 0;
6419 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6420 *(ctxt->space) = 1;
6421 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006422 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006423"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006424 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006425 }
6426 }
6427
6428 *value = val;
6429 return(name);
6430}
6431
6432/**
6433 * xmlParseStartTag:
6434 * @ctxt: an XML parser context
6435 *
6436 * parse a start of tag either for rule element or
6437 * EmptyElement. In both case we don't parse the tag closing chars.
6438 *
6439 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6440 *
6441 * [ WFC: Unique Att Spec ]
6442 * No attribute name may appear more than once in the same start-tag or
6443 * empty-element tag.
6444 *
6445 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6446 *
6447 * [ WFC: Unique Att Spec ]
6448 * No attribute name may appear more than once in the same start-tag or
6449 * empty-element tag.
6450 *
6451 * With namespace:
6452 *
6453 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6454 *
6455 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6456 *
6457 * Returns the element name parsed
6458 */
6459
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006460const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006461xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006462 const xmlChar *name;
6463 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006464 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006465 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006466 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006467 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006468 int i;
6469
6470 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006471 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006472
6473 name = xmlParseName(ctxt);
6474 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006475 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006476 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006477 return(NULL);
6478 }
6479
6480 /*
6481 * Now parse the attributes, it ends up with the ending
6482 *
6483 * (S Attribute)* S?
6484 */
6485 SKIP_BLANKS;
6486 GROW;
6487
Daniel Veillard21a0f912001-02-25 19:54:14 +00006488 while ((RAW != '>') &&
6489 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006490 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006491 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006492 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006493
6494 attname = xmlParseAttribute(ctxt, &attvalue);
6495 if ((attname != NULL) && (attvalue != NULL)) {
6496 /*
6497 * [ WFC: Unique Att Spec ]
6498 * No attribute name may appear more than once in the same
6499 * start-tag or empty-element tag.
6500 */
6501 for (i = 0; i < nbatts;i += 2) {
6502 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006503 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006504 xmlFree(attvalue);
6505 goto failed;
6506 }
6507 }
Owen Taylor3473f882001-02-23 17:55:21 +00006508 /*
6509 * Add the pair to atts
6510 */
6511 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006512 maxatts = 22; /* allow for 10 attrs by default */
6513 atts = (const xmlChar **)
6514 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006515 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006516 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006517 if (attvalue != NULL)
6518 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006519 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006520 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006521 ctxt->atts = atts;
6522 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006523 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006524 const xmlChar **n;
6525
Owen Taylor3473f882001-02-23 17:55:21 +00006526 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006527 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006528 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006529 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006530 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006531 if (attvalue != NULL)
6532 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006533 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006534 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006535 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006536 ctxt->atts = atts;
6537 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006538 }
6539 atts[nbatts++] = attname;
6540 atts[nbatts++] = attvalue;
6541 atts[nbatts] = NULL;
6542 atts[nbatts + 1] = NULL;
6543 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006544 if (attvalue != NULL)
6545 xmlFree(attvalue);
6546 }
6547
6548failed:
6549
Daniel Veillard3772de32002-12-17 10:31:45 +00006550 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006551 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6552 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006553 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006554 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6555 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006556 }
6557 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006558 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6559 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006560 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6561 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006562 break;
6563 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006564 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006565 GROW;
6566 }
6567
6568 /*
6569 * SAX: Start of Element !
6570 */
6571 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006572 (!ctxt->disableSAX)) {
6573 if (nbatts > 0)
6574 ctxt->sax->startElement(ctxt->userData, name, atts);
6575 else
6576 ctxt->sax->startElement(ctxt->userData, name, NULL);
6577 }
Owen Taylor3473f882001-02-23 17:55:21 +00006578
6579 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006580 /* Free only the content strings */
6581 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006582 if (atts[i] != NULL)
6583 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006584 }
6585 return(name);
6586}
6587
6588/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006589 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006590 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006591 * @line: line of the start tag
6592 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006593 *
6594 * parse an end of tag
6595 *
6596 * [42] ETag ::= '</' Name S? '>'
6597 *
6598 * With namespace
6599 *
6600 * [NS 9] ETag ::= '</' QName S? '>'
6601 */
6602
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006603static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006604xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006605 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006606
6607 GROW;
6608 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006609 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006610 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006611 return;
6612 }
6613 SKIP(2);
6614
Daniel Veillard46de64e2002-05-29 08:21:33 +00006615 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006616
6617 /*
6618 * We should definitely be at the ending "S? '>'" part
6619 */
6620 GROW;
6621 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006622 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006623 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006624 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006625 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006626
6627 /*
6628 * [ WFC: Element Type Match ]
6629 * The Name in an element's end-tag must match the element type in the
6630 * start-tag.
6631 *
6632 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006633 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006634 if (name == NULL) name = BAD_CAST "unparseable";
6635 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006636 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006637 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006638 }
6639
6640 /*
6641 * SAX: End of Tag
6642 */
6643 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6644 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006645 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006646
Daniel Veillarde57ec792003-09-10 10:50:59 +00006647 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006648 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006649 return;
6650}
6651
6652/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006653 * xmlParseEndTag:
6654 * @ctxt: an XML parser context
6655 *
6656 * parse an end of tag
6657 *
6658 * [42] ETag ::= '</' Name S? '>'
6659 *
6660 * With namespace
6661 *
6662 * [NS 9] ETag ::= '</' QName S? '>'
6663 */
6664
6665void
6666xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006667 xmlParseEndTag1(ctxt, 0);
6668}
Daniel Veillard81273902003-09-30 00:43:48 +00006669#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006670
6671/************************************************************************
6672 * *
6673 * SAX 2 specific operations *
6674 * *
6675 ************************************************************************/
6676
6677static const xmlChar *
6678xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6679 int len = 0, l;
6680 int c;
6681 int count = 0;
6682
6683 /*
6684 * Handler for more complex cases
6685 */
6686 GROW;
6687 c = CUR_CHAR(l);
6688 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006689 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006690 return(NULL);
6691 }
6692
6693 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006694 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006695 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006696 (IS_COMBINING(c)) ||
6697 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006698 if (count++ > 100) {
6699 count = 0;
6700 GROW;
6701 }
6702 len += l;
6703 NEXTL(l);
6704 c = CUR_CHAR(l);
6705 }
6706 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6707}
6708
6709/*
6710 * xmlGetNamespace:
6711 * @ctxt: an XML parser context
6712 * @prefix: the prefix to lookup
6713 *
6714 * Lookup the namespace name for the @prefix (which ca be NULL)
6715 * The prefix must come from the @ctxt->dict dictionnary
6716 *
6717 * Returns the namespace name or NULL if not bound
6718 */
6719static const xmlChar *
6720xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6721 int i;
6722
Daniel Veillarde57ec792003-09-10 10:50:59 +00006723 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006724 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006725 if (ctxt->nsTab[i] == prefix) {
6726 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6727 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006728 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006729 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006730 return(NULL);
6731}
6732
6733/**
6734 * xmlParseNCName:
6735 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006736 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006737 *
6738 * parse an XML name.
6739 *
6740 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6741 * CombiningChar | Extender
6742 *
6743 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6744 *
6745 * Returns the Name parsed or NULL
6746 */
6747
6748static const xmlChar *
6749xmlParseNCName(xmlParserCtxtPtr ctxt) {
6750 const xmlChar *in;
6751 const xmlChar *ret;
6752 int count = 0;
6753
6754 /*
6755 * Accelerator for simple ASCII names
6756 */
6757 in = ctxt->input->cur;
6758 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6759 ((*in >= 0x41) && (*in <= 0x5A)) ||
6760 (*in == '_')) {
6761 in++;
6762 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6763 ((*in >= 0x41) && (*in <= 0x5A)) ||
6764 ((*in >= 0x30) && (*in <= 0x39)) ||
6765 (*in == '_') || (*in == '-') ||
6766 (*in == '.'))
6767 in++;
6768 if ((*in > 0) && (*in < 0x80)) {
6769 count = in - ctxt->input->cur;
6770 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6771 ctxt->input->cur = in;
6772 ctxt->nbChars += count;
6773 ctxt->input->col += count;
6774 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006775 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006776 }
6777 return(ret);
6778 }
6779 }
6780 return(xmlParseNCNameComplex(ctxt));
6781}
6782
6783/**
6784 * xmlParseQName:
6785 * @ctxt: an XML parser context
6786 * @prefix: pointer to store the prefix part
6787 *
6788 * parse an XML Namespace QName
6789 *
6790 * [6] QName ::= (Prefix ':')? LocalPart
6791 * [7] Prefix ::= NCName
6792 * [8] LocalPart ::= NCName
6793 *
6794 * Returns the Name parsed or NULL
6795 */
6796
6797static const xmlChar *
6798xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6799 const xmlChar *l, *p;
6800
6801 GROW;
6802
6803 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006804 if (l == NULL) {
6805 if (CUR == ':') {
6806 l = xmlParseName(ctxt);
6807 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006808 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6809 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006810 *prefix = NULL;
6811 return(l);
6812 }
6813 }
6814 return(NULL);
6815 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006816 if (CUR == ':') {
6817 NEXT;
6818 p = l;
6819 l = xmlParseNCName(ctxt);
6820 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006821 xmlChar *tmp;
6822
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006823 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6824 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006825 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6826 p = xmlDictLookup(ctxt->dict, tmp, -1);
6827 if (tmp != NULL) xmlFree(tmp);
6828 *prefix = NULL;
6829 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006830 }
6831 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006832 xmlChar *tmp;
6833
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006834 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6835 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006836 NEXT;
6837 tmp = (xmlChar *) xmlParseName(ctxt);
6838 if (tmp != NULL) {
6839 tmp = xmlBuildQName(tmp, l, NULL, 0);
6840 l = xmlDictLookup(ctxt->dict, tmp, -1);
6841 if (tmp != NULL) xmlFree(tmp);
6842 *prefix = p;
6843 return(l);
6844 }
6845 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6846 l = xmlDictLookup(ctxt->dict, tmp, -1);
6847 if (tmp != NULL) xmlFree(tmp);
6848 *prefix = p;
6849 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006850 }
6851 *prefix = p;
6852 } else
6853 *prefix = NULL;
6854 return(l);
6855}
6856
6857/**
6858 * xmlParseQNameAndCompare:
6859 * @ctxt: an XML parser context
6860 * @name: the localname
6861 * @prefix: the prefix, if any.
6862 *
6863 * parse an XML name and compares for match
6864 * (specialized for endtag parsing)
6865 *
6866 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6867 * and the name for mismatch
6868 */
6869
6870static const xmlChar *
6871xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6872 xmlChar const *prefix) {
6873 const xmlChar *cmp = name;
6874 const xmlChar *in;
6875 const xmlChar *ret;
6876 const xmlChar *prefix2;
6877
6878 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6879
6880 GROW;
6881 in = ctxt->input->cur;
6882
6883 cmp = prefix;
6884 while (*in != 0 && *in == *cmp) {
6885 ++in;
6886 ++cmp;
6887 }
6888 if ((*cmp == 0) && (*in == ':')) {
6889 in++;
6890 cmp = name;
6891 while (*in != 0 && *in == *cmp) {
6892 ++in;
6893 ++cmp;
6894 }
William M. Brack76e95df2003-10-18 16:20:14 +00006895 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006896 /* success */
6897 ctxt->input->cur = in;
6898 return((const xmlChar*) 1);
6899 }
6900 }
6901 /*
6902 * all strings coms from the dictionary, equality can be done directly
6903 */
6904 ret = xmlParseQName (ctxt, &prefix2);
6905 if ((ret == name) && (prefix == prefix2))
6906 return((const xmlChar*) 1);
6907 return ret;
6908}
6909
6910/**
6911 * xmlParseAttValueInternal:
6912 * @ctxt: an XML parser context
6913 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006914 * @alloc: whether the attribute was reallocated as a new string
6915 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006916 *
6917 * parse a value for an attribute.
6918 * NOTE: if no normalization is needed, the routine will return pointers
6919 * directly from the data buffer.
6920 *
6921 * 3.3.3 Attribute-Value Normalization:
6922 * Before the value of an attribute is passed to the application or
6923 * checked for validity, the XML processor must normalize it as follows:
6924 * - a character reference is processed by appending the referenced
6925 * character to the attribute value
6926 * - an entity reference is processed by recursively processing the
6927 * replacement text of the entity
6928 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6929 * appending #x20 to the normalized value, except that only a single
6930 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6931 * parsed entity or the literal entity value of an internal parsed entity
6932 * - other characters are processed by appending them to the normalized value
6933 * If the declared value is not CDATA, then the XML processor must further
6934 * process the normalized attribute value by discarding any leading and
6935 * trailing space (#x20) characters, and by replacing sequences of space
6936 * (#x20) characters by a single space (#x20) character.
6937 * All attributes for which no declaration has been read should be treated
6938 * by a non-validating parser as if declared CDATA.
6939 *
6940 * Returns the AttValue parsed or NULL. The value has to be freed by the
6941 * caller if it was copied, this can be detected by val[*len] == 0.
6942 */
6943
6944static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006945xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
6946 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006947{
Daniel Veillard0fb18932003-09-07 09:14:37 +00006948 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006949 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006950 xmlChar *ret = NULL;
6951
6952 GROW;
6953 in = (xmlChar *) CUR_PTR;
6954 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006955 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006956 return (NULL);
6957 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006958 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00006959
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006960 /*
6961 * try to handle in this routine the most common case where no
6962 * allocation of a new string is required and where content is
6963 * pure ASCII.
6964 */
6965 limit = *in++;
6966 end = ctxt->input->end;
6967 start = in;
6968 if (in >= end) {
6969 const xmlChar *oldbase = ctxt->input->base;
6970 GROW;
6971 if (oldbase != ctxt->input->base) {
6972 long delta = ctxt->input->base - oldbase;
6973 start = start + delta;
6974 in = in + delta;
6975 }
6976 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006977 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006978 if (normalize) {
6979 /*
6980 * Skip any leading spaces
6981 */
6982 while ((in < end) && (*in != limit) &&
6983 ((*in == 0x20) || (*in == 0x9) ||
6984 (*in == 0xA) || (*in == 0xD))) {
6985 in++;
6986 start = in;
6987 if (in >= end) {
6988 const xmlChar *oldbase = ctxt->input->base;
6989 GROW;
6990 if (oldbase != ctxt->input->base) {
6991 long delta = ctxt->input->base - oldbase;
6992 start = start + delta;
6993 in = in + delta;
6994 }
6995 end = ctxt->input->end;
6996 }
6997 }
6998 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
6999 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7000 if ((*in++ == 0x20) && (*in == 0x20)) break;
7001 if (in >= end) {
7002 const xmlChar *oldbase = ctxt->input->base;
7003 GROW;
7004 if (oldbase != ctxt->input->base) {
7005 long delta = ctxt->input->base - oldbase;
7006 start = start + delta;
7007 in = in + delta;
7008 }
7009 end = ctxt->input->end;
7010 }
7011 }
7012 last = in;
7013 /*
7014 * skip the trailing blanks
7015 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007016 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007017 while ((in < end) && (*in != limit) &&
7018 ((*in == 0x20) || (*in == 0x9) ||
7019 (*in == 0xA) || (*in == 0xD))) {
7020 in++;
7021 if (in >= end) {
7022 const xmlChar *oldbase = ctxt->input->base;
7023 GROW;
7024 if (oldbase != ctxt->input->base) {
7025 long delta = ctxt->input->base - oldbase;
7026 start = start + delta;
7027 in = in + delta;
7028 last = last + delta;
7029 }
7030 end = ctxt->input->end;
7031 }
7032 }
7033 if (*in != limit) goto need_complex;
7034 } else {
7035 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7036 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7037 in++;
7038 if (in >= end) {
7039 const xmlChar *oldbase = ctxt->input->base;
7040 GROW;
7041 if (oldbase != ctxt->input->base) {
7042 long delta = ctxt->input->base - oldbase;
7043 start = start + delta;
7044 in = in + delta;
7045 }
7046 end = ctxt->input->end;
7047 }
7048 }
7049 last = in;
7050 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007051 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007052 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007053 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007054 *len = last - start;
7055 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007056 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007057 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007058 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007059 }
7060 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007061 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007062 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007063need_complex:
7064 if (alloc) *alloc = 1;
7065 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007066}
7067
7068/**
7069 * xmlParseAttribute2:
7070 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007071 * @pref: the element prefix
7072 * @elem: the element name
7073 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007074 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007075 * @len: an int * to save the length of the attribute
7076 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007077 *
7078 * parse an attribute in the new SAX2 framework.
7079 *
7080 * Returns the attribute name, and the value in *value, .
7081 */
7082
7083static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007084xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7085 const xmlChar *pref, const xmlChar *elem,
7086 const xmlChar **prefix, xmlChar **value,
7087 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007088 const xmlChar *name;
7089 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007090 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007091
7092 *value = NULL;
7093 GROW;
7094 name = xmlParseQName(ctxt, prefix);
7095 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007096 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7097 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007098 return(NULL);
7099 }
7100
7101 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007102 * get the type if needed
7103 */
7104 if (ctxt->attsSpecial != NULL) {
7105 int type;
7106
7107 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7108 pref, elem, *prefix, name);
7109 if (type != 0) normalize = 1;
7110 }
7111
7112 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007113 * read the value
7114 */
7115 SKIP_BLANKS;
7116 if (RAW == '=') {
7117 NEXT;
7118 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007119 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007120 ctxt->instate = XML_PARSER_CONTENT;
7121 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007122 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007123 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007124 return(NULL);
7125 }
7126
7127 /*
7128 * Check that xml:lang conforms to the specification
7129 * No more registered as an error, just generate a warning now
7130 * since this was deprecated in XML second edition
7131 */
7132 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7133 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007134 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7135 "Malformed value for xml:lang : %s\n",
7136 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007137 }
7138 }
7139
7140 /*
7141 * Check that xml:space conforms to the specification
7142 */
7143 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7144 if (xmlStrEqual(val, BAD_CAST "default"))
7145 *(ctxt->space) = 0;
7146 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7147 *(ctxt->space) = 1;
7148 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007149 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007150"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7151 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007152 }
7153 }
7154
7155 *value = val;
7156 return(name);
7157}
7158
7159/**
7160 * xmlParseStartTag2:
7161 * @ctxt: an XML parser context
7162 *
7163 * parse a start of tag either for rule element or
7164 * EmptyElement. In both case we don't parse the tag closing chars.
7165 * This routine is called when running SAX2 parsing
7166 *
7167 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7168 *
7169 * [ WFC: Unique Att Spec ]
7170 * No attribute name may appear more than once in the same start-tag or
7171 * empty-element tag.
7172 *
7173 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7174 *
7175 * [ WFC: Unique Att Spec ]
7176 * No attribute name may appear more than once in the same start-tag or
7177 * empty-element tag.
7178 *
7179 * With namespace:
7180 *
7181 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7182 *
7183 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7184 *
7185 * Returns the element name parsed
7186 */
7187
7188static const xmlChar *
7189xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007190 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007191 const xmlChar *localname;
7192 const xmlChar *prefix;
7193 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007194 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007195 const xmlChar *nsname;
7196 xmlChar *attvalue;
7197 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007198 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007199 int nratts, nbatts, nbdef;
7200 int i, j, nbNs, attval;
7201 const xmlChar *base;
7202 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007203
7204 if (RAW != '<') return(NULL);
7205 NEXT1;
7206
7207 /*
7208 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7209 * point since the attribute values may be stored as pointers to
7210 * the buffer and calling SHRINK would destroy them !
7211 * The Shrinking is only possible once the full set of attribute
7212 * callbacks have been done.
7213 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007214reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007215 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007216 base = ctxt->input->base;
7217 cur = ctxt->input->cur - ctxt->input->base;
7218 nbatts = 0;
7219 nratts = 0;
7220 nbdef = 0;
7221 nbNs = 0;
7222 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007223
7224 localname = xmlParseQName(ctxt, &prefix);
7225 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007226 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7227 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007228 return(NULL);
7229 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007230 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007231
7232 /*
7233 * Now parse the attributes, it ends up with the ending
7234 *
7235 * (S Attribute)* S?
7236 */
7237 SKIP_BLANKS;
7238 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007239 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007240
7241 while ((RAW != '>') &&
7242 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007243 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007244 const xmlChar *q = CUR_PTR;
7245 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007246 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007247
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007248 attname = xmlParseAttribute2(ctxt, prefix, localname,
7249 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007250 if ((attname != NULL) && (attvalue != NULL)) {
7251 if (len < 0) len = xmlStrlen(attvalue);
7252 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007253 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7254 xmlURIPtr uri;
7255
7256 if (*URL != 0) {
7257 uri = xmlParseURI((const char *) URL);
7258 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007259 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7260 "xmlns: %s not a valid URI\n",
7261 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007262 } else {
7263 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007264 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7265 "xmlns: URI %s is not absolute\n",
7266 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007267 }
7268 xmlFreeURI(uri);
7269 }
7270 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007271 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007272 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007273 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007274 for (j = 1;j <= nbNs;j++)
7275 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7276 break;
7277 if (j <= nbNs)
7278 xmlErrAttributeDup(ctxt, NULL, attname);
7279 else
7280 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007281 if (alloc != 0) xmlFree(attvalue);
7282 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007283 continue;
7284 }
7285 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007286 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7287 xmlURIPtr uri;
7288
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007289 if (attname == ctxt->str_xml) {
7290 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007291 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7292 "xml namespace prefix mapped to wrong URI\n",
7293 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007294 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007295 /*
7296 * Do not keep a namespace definition node
7297 */
7298 if (alloc != 0) xmlFree(attvalue);
7299 SKIP_BLANKS;
7300 continue;
7301 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007302 uri = xmlParseURI((const char *) URL);
7303 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007304 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7305 "xmlns:%s: '%s' is not a valid URI\n",
7306 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007307 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007308 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007309 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7310 "xmlns:%s: URI %s is not absolute\n",
7311 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007312 }
7313 xmlFreeURI(uri);
7314 }
7315
Daniel Veillard0fb18932003-09-07 09:14:37 +00007316 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007317 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007318 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007319 for (j = 1;j <= nbNs;j++)
7320 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7321 break;
7322 if (j <= nbNs)
7323 xmlErrAttributeDup(ctxt, aprefix, attname);
7324 else
7325 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007326 if (alloc != 0) xmlFree(attvalue);
7327 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 continue;
7329 }
7330
7331 /*
7332 * Add the pair to atts
7333 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007334 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7335 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007336 if (attvalue[len] == 0)
7337 xmlFree(attvalue);
7338 goto failed;
7339 }
7340 maxatts = ctxt->maxatts;
7341 atts = ctxt->atts;
7342 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007343 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007344 atts[nbatts++] = attname;
7345 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007346 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007347 atts[nbatts++] = attvalue;
7348 attvalue += len;
7349 atts[nbatts++] = attvalue;
7350 /*
7351 * tag if some deallocation is needed
7352 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007353 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007354 } else {
7355 if ((attvalue != NULL) && (attvalue[len] == 0))
7356 xmlFree(attvalue);
7357 }
7358
7359failed:
7360
7361 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007362 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007363 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7364 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007365 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007366 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7367 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007368 }
7369 SKIP_BLANKS;
7370 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7371 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007372 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007373 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007374 break;
7375 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007376 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007377 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007378 }
7379
Daniel Veillard0fb18932003-09-07 09:14:37 +00007380 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007381 * The attributes defaulting
7382 */
7383 if (ctxt->attsDefault != NULL) {
7384 xmlDefAttrsPtr defaults;
7385
7386 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7387 if (defaults != NULL) {
7388 for (i = 0;i < defaults->nbAttrs;i++) {
7389 attname = defaults->values[4 * i];
7390 aprefix = defaults->values[4 * i + 1];
7391
7392 /*
7393 * special work for namespaces defaulted defs
7394 */
7395 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7396 /*
7397 * check that it's not a defined namespace
7398 */
7399 for (j = 1;j <= nbNs;j++)
7400 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7401 break;
7402 if (j <= nbNs) continue;
7403
7404 nsname = xmlGetNamespace(ctxt, NULL);
7405 if (nsname != defaults->values[4 * i + 2]) {
7406 if (nsPush(ctxt, NULL,
7407 defaults->values[4 * i + 2]) > 0)
7408 nbNs++;
7409 }
7410 } else if (aprefix == ctxt->str_xmlns) {
7411 /*
7412 * check that it's not a defined namespace
7413 */
7414 for (j = 1;j <= nbNs;j++)
7415 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7416 break;
7417 if (j <= nbNs) continue;
7418
7419 nsname = xmlGetNamespace(ctxt, attname);
7420 if (nsname != defaults->values[2]) {
7421 if (nsPush(ctxt, attname,
7422 defaults->values[4 * i + 2]) > 0)
7423 nbNs++;
7424 }
7425 } else {
7426 /*
7427 * check that it's not a defined attribute
7428 */
7429 for (j = 0;j < nbatts;j+=5) {
7430 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7431 break;
7432 }
7433 if (j < nbatts) continue;
7434
7435 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7436 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007437 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007438 }
7439 maxatts = ctxt->maxatts;
7440 atts = ctxt->atts;
7441 }
7442 atts[nbatts++] = attname;
7443 atts[nbatts++] = aprefix;
7444 if (aprefix == NULL)
7445 atts[nbatts++] = NULL;
7446 else
7447 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7448 atts[nbatts++] = defaults->values[4 * i + 2];
7449 atts[nbatts++] = defaults->values[4 * i + 3];
7450 nbdef++;
7451 }
7452 }
7453 }
7454 }
7455
Daniel Veillarde70c8772003-11-25 07:21:18 +00007456 /*
7457 * The attributes checkings
7458 */
7459 for (i = 0; i < nbatts;i += 5) {
7460 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7461 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7462 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7463 "Namespace prefix %s for %s on %s is not defined\n",
7464 atts[i + 1], atts[i], localname);
7465 }
7466 atts[i + 2] = nsname;
7467 /*
7468 * [ WFC: Unique Att Spec ]
7469 * No attribute name may appear more than once in the same
7470 * start-tag or empty-element tag.
7471 * As extended by the Namespace in XML REC.
7472 */
7473 for (j = 0; j < i;j += 5) {
7474 if (atts[i] == atts[j]) {
7475 if (atts[i+1] == atts[j+1]) {
7476 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7477 break;
7478 }
7479 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7480 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7481 "Namespaced Attribute %s in '%s' redefined\n",
7482 atts[i], nsname, NULL);
7483 break;
7484 }
7485 }
7486 }
7487 }
7488
Daniel Veillarde57ec792003-09-10 10:50:59 +00007489 nsname = xmlGetNamespace(ctxt, prefix);
7490 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007491 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7492 "Namespace prefix %s on %s is not defined\n",
7493 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007494 }
7495 *pref = prefix;
7496 *URI = nsname;
7497
7498 /*
7499 * SAX: Start of Element !
7500 */
7501 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7502 (!ctxt->disableSAX)) {
7503 if (nbNs > 0)
7504 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7505 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7506 nbatts / 5, nbdef, atts);
7507 else
7508 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7509 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7510 }
7511
7512 /*
7513 * Free up attribute allocated strings if needed
7514 */
7515 if (attval != 0) {
7516 for (i = 3,j = 0; j < nratts;i += 5,j++)
7517 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7518 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007519 }
7520
7521 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007522
7523base_changed:
7524 /*
7525 * the attribute strings are valid iif the base didn't changed
7526 */
7527 if (attval != 0) {
7528 for (i = 3,j = 0; j < nratts;i += 5,j++)
7529 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7530 xmlFree((xmlChar *) atts[i]);
7531 }
7532 ctxt->input->cur = ctxt->input->base + cur;
7533 if (ctxt->wellFormed == 1) {
7534 goto reparse;
7535 }
7536 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007537}
7538
7539/**
7540 * xmlParseEndTag2:
7541 * @ctxt: an XML parser context
7542 * @line: line of the start tag
7543 * @nsNr: number of namespaces on the start tag
7544 *
7545 * parse an end of tag
7546 *
7547 * [42] ETag ::= '</' Name S? '>'
7548 *
7549 * With namespace
7550 *
7551 * [NS 9] ETag ::= '</' QName S? '>'
7552 */
7553
7554static void
7555xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007556 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 const xmlChar *name;
7558
7559 GROW;
7560 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007561 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007562 return;
7563 }
7564 SKIP(2);
7565
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007566 if ((tlen > 0) && (memcmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
7567 if (ctxt->input->cur[tlen] == '>') {
7568 ctxt->input->cur += tlen + 1;
7569 goto done;
7570 }
7571 ctxt->input->cur += tlen;
7572 name = (xmlChar*)1;
7573 } else {
7574 if (prefix == NULL)
7575 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7576 else
7577 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7578 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007579
7580 /*
7581 * We should definitely be at the ending "S? '>'" part
7582 */
7583 GROW;
7584 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007585 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007586 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007587 } else
7588 NEXT1;
7589
7590 /*
7591 * [ WFC: Element Type Match ]
7592 * The Name in an element's end-tag must match the element type in the
7593 * start-tag.
7594 *
7595 */
7596 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007597 if (name == NULL) name = BAD_CAST "unparseable";
7598 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007599 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007600 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007601 }
7602
7603 /*
7604 * SAX: End of Tag
7605 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007606done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007607 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7608 (!ctxt->disableSAX))
7609 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7610
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 spacePop(ctxt);
7612 if (nsNr != 0)
7613 nsPop(ctxt, nsNr);
7614 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007615}
7616
7617/**
Owen Taylor3473f882001-02-23 17:55:21 +00007618 * xmlParseCDSect:
7619 * @ctxt: an XML parser context
7620 *
7621 * Parse escaped pure raw content.
7622 *
7623 * [18] CDSect ::= CDStart CData CDEnd
7624 *
7625 * [19] CDStart ::= '<![CDATA['
7626 *
7627 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7628 *
7629 * [21] CDEnd ::= ']]>'
7630 */
7631void
7632xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7633 xmlChar *buf = NULL;
7634 int len = 0;
7635 int size = XML_PARSER_BUFFER_SIZE;
7636 int r, rl;
7637 int s, sl;
7638 int cur, l;
7639 int count = 0;
7640
Daniel Veillard8f597c32003-10-06 08:19:27 +00007641 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007642 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007643 SKIP(9);
7644 } else
7645 return;
7646
7647 ctxt->instate = XML_PARSER_CDATA_SECTION;
7648 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007649 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007650 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007651 ctxt->instate = XML_PARSER_CONTENT;
7652 return;
7653 }
7654 NEXTL(rl);
7655 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007656 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007657 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007658 ctxt->instate = XML_PARSER_CONTENT;
7659 return;
7660 }
7661 NEXTL(sl);
7662 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007663 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007664 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007665 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007666 return;
7667 }
William M. Brack871611b2003-10-18 04:53:14 +00007668 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007669 ((r != ']') || (s != ']') || (cur != '>'))) {
7670 if (len + 5 >= size) {
7671 size *= 2;
7672 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7673 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007674 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007675 return;
7676 }
7677 }
7678 COPY_BUF(rl,buf,len,r);
7679 r = s;
7680 rl = sl;
7681 s = cur;
7682 sl = l;
7683 count++;
7684 if (count > 50) {
7685 GROW;
7686 count = 0;
7687 }
7688 NEXTL(l);
7689 cur = CUR_CHAR(l);
7690 }
7691 buf[len] = 0;
7692 ctxt->instate = XML_PARSER_CONTENT;
7693 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007694 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007695 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007696 xmlFree(buf);
7697 return;
7698 }
7699 NEXTL(l);
7700
7701 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007702 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007703 */
7704 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7705 if (ctxt->sax->cdataBlock != NULL)
7706 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007707 else if (ctxt->sax->characters != NULL)
7708 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007709 }
7710 xmlFree(buf);
7711}
7712
7713/**
7714 * xmlParseContent:
7715 * @ctxt: an XML parser context
7716 *
7717 * Parse a content:
7718 *
7719 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7720 */
7721
7722void
7723xmlParseContent(xmlParserCtxtPtr ctxt) {
7724 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007725 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007726 ((RAW != '<') || (NXT(1) != '/'))) {
7727 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007728 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007729 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007730
7731 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007732 * First case : a Processing Instruction.
7733 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007734 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007735 xmlParsePI(ctxt);
7736 }
7737
7738 /*
7739 * Second case : a CDSection
7740 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007741 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007742 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007743 xmlParseCDSect(ctxt);
7744 }
7745
7746 /*
7747 * Third case : a comment
7748 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007749 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007750 (NXT(2) == '-') && (NXT(3) == '-')) {
7751 xmlParseComment(ctxt);
7752 ctxt->instate = XML_PARSER_CONTENT;
7753 }
7754
7755 /*
7756 * Fourth case : a sub-element.
7757 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007758 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007759 xmlParseElement(ctxt);
7760 }
7761
7762 /*
7763 * Fifth case : a reference. If if has not been resolved,
7764 * parsing returns it's Name, create the node
7765 */
7766
Daniel Veillard21a0f912001-02-25 19:54:14 +00007767 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007768 xmlParseReference(ctxt);
7769 }
7770
7771 /*
7772 * Last case, text. Note that References are handled directly.
7773 */
7774 else {
7775 xmlParseCharData(ctxt, 0);
7776 }
7777
7778 GROW;
7779 /*
7780 * Pop-up of finished entities.
7781 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007782 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007783 xmlPopInput(ctxt);
7784 SHRINK;
7785
Daniel Veillardfdc91562002-07-01 21:52:03 +00007786 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007787 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7788 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007789 ctxt->instate = XML_PARSER_EOF;
7790 break;
7791 }
7792 }
7793}
7794
7795/**
7796 * xmlParseElement:
7797 * @ctxt: an XML parser context
7798 *
7799 * parse an XML element, this is highly recursive
7800 *
7801 * [39] element ::= EmptyElemTag | STag content ETag
7802 *
7803 * [ WFC: Element Type Match ]
7804 * The Name in an element's end-tag must match the element type in the
7805 * start-tag.
7806 *
Owen Taylor3473f882001-02-23 17:55:21 +00007807 */
7808
7809void
7810xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007811 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812 const xmlChar *prefix;
7813 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007814 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007815 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007816 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007817 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007818
7819 /* Capture start position */
7820 if (ctxt->record_info) {
7821 node_info.begin_pos = ctxt->input->consumed +
7822 (CUR_PTR - ctxt->input->base);
7823 node_info.begin_line = ctxt->input->line;
7824 }
7825
7826 if (ctxt->spaceNr == 0)
7827 spacePush(ctxt, -1);
7828 else
7829 spacePush(ctxt, *ctxt->space);
7830
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007831 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007832#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007833 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007834#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007835 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007836#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007837 else
7838 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007839#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007840 if (name == NULL) {
7841 spacePop(ctxt);
7842 return;
7843 }
7844 namePush(ctxt, name);
7845 ret = ctxt->node;
7846
Daniel Veillard4432df22003-09-28 18:58:27 +00007847#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007848 /*
7849 * [ VC: Root Element Type ]
7850 * The Name in the document type declaration must match the element
7851 * type of the root element.
7852 */
7853 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7854 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7855 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007856#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007857
7858 /*
7859 * Check for an Empty Element.
7860 */
7861 if ((RAW == '/') && (NXT(1) == '>')) {
7862 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 if (ctxt->sax2) {
7864 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7865 (!ctxt->disableSAX))
7866 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007867#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007868 } else {
7869 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7870 (!ctxt->disableSAX))
7871 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007872#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007873 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007874 namePop(ctxt);
7875 spacePop(ctxt);
7876 if (nsNr != ctxt->nsNr)
7877 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007878 if ( ret != NULL && ctxt->record_info ) {
7879 node_info.end_pos = ctxt->input->consumed +
7880 (CUR_PTR - ctxt->input->base);
7881 node_info.end_line = ctxt->input->line;
7882 node_info.node = ret;
7883 xmlParserAddNodeInfo(ctxt, &node_info);
7884 }
7885 return;
7886 }
7887 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007888 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007889 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007890 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7891 "Couldn't find end of Start Tag %s line %d\n",
7892 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007893
7894 /*
7895 * end of parsing of this node.
7896 */
7897 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007898 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007899 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007900 if (nsNr != ctxt->nsNr)
7901 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007902
7903 /*
7904 * Capture end position and add node
7905 */
7906 if ( ret != NULL && ctxt->record_info ) {
7907 node_info.end_pos = ctxt->input->consumed +
7908 (CUR_PTR - ctxt->input->base);
7909 node_info.end_line = ctxt->input->line;
7910 node_info.node = ret;
7911 xmlParserAddNodeInfo(ctxt, &node_info);
7912 }
7913 return;
7914 }
7915
7916 /*
7917 * Parse the content of the element:
7918 */
7919 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007920 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007921 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007922 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007923 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007924
7925 /*
7926 * end of parsing of this node.
7927 */
7928 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007930 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007931 if (nsNr != ctxt->nsNr)
7932 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007933 return;
7934 }
7935
7936 /*
7937 * parse the end of tag: '</' should be here.
7938 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007939 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007940 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007941 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007942 }
7943#ifdef LIBXML_SAX1_ENABLED
7944 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00007945 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00007946#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007947
7948 /*
7949 * Capture end position and add node
7950 */
7951 if ( ret != NULL && ctxt->record_info ) {
7952 node_info.end_pos = ctxt->input->consumed +
7953 (CUR_PTR - ctxt->input->base);
7954 node_info.end_line = ctxt->input->line;
7955 node_info.node = ret;
7956 xmlParserAddNodeInfo(ctxt, &node_info);
7957 }
7958}
7959
7960/**
7961 * xmlParseVersionNum:
7962 * @ctxt: an XML parser context
7963 *
7964 * parse the XML version value.
7965 *
7966 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7967 *
7968 * Returns the string giving the XML version number, or NULL
7969 */
7970xmlChar *
7971xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7972 xmlChar *buf = NULL;
7973 int len = 0;
7974 int size = 10;
7975 xmlChar cur;
7976
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007977 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007978 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007979 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007980 return(NULL);
7981 }
7982 cur = CUR;
7983 while (((cur >= 'a') && (cur <= 'z')) ||
7984 ((cur >= 'A') && (cur <= 'Z')) ||
7985 ((cur >= '0') && (cur <= '9')) ||
7986 (cur == '_') || (cur == '.') ||
7987 (cur == ':') || (cur == '-')) {
7988 if (len + 1 >= size) {
7989 size *= 2;
7990 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7991 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007992 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007993 return(NULL);
7994 }
7995 }
7996 buf[len++] = cur;
7997 NEXT;
7998 cur=CUR;
7999 }
8000 buf[len] = 0;
8001 return(buf);
8002}
8003
8004/**
8005 * xmlParseVersionInfo:
8006 * @ctxt: an XML parser context
8007 *
8008 * parse the XML version.
8009 *
8010 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8011 *
8012 * [25] Eq ::= S? '=' S?
8013 *
8014 * Returns the version string, e.g. "1.0"
8015 */
8016
8017xmlChar *
8018xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8019 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008020
Daniel Veillarda07050d2003-10-19 14:46:32 +00008021 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008022 SKIP(7);
8023 SKIP_BLANKS;
8024 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008025 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008026 return(NULL);
8027 }
8028 NEXT;
8029 SKIP_BLANKS;
8030 if (RAW == '"') {
8031 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008032 version = xmlParseVersionNum(ctxt);
8033 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008034 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008035 } else
8036 NEXT;
8037 } else if (RAW == '\''){
8038 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008039 version = xmlParseVersionNum(ctxt);
8040 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008041 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008042 } else
8043 NEXT;
8044 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008045 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008046 }
8047 }
8048 return(version);
8049}
8050
8051/**
8052 * xmlParseEncName:
8053 * @ctxt: an XML parser context
8054 *
8055 * parse the XML encoding name
8056 *
8057 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8058 *
8059 * Returns the encoding name value or NULL
8060 */
8061xmlChar *
8062xmlParseEncName(xmlParserCtxtPtr ctxt) {
8063 xmlChar *buf = NULL;
8064 int len = 0;
8065 int size = 10;
8066 xmlChar cur;
8067
8068 cur = CUR;
8069 if (((cur >= 'a') && (cur <= 'z')) ||
8070 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008071 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008072 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008073 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008074 return(NULL);
8075 }
8076
8077 buf[len++] = cur;
8078 NEXT;
8079 cur = CUR;
8080 while (((cur >= 'a') && (cur <= 'z')) ||
8081 ((cur >= 'A') && (cur <= 'Z')) ||
8082 ((cur >= '0') && (cur <= '9')) ||
8083 (cur == '.') || (cur == '_') ||
8084 (cur == '-')) {
8085 if (len + 1 >= size) {
8086 size *= 2;
8087 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8088 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008089 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008090 return(NULL);
8091 }
8092 }
8093 buf[len++] = cur;
8094 NEXT;
8095 cur = CUR;
8096 if (cur == 0) {
8097 SHRINK;
8098 GROW;
8099 cur = CUR;
8100 }
8101 }
8102 buf[len] = 0;
8103 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008104 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008105 }
8106 return(buf);
8107}
8108
8109/**
8110 * xmlParseEncodingDecl:
8111 * @ctxt: an XML parser context
8112 *
8113 * parse the XML encoding declaration
8114 *
8115 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8116 *
8117 * this setups the conversion filters.
8118 *
8119 * Returns the encoding value or NULL
8120 */
8121
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008122const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008123xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8124 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008125
8126 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008127 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008128 SKIP(8);
8129 SKIP_BLANKS;
8130 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008131 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008132 return(NULL);
8133 }
8134 NEXT;
8135 SKIP_BLANKS;
8136 if (RAW == '"') {
8137 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008138 encoding = xmlParseEncName(ctxt);
8139 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008140 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008141 } else
8142 NEXT;
8143 } else if (RAW == '\''){
8144 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008145 encoding = xmlParseEncName(ctxt);
8146 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 } else
8149 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008150 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008151 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008152 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008153 /*
8154 * UTF-16 encoding stwich has already taken place at this stage,
8155 * more over the little-endian/big-endian selection is already done
8156 */
8157 if ((encoding != NULL) &&
8158 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8159 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008160 if (ctxt->encoding != NULL)
8161 xmlFree((xmlChar *) ctxt->encoding);
8162 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008163 }
8164 /*
8165 * UTF-8 encoding is handled natively
8166 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008167 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008168 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8169 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008170 if (ctxt->encoding != NULL)
8171 xmlFree((xmlChar *) ctxt->encoding);
8172 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008173 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008174 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008175 xmlCharEncodingHandlerPtr handler;
8176
8177 if (ctxt->input->encoding != NULL)
8178 xmlFree((xmlChar *) ctxt->input->encoding);
8179 ctxt->input->encoding = encoding;
8180
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008181 handler = xmlFindCharEncodingHandler((const char *) encoding);
8182 if (handler != NULL) {
8183 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008184 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008185 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008186 "Unsupported encoding %s\n", encoding);
8187 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008188 }
8189 }
8190 }
8191 return(encoding);
8192}
8193
8194/**
8195 * xmlParseSDDecl:
8196 * @ctxt: an XML parser context
8197 *
8198 * parse the XML standalone declaration
8199 *
8200 * [32] SDDecl ::= S 'standalone' Eq
8201 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8202 *
8203 * [ VC: Standalone Document Declaration ]
8204 * TODO The standalone document declaration must have the value "no"
8205 * if any external markup declarations contain declarations of:
8206 * - attributes with default values, if elements to which these
8207 * attributes apply appear in the document without specifications
8208 * of values for these attributes, or
8209 * - entities (other than amp, lt, gt, apos, quot), if references
8210 * to those entities appear in the document, or
8211 * - attributes with values subject to normalization, where the
8212 * attribute appears in the document with a value which will change
8213 * as a result of normalization, or
8214 * - element types with element content, if white space occurs directly
8215 * within any instance of those types.
8216 *
8217 * Returns 1 if standalone, 0 otherwise
8218 */
8219
8220int
8221xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8222 int standalone = -1;
8223
8224 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008225 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008226 SKIP(10);
8227 SKIP_BLANKS;
8228 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008229 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008230 return(standalone);
8231 }
8232 NEXT;
8233 SKIP_BLANKS;
8234 if (RAW == '\''){
8235 NEXT;
8236 if ((RAW == 'n') && (NXT(1) == 'o')) {
8237 standalone = 0;
8238 SKIP(2);
8239 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8240 (NXT(2) == 's')) {
8241 standalone = 1;
8242 SKIP(3);
8243 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008244 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008245 }
8246 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008247 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008248 } else
8249 NEXT;
8250 } else if (RAW == '"'){
8251 NEXT;
8252 if ((RAW == 'n') && (NXT(1) == 'o')) {
8253 standalone = 0;
8254 SKIP(2);
8255 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8256 (NXT(2) == 's')) {
8257 standalone = 1;
8258 SKIP(3);
8259 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008260 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008261 }
8262 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008263 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008264 } else
8265 NEXT;
8266 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008267 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008268 }
8269 }
8270 return(standalone);
8271}
8272
8273/**
8274 * xmlParseXMLDecl:
8275 * @ctxt: an XML parser context
8276 *
8277 * parse an XML declaration header
8278 *
8279 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8280 */
8281
8282void
8283xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8284 xmlChar *version;
8285
8286 /*
8287 * We know that '<?xml' is here.
8288 */
8289 SKIP(5);
8290
William M. Brack76e95df2003-10-18 16:20:14 +00008291 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008292 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8293 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008294 }
8295 SKIP_BLANKS;
8296
8297 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008298 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008299 */
8300 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008301 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008302 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008303 } else {
8304 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8305 /*
8306 * TODO: Blueberry should be detected here
8307 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008308 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8309 "Unsupported version '%s'\n",
8310 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008311 }
8312 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008313 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008314 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008315 }
Owen Taylor3473f882001-02-23 17:55:21 +00008316
8317 /*
8318 * We may have the encoding declaration
8319 */
William M. Brack76e95df2003-10-18 16:20:14 +00008320 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008321 if ((RAW == '?') && (NXT(1) == '>')) {
8322 SKIP(2);
8323 return;
8324 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008325 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008326 }
8327 xmlParseEncodingDecl(ctxt);
8328 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8329 /*
8330 * The XML REC instructs us to stop parsing right here
8331 */
8332 return;
8333 }
8334
8335 /*
8336 * We may have the standalone status.
8337 */
William M. Brack76e95df2003-10-18 16:20:14 +00008338 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008339 if ((RAW == '?') && (NXT(1) == '>')) {
8340 SKIP(2);
8341 return;
8342 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008343 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008344 }
8345 SKIP_BLANKS;
8346 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8347
8348 SKIP_BLANKS;
8349 if ((RAW == '?') && (NXT(1) == '>')) {
8350 SKIP(2);
8351 } else if (RAW == '>') {
8352 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008353 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008354 NEXT;
8355 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008356 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008357 MOVETO_ENDTAG(CUR_PTR);
8358 NEXT;
8359 }
8360}
8361
8362/**
8363 * xmlParseMisc:
8364 * @ctxt: an XML parser context
8365 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008366 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008367 *
8368 * [27] Misc ::= Comment | PI | S
8369 */
8370
8371void
8372xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008373 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008374 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008375 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008376 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008377 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008378 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008379 NEXT;
8380 } else
8381 xmlParseComment(ctxt);
8382 }
8383}
8384
8385/**
8386 * xmlParseDocument:
8387 * @ctxt: an XML parser context
8388 *
8389 * parse an XML document (and build a tree if using the standard SAX
8390 * interface).
8391 *
8392 * [1] document ::= prolog element Misc*
8393 *
8394 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8395 *
8396 * Returns 0, -1 in case of error. the parser context is augmented
8397 * as a result of the parsing.
8398 */
8399
8400int
8401xmlParseDocument(xmlParserCtxtPtr ctxt) {
8402 xmlChar start[4];
8403 xmlCharEncoding enc;
8404
8405 xmlInitParser();
8406
8407 GROW;
8408
8409 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008410 * SAX: detecting the level.
8411 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008412 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008413
8414 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008415 * SAX: beginning of the document processing.
8416 */
8417 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8418 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8419
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008420 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8421 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008422 /*
8423 * Get the 4 first bytes and decode the charset
8424 * if enc != XML_CHAR_ENCODING_NONE
8425 * plug some encoding conversion routines.
8426 */
8427 start[0] = RAW;
8428 start[1] = NXT(1);
8429 start[2] = NXT(2);
8430 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008431 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008432 if (enc != XML_CHAR_ENCODING_NONE) {
8433 xmlSwitchEncoding(ctxt, enc);
8434 }
Owen Taylor3473f882001-02-23 17:55:21 +00008435 }
8436
8437
8438 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008439 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008440 }
8441
8442 /*
8443 * Check for the XMLDecl in the Prolog.
8444 */
8445 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008446 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008447
8448 /*
8449 * Note that we will switch encoding on the fly.
8450 */
8451 xmlParseXMLDecl(ctxt);
8452 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8453 /*
8454 * The XML REC instructs us to stop parsing right here
8455 */
8456 return(-1);
8457 }
8458 ctxt->standalone = ctxt->input->standalone;
8459 SKIP_BLANKS;
8460 } else {
8461 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8462 }
8463 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8464 ctxt->sax->startDocument(ctxt->userData);
8465
8466 /*
8467 * The Misc part of the Prolog
8468 */
8469 GROW;
8470 xmlParseMisc(ctxt);
8471
8472 /*
8473 * Then possibly doc type declaration(s) and more Misc
8474 * (doctypedecl Misc*)?
8475 */
8476 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008477 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008478
8479 ctxt->inSubset = 1;
8480 xmlParseDocTypeDecl(ctxt);
8481 if (RAW == '[') {
8482 ctxt->instate = XML_PARSER_DTD;
8483 xmlParseInternalSubset(ctxt);
8484 }
8485
8486 /*
8487 * Create and update the external subset.
8488 */
8489 ctxt->inSubset = 2;
8490 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8491 (!ctxt->disableSAX))
8492 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8493 ctxt->extSubSystem, ctxt->extSubURI);
8494 ctxt->inSubset = 0;
8495
8496
8497 ctxt->instate = XML_PARSER_PROLOG;
8498 xmlParseMisc(ctxt);
8499 }
8500
8501 /*
8502 * Time to start parsing the tree itself
8503 */
8504 GROW;
8505 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008506 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8507 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008508 } else {
8509 ctxt->instate = XML_PARSER_CONTENT;
8510 xmlParseElement(ctxt);
8511 ctxt->instate = XML_PARSER_EPILOG;
8512
8513
8514 /*
8515 * The Misc part at the end
8516 */
8517 xmlParseMisc(ctxt);
8518
Daniel Veillard561b7f82002-03-20 21:55:57 +00008519 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008520 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008521 }
8522 ctxt->instate = XML_PARSER_EOF;
8523 }
8524
8525 /*
8526 * SAX: end of the document processing.
8527 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008528 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008529 ctxt->sax->endDocument(ctxt->userData);
8530
Daniel Veillard5997aca2002-03-18 18:36:20 +00008531 /*
8532 * Remove locally kept entity definitions if the tree was not built
8533 */
8534 if ((ctxt->myDoc != NULL) &&
8535 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8536 xmlFreeDoc(ctxt->myDoc);
8537 ctxt->myDoc = NULL;
8538 }
8539
Daniel Veillardc7612992002-02-17 22:47:37 +00008540 if (! ctxt->wellFormed) {
8541 ctxt->valid = 0;
8542 return(-1);
8543 }
Owen Taylor3473f882001-02-23 17:55:21 +00008544 return(0);
8545}
8546
8547/**
8548 * xmlParseExtParsedEnt:
8549 * @ctxt: an XML parser context
8550 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008551 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008552 * An external general parsed entity is well-formed if it matches the
8553 * production labeled extParsedEnt.
8554 *
8555 * [78] extParsedEnt ::= TextDecl? content
8556 *
8557 * Returns 0, -1 in case of error. the parser context is augmented
8558 * as a result of the parsing.
8559 */
8560
8561int
8562xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8563 xmlChar start[4];
8564 xmlCharEncoding enc;
8565
8566 xmlDefaultSAXHandlerInit();
8567
Daniel Veillard309f81d2003-09-23 09:02:53 +00008568 xmlDetectSAX2(ctxt);
8569
Owen Taylor3473f882001-02-23 17:55:21 +00008570 GROW;
8571
8572 /*
8573 * SAX: beginning of the document processing.
8574 */
8575 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8576 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8577
8578 /*
8579 * Get the 4 first bytes and decode the charset
8580 * if enc != XML_CHAR_ENCODING_NONE
8581 * plug some encoding conversion routines.
8582 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008583 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8584 start[0] = RAW;
8585 start[1] = NXT(1);
8586 start[2] = NXT(2);
8587 start[3] = NXT(3);
8588 enc = xmlDetectCharEncoding(start, 4);
8589 if (enc != XML_CHAR_ENCODING_NONE) {
8590 xmlSwitchEncoding(ctxt, enc);
8591 }
Owen Taylor3473f882001-02-23 17:55:21 +00008592 }
8593
8594
8595 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008596 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008597 }
8598
8599 /*
8600 * Check for the XMLDecl in the Prolog.
8601 */
8602 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008603 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008604
8605 /*
8606 * Note that we will switch encoding on the fly.
8607 */
8608 xmlParseXMLDecl(ctxt);
8609 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8610 /*
8611 * The XML REC instructs us to stop parsing right here
8612 */
8613 return(-1);
8614 }
8615 SKIP_BLANKS;
8616 } else {
8617 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8618 }
8619 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8620 ctxt->sax->startDocument(ctxt->userData);
8621
8622 /*
8623 * Doing validity checking on chunk doesn't make sense
8624 */
8625 ctxt->instate = XML_PARSER_CONTENT;
8626 ctxt->validate = 0;
8627 ctxt->loadsubset = 0;
8628 ctxt->depth = 0;
8629
8630 xmlParseContent(ctxt);
8631
8632 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008633 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008634 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008635 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008636 }
8637
8638 /*
8639 * SAX: end of the document processing.
8640 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008641 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008642 ctxt->sax->endDocument(ctxt->userData);
8643
8644 if (! ctxt->wellFormed) return(-1);
8645 return(0);
8646}
8647
Daniel Veillard73b013f2003-09-30 12:36:01 +00008648#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008649/************************************************************************
8650 * *
8651 * Progressive parsing interfaces *
8652 * *
8653 ************************************************************************/
8654
8655/**
8656 * xmlParseLookupSequence:
8657 * @ctxt: an XML parser context
8658 * @first: the first char to lookup
8659 * @next: the next char to lookup or zero
8660 * @third: the next char to lookup or zero
8661 *
8662 * Try to find if a sequence (first, next, third) or just (first next) or
8663 * (first) is available in the input stream.
8664 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8665 * to avoid rescanning sequences of bytes, it DOES change the state of the
8666 * parser, do not use liberally.
8667 *
8668 * Returns the index to the current parsing point if the full sequence
8669 * is available, -1 otherwise.
8670 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008671static int
Owen Taylor3473f882001-02-23 17:55:21 +00008672xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8673 xmlChar next, xmlChar third) {
8674 int base, len;
8675 xmlParserInputPtr in;
8676 const xmlChar *buf;
8677
8678 in = ctxt->input;
8679 if (in == NULL) return(-1);
8680 base = in->cur - in->base;
8681 if (base < 0) return(-1);
8682 if (ctxt->checkIndex > base)
8683 base = ctxt->checkIndex;
8684 if (in->buf == NULL) {
8685 buf = in->base;
8686 len = in->length;
8687 } else {
8688 buf = in->buf->buffer->content;
8689 len = in->buf->buffer->use;
8690 }
8691 /* take into account the sequence length */
8692 if (third) len -= 2;
8693 else if (next) len --;
8694 for (;base < len;base++) {
8695 if (buf[base] == first) {
8696 if (third != 0) {
8697 if ((buf[base + 1] != next) ||
8698 (buf[base + 2] != third)) continue;
8699 } else if (next != 0) {
8700 if (buf[base + 1] != next) continue;
8701 }
8702 ctxt->checkIndex = 0;
8703#ifdef DEBUG_PUSH
8704 if (next == 0)
8705 xmlGenericError(xmlGenericErrorContext,
8706 "PP: lookup '%c' found at %d\n",
8707 first, base);
8708 else if (third == 0)
8709 xmlGenericError(xmlGenericErrorContext,
8710 "PP: lookup '%c%c' found at %d\n",
8711 first, next, base);
8712 else
8713 xmlGenericError(xmlGenericErrorContext,
8714 "PP: lookup '%c%c%c' found at %d\n",
8715 first, next, third, base);
8716#endif
8717 return(base - (in->cur - in->base));
8718 }
8719 }
8720 ctxt->checkIndex = base;
8721#ifdef DEBUG_PUSH
8722 if (next == 0)
8723 xmlGenericError(xmlGenericErrorContext,
8724 "PP: lookup '%c' failed\n", first);
8725 else if (third == 0)
8726 xmlGenericError(xmlGenericErrorContext,
8727 "PP: lookup '%c%c' failed\n", first, next);
8728 else
8729 xmlGenericError(xmlGenericErrorContext,
8730 "PP: lookup '%c%c%c' failed\n", first, next, third);
8731#endif
8732 return(-1);
8733}
8734
8735/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008736 * xmlParseGetLasts:
8737 * @ctxt: an XML parser context
8738 * @lastlt: pointer to store the last '<' from the input
8739 * @lastgt: pointer to store the last '>' from the input
8740 *
8741 * Lookup the last < and > in the current chunk
8742 */
8743static void
8744xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8745 const xmlChar **lastgt) {
8746 const xmlChar *tmp;
8747
8748 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8749 xmlGenericError(xmlGenericErrorContext,
8750 "Internal error: xmlParseGetLasts\n");
8751 return;
8752 }
8753 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8754 tmp = ctxt->input->end;
8755 tmp--;
8756 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8757 (*tmp != '>')) tmp--;
8758 if (tmp < ctxt->input->base) {
8759 *lastlt = NULL;
8760 *lastgt = NULL;
8761 } else if (*tmp == '<') {
8762 *lastlt = tmp;
8763 tmp--;
8764 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8765 if (tmp < ctxt->input->base)
8766 *lastgt = NULL;
8767 else
8768 *lastgt = tmp;
8769 } else {
8770 *lastgt = tmp;
8771 tmp--;
8772 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8773 if (tmp < ctxt->input->base)
8774 *lastlt = NULL;
8775 else
8776 *lastlt = tmp;
8777 }
8778
8779 } else {
8780 *lastlt = NULL;
8781 *lastgt = NULL;
8782 }
8783}
8784/**
Owen Taylor3473f882001-02-23 17:55:21 +00008785 * xmlParseTryOrFinish:
8786 * @ctxt: an XML parser context
8787 * @terminate: last chunk indicator
8788 *
8789 * Try to progress on parsing
8790 *
8791 * Returns zero if no parsing was possible
8792 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008793static int
Owen Taylor3473f882001-02-23 17:55:21 +00008794xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8795 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008796 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008797 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008798 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008799
8800#ifdef DEBUG_PUSH
8801 switch (ctxt->instate) {
8802 case XML_PARSER_EOF:
8803 xmlGenericError(xmlGenericErrorContext,
8804 "PP: try EOF\n"); break;
8805 case XML_PARSER_START:
8806 xmlGenericError(xmlGenericErrorContext,
8807 "PP: try START\n"); break;
8808 case XML_PARSER_MISC:
8809 xmlGenericError(xmlGenericErrorContext,
8810 "PP: try MISC\n");break;
8811 case XML_PARSER_COMMENT:
8812 xmlGenericError(xmlGenericErrorContext,
8813 "PP: try COMMENT\n");break;
8814 case XML_PARSER_PROLOG:
8815 xmlGenericError(xmlGenericErrorContext,
8816 "PP: try PROLOG\n");break;
8817 case XML_PARSER_START_TAG:
8818 xmlGenericError(xmlGenericErrorContext,
8819 "PP: try START_TAG\n");break;
8820 case XML_PARSER_CONTENT:
8821 xmlGenericError(xmlGenericErrorContext,
8822 "PP: try CONTENT\n");break;
8823 case XML_PARSER_CDATA_SECTION:
8824 xmlGenericError(xmlGenericErrorContext,
8825 "PP: try CDATA_SECTION\n");break;
8826 case XML_PARSER_END_TAG:
8827 xmlGenericError(xmlGenericErrorContext,
8828 "PP: try END_TAG\n");break;
8829 case XML_PARSER_ENTITY_DECL:
8830 xmlGenericError(xmlGenericErrorContext,
8831 "PP: try ENTITY_DECL\n");break;
8832 case XML_PARSER_ENTITY_VALUE:
8833 xmlGenericError(xmlGenericErrorContext,
8834 "PP: try ENTITY_VALUE\n");break;
8835 case XML_PARSER_ATTRIBUTE_VALUE:
8836 xmlGenericError(xmlGenericErrorContext,
8837 "PP: try ATTRIBUTE_VALUE\n");break;
8838 case XML_PARSER_DTD:
8839 xmlGenericError(xmlGenericErrorContext,
8840 "PP: try DTD\n");break;
8841 case XML_PARSER_EPILOG:
8842 xmlGenericError(xmlGenericErrorContext,
8843 "PP: try EPILOG\n");break;
8844 case XML_PARSER_PI:
8845 xmlGenericError(xmlGenericErrorContext,
8846 "PP: try PI\n");break;
8847 case XML_PARSER_IGNORE:
8848 xmlGenericError(xmlGenericErrorContext,
8849 "PP: try IGNORE\n");break;
8850 }
8851#endif
8852
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008853 if ((ctxt->input != NULL) &&
8854 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008855 xmlSHRINK(ctxt);
8856 ctxt->checkIndex = 0;
8857 }
8858 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008859
Daniel Veillarda880b122003-04-21 21:36:41 +00008860 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008861 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8862 return(0);
8863
8864
Owen Taylor3473f882001-02-23 17:55:21 +00008865 /*
8866 * Pop-up of finished entities.
8867 */
8868 while ((RAW == 0) && (ctxt->inputNr > 1))
8869 xmlPopInput(ctxt);
8870
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008871 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008872 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008873 avail = ctxt->input->length -
8874 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008875 else {
8876 /*
8877 * If we are operating on converted input, try to flush
8878 * remainng chars to avoid them stalling in the non-converted
8879 * buffer.
8880 */
8881 if ((ctxt->input->buf->raw != NULL) &&
8882 (ctxt->input->buf->raw->use > 0)) {
8883 int base = ctxt->input->base -
8884 ctxt->input->buf->buffer->content;
8885 int current = ctxt->input->cur - ctxt->input->base;
8886
8887 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8888 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8889 ctxt->input->cur = ctxt->input->base + current;
8890 ctxt->input->end =
8891 &ctxt->input->buf->buffer->content[
8892 ctxt->input->buf->buffer->use];
8893 }
8894 avail = ctxt->input->buf->buffer->use -
8895 (ctxt->input->cur - ctxt->input->base);
8896 }
Owen Taylor3473f882001-02-23 17:55:21 +00008897 if (avail < 1)
8898 goto done;
8899 switch (ctxt->instate) {
8900 case XML_PARSER_EOF:
8901 /*
8902 * Document parsing is done !
8903 */
8904 goto done;
8905 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008906 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8907 xmlChar start[4];
8908 xmlCharEncoding enc;
8909
8910 /*
8911 * Very first chars read from the document flow.
8912 */
8913 if (avail < 4)
8914 goto done;
8915
8916 /*
8917 * Get the 4 first bytes and decode the charset
8918 * if enc != XML_CHAR_ENCODING_NONE
8919 * plug some encoding conversion routines.
8920 */
8921 start[0] = RAW;
8922 start[1] = NXT(1);
8923 start[2] = NXT(2);
8924 start[3] = NXT(3);
8925 enc = xmlDetectCharEncoding(start, 4);
8926 if (enc != XML_CHAR_ENCODING_NONE) {
8927 xmlSwitchEncoding(ctxt, enc);
8928 }
8929 break;
8930 }
Owen Taylor3473f882001-02-23 17:55:21 +00008931
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00008932 if (avail < 2)
8933 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00008934 cur = ctxt->input->cur[0];
8935 next = ctxt->input->cur[1];
8936 if (cur == 0) {
8937 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8938 ctxt->sax->setDocumentLocator(ctxt->userData,
8939 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008940 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008941 ctxt->instate = XML_PARSER_EOF;
8942#ifdef DEBUG_PUSH
8943 xmlGenericError(xmlGenericErrorContext,
8944 "PP: entering EOF\n");
8945#endif
8946 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8947 ctxt->sax->endDocument(ctxt->userData);
8948 goto done;
8949 }
8950 if ((cur == '<') && (next == '?')) {
8951 /* PI or XML decl */
8952 if (avail < 5) return(ret);
8953 if ((!terminate) &&
8954 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8955 return(ret);
8956 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8957 ctxt->sax->setDocumentLocator(ctxt->userData,
8958 &xmlDefaultSAXLocator);
8959 if ((ctxt->input->cur[2] == 'x') &&
8960 (ctxt->input->cur[3] == 'm') &&
8961 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00008962 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008963 ret += 5;
8964#ifdef DEBUG_PUSH
8965 xmlGenericError(xmlGenericErrorContext,
8966 "PP: Parsing XML Decl\n");
8967#endif
8968 xmlParseXMLDecl(ctxt);
8969 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8970 /*
8971 * The XML REC instructs us to stop parsing right
8972 * here
8973 */
8974 ctxt->instate = XML_PARSER_EOF;
8975 return(0);
8976 }
8977 ctxt->standalone = ctxt->input->standalone;
8978 if ((ctxt->encoding == NULL) &&
8979 (ctxt->input->encoding != NULL))
8980 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8981 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8982 (!ctxt->disableSAX))
8983 ctxt->sax->startDocument(ctxt->userData);
8984 ctxt->instate = XML_PARSER_MISC;
8985#ifdef DEBUG_PUSH
8986 xmlGenericError(xmlGenericErrorContext,
8987 "PP: entering MISC\n");
8988#endif
8989 } else {
8990 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8991 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8992 (!ctxt->disableSAX))
8993 ctxt->sax->startDocument(ctxt->userData);
8994 ctxt->instate = XML_PARSER_MISC;
8995#ifdef DEBUG_PUSH
8996 xmlGenericError(xmlGenericErrorContext,
8997 "PP: entering MISC\n");
8998#endif
8999 }
9000 } else {
9001 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9002 ctxt->sax->setDocumentLocator(ctxt->userData,
9003 &xmlDefaultSAXLocator);
9004 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9005 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9006 (!ctxt->disableSAX))
9007 ctxt->sax->startDocument(ctxt->userData);
9008 ctxt->instate = XML_PARSER_MISC;
9009#ifdef DEBUG_PUSH
9010 xmlGenericError(xmlGenericErrorContext,
9011 "PP: entering MISC\n");
9012#endif
9013 }
9014 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009015 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009016 const xmlChar *name;
9017 const xmlChar *prefix;
9018 const xmlChar *URI;
9019 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009020
9021 if ((avail < 2) && (ctxt->inputNr == 1))
9022 goto done;
9023 cur = ctxt->input->cur[0];
9024 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009025 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009026 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009027 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9028 ctxt->sax->endDocument(ctxt->userData);
9029 goto done;
9030 }
9031 if (!terminate) {
9032 if (ctxt->progressive) {
9033 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9034 goto done;
9035 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9036 goto done;
9037 }
9038 }
9039 if (ctxt->spaceNr == 0)
9040 spacePush(ctxt, -1);
9041 else
9042 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009043#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009044 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009045#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009046 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009047#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009048 else
9049 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009050#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009051 if (name == NULL) {
9052 spacePop(ctxt);
9053 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009054 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9055 ctxt->sax->endDocument(ctxt->userData);
9056 goto done;
9057 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009058#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009059 /*
9060 * [ VC: Root Element Type ]
9061 * The Name in the document type declaration must match
9062 * the element type of the root element.
9063 */
9064 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9065 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9066 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009067#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009068
9069 /*
9070 * Check for an Empty Element.
9071 */
9072 if ((RAW == '/') && (NXT(1) == '>')) {
9073 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009074
9075 if (ctxt->sax2) {
9076 if ((ctxt->sax != NULL) &&
9077 (ctxt->sax->endElementNs != NULL) &&
9078 (!ctxt->disableSAX))
9079 ctxt->sax->endElementNs(ctxt->userData, name,
9080 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009081#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009082 } else {
9083 if ((ctxt->sax != NULL) &&
9084 (ctxt->sax->endElement != NULL) &&
9085 (!ctxt->disableSAX))
9086 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009087#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009088 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009089 spacePop(ctxt);
9090 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009091 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009092 } else {
9093 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009094 }
9095 break;
9096 }
9097 if (RAW == '>') {
9098 NEXT;
9099 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009100 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009101 "Couldn't find end of Start Tag %s\n",
9102 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009103 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009104 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009105 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009106 if (ctxt->sax2)
9107 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009108#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009109 else
9110 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009111#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009112
Daniel Veillarda880b122003-04-21 21:36:41 +00009113 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009114 break;
9115 }
9116 case XML_PARSER_CONTENT: {
9117 const xmlChar *test;
9118 unsigned int cons;
9119 if ((avail < 2) && (ctxt->inputNr == 1))
9120 goto done;
9121 cur = ctxt->input->cur[0];
9122 next = ctxt->input->cur[1];
9123
9124 test = CUR_PTR;
9125 cons = ctxt->input->consumed;
9126 if ((cur == '<') && (next == '/')) {
9127 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009128 break;
9129 } else if ((cur == '<') && (next == '?')) {
9130 if ((!terminate) &&
9131 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9132 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009133 xmlParsePI(ctxt);
9134 } else if ((cur == '<') && (next != '!')) {
9135 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009136 break;
9137 } else if ((cur == '<') && (next == '!') &&
9138 (ctxt->input->cur[2] == '-') &&
9139 (ctxt->input->cur[3] == '-')) {
9140 if ((!terminate) &&
9141 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9142 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009143 xmlParseComment(ctxt);
9144 ctxt->instate = XML_PARSER_CONTENT;
9145 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9146 (ctxt->input->cur[2] == '[') &&
9147 (ctxt->input->cur[3] == 'C') &&
9148 (ctxt->input->cur[4] == 'D') &&
9149 (ctxt->input->cur[5] == 'A') &&
9150 (ctxt->input->cur[6] == 'T') &&
9151 (ctxt->input->cur[7] == 'A') &&
9152 (ctxt->input->cur[8] == '[')) {
9153 SKIP(9);
9154 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009155 break;
9156 } else if ((cur == '<') && (next == '!') &&
9157 (avail < 9)) {
9158 goto done;
9159 } else if (cur == '&') {
9160 if ((!terminate) &&
9161 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9162 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009163 xmlParseReference(ctxt);
9164 } else {
9165 /* TODO Avoid the extra copy, handle directly !!! */
9166 /*
9167 * Goal of the following test is:
9168 * - minimize calls to the SAX 'character' callback
9169 * when they are mergeable
9170 * - handle an problem for isBlank when we only parse
9171 * a sequence of blank chars and the next one is
9172 * not available to check against '<' presence.
9173 * - tries to homogenize the differences in SAX
9174 * callbacks between the push and pull versions
9175 * of the parser.
9176 */
9177 if ((ctxt->inputNr == 1) &&
9178 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9179 if (!terminate) {
9180 if (ctxt->progressive) {
9181 if ((lastlt == NULL) ||
9182 (ctxt->input->cur > lastlt))
9183 goto done;
9184 } else if (xmlParseLookupSequence(ctxt,
9185 '<', 0, 0) < 0) {
9186 goto done;
9187 }
9188 }
9189 }
9190 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009191 xmlParseCharData(ctxt, 0);
9192 }
9193 /*
9194 * Pop-up of finished entities.
9195 */
9196 while ((RAW == 0) && (ctxt->inputNr > 1))
9197 xmlPopInput(ctxt);
9198 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009199 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9200 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009201 ctxt->instate = XML_PARSER_EOF;
9202 break;
9203 }
9204 break;
9205 }
9206 case XML_PARSER_END_TAG:
9207 if (avail < 2)
9208 goto done;
9209 if (!terminate) {
9210 if (ctxt->progressive) {
9211 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9212 goto done;
9213 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9214 goto done;
9215 }
9216 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009217 if (ctxt->sax2) {
9218 xmlParseEndTag2(ctxt,
9219 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9220 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009221 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009222 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009223 }
9224#ifdef LIBXML_SAX1_ENABLED
9225 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009226 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009227#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009228 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009230 } else {
9231 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009232 }
9233 break;
9234 case XML_PARSER_CDATA_SECTION: {
9235 /*
9236 * The Push mode need to have the SAX callback for
9237 * cdataBlock merge back contiguous callbacks.
9238 */
9239 int base;
9240
9241 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9242 if (base < 0) {
9243 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9244 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9245 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009246 ctxt->sax->cdataBlock(ctxt->userData,
9247 ctxt->input->cur,
9248 XML_PARSER_BIG_BUFFER_SIZE);
9249 else if (ctxt->sax->characters != NULL)
9250 ctxt->sax->characters(ctxt->userData,
9251 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009252 XML_PARSER_BIG_BUFFER_SIZE);
9253 }
9254 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9255 ctxt->checkIndex = 0;
9256 }
9257 goto done;
9258 } else {
9259 if ((ctxt->sax != NULL) && (base > 0) &&
9260 (!ctxt->disableSAX)) {
9261 if (ctxt->sax->cdataBlock != NULL)
9262 ctxt->sax->cdataBlock(ctxt->userData,
9263 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009264 else if (ctxt->sax->characters != NULL)
9265 ctxt->sax->characters(ctxt->userData,
9266 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009267 }
9268 SKIP(base + 3);
9269 ctxt->checkIndex = 0;
9270 ctxt->instate = XML_PARSER_CONTENT;
9271#ifdef DEBUG_PUSH
9272 xmlGenericError(xmlGenericErrorContext,
9273 "PP: entering CONTENT\n");
9274#endif
9275 }
9276 break;
9277 }
Owen Taylor3473f882001-02-23 17:55:21 +00009278 case XML_PARSER_MISC:
9279 SKIP_BLANKS;
9280 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009281 avail = ctxt->input->length -
9282 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009283 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009284 avail = ctxt->input->buf->buffer->use -
9285 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009286 if (avail < 2)
9287 goto done;
9288 cur = ctxt->input->cur[0];
9289 next = ctxt->input->cur[1];
9290 if ((cur == '<') && (next == '?')) {
9291 if ((!terminate) &&
9292 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9293 goto done;
9294#ifdef DEBUG_PUSH
9295 xmlGenericError(xmlGenericErrorContext,
9296 "PP: Parsing PI\n");
9297#endif
9298 xmlParsePI(ctxt);
9299 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009300 (ctxt->input->cur[2] == '-') &&
9301 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009302 if ((!terminate) &&
9303 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9304 goto done;
9305#ifdef DEBUG_PUSH
9306 xmlGenericError(xmlGenericErrorContext,
9307 "PP: Parsing Comment\n");
9308#endif
9309 xmlParseComment(ctxt);
9310 ctxt->instate = XML_PARSER_MISC;
9311 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009312 (ctxt->input->cur[2] == 'D') &&
9313 (ctxt->input->cur[3] == 'O') &&
9314 (ctxt->input->cur[4] == 'C') &&
9315 (ctxt->input->cur[5] == 'T') &&
9316 (ctxt->input->cur[6] == 'Y') &&
9317 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009318 (ctxt->input->cur[8] == 'E')) {
9319 if ((!terminate) &&
9320 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9321 goto done;
9322#ifdef DEBUG_PUSH
9323 xmlGenericError(xmlGenericErrorContext,
9324 "PP: Parsing internal subset\n");
9325#endif
9326 ctxt->inSubset = 1;
9327 xmlParseDocTypeDecl(ctxt);
9328 if (RAW == '[') {
9329 ctxt->instate = XML_PARSER_DTD;
9330#ifdef DEBUG_PUSH
9331 xmlGenericError(xmlGenericErrorContext,
9332 "PP: entering DTD\n");
9333#endif
9334 } else {
9335 /*
9336 * Create and update the external subset.
9337 */
9338 ctxt->inSubset = 2;
9339 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9340 (ctxt->sax->externalSubset != NULL))
9341 ctxt->sax->externalSubset(ctxt->userData,
9342 ctxt->intSubName, ctxt->extSubSystem,
9343 ctxt->extSubURI);
9344 ctxt->inSubset = 0;
9345 ctxt->instate = XML_PARSER_PROLOG;
9346#ifdef DEBUG_PUSH
9347 xmlGenericError(xmlGenericErrorContext,
9348 "PP: entering PROLOG\n");
9349#endif
9350 }
9351 } else if ((cur == '<') && (next == '!') &&
9352 (avail < 9)) {
9353 goto done;
9354 } else {
9355 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009356 ctxt->progressive = 1;
9357 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009358#ifdef DEBUG_PUSH
9359 xmlGenericError(xmlGenericErrorContext,
9360 "PP: entering START_TAG\n");
9361#endif
9362 }
9363 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009364 case XML_PARSER_PROLOG:
9365 SKIP_BLANKS;
9366 if (ctxt->input->buf == NULL)
9367 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9368 else
9369 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9370 if (avail < 2)
9371 goto done;
9372 cur = ctxt->input->cur[0];
9373 next = ctxt->input->cur[1];
9374 if ((cur == '<') && (next == '?')) {
9375 if ((!terminate) &&
9376 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9377 goto done;
9378#ifdef DEBUG_PUSH
9379 xmlGenericError(xmlGenericErrorContext,
9380 "PP: Parsing PI\n");
9381#endif
9382 xmlParsePI(ctxt);
9383 } else if ((cur == '<') && (next == '!') &&
9384 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9385 if ((!terminate) &&
9386 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9387 goto done;
9388#ifdef DEBUG_PUSH
9389 xmlGenericError(xmlGenericErrorContext,
9390 "PP: Parsing Comment\n");
9391#endif
9392 xmlParseComment(ctxt);
9393 ctxt->instate = XML_PARSER_PROLOG;
9394 } else if ((cur == '<') && (next == '!') &&
9395 (avail < 4)) {
9396 goto done;
9397 } else {
9398 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009399 ctxt->progressive = 1;
9400 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009401#ifdef DEBUG_PUSH
9402 xmlGenericError(xmlGenericErrorContext,
9403 "PP: entering START_TAG\n");
9404#endif
9405 }
9406 break;
9407 case XML_PARSER_EPILOG:
9408 SKIP_BLANKS;
9409 if (ctxt->input->buf == NULL)
9410 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9411 else
9412 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9413 if (avail < 2)
9414 goto done;
9415 cur = ctxt->input->cur[0];
9416 next = ctxt->input->cur[1];
9417 if ((cur == '<') && (next == '?')) {
9418 if ((!terminate) &&
9419 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9420 goto done;
9421#ifdef DEBUG_PUSH
9422 xmlGenericError(xmlGenericErrorContext,
9423 "PP: Parsing PI\n");
9424#endif
9425 xmlParsePI(ctxt);
9426 ctxt->instate = XML_PARSER_EPILOG;
9427 } else if ((cur == '<') && (next == '!') &&
9428 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9429 if ((!terminate) &&
9430 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9431 goto done;
9432#ifdef DEBUG_PUSH
9433 xmlGenericError(xmlGenericErrorContext,
9434 "PP: Parsing Comment\n");
9435#endif
9436 xmlParseComment(ctxt);
9437 ctxt->instate = XML_PARSER_EPILOG;
9438 } else if ((cur == '<') && (next == '!') &&
9439 (avail < 4)) {
9440 goto done;
9441 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009442 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009443 ctxt->instate = XML_PARSER_EOF;
9444#ifdef DEBUG_PUSH
9445 xmlGenericError(xmlGenericErrorContext,
9446 "PP: entering EOF\n");
9447#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009448 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009449 ctxt->sax->endDocument(ctxt->userData);
9450 goto done;
9451 }
9452 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009453 case XML_PARSER_DTD: {
9454 /*
9455 * Sorry but progressive parsing of the internal subset
9456 * is not expected to be supported. We first check that
9457 * the full content of the internal subset is available and
9458 * the parsing is launched only at that point.
9459 * Internal subset ends up with "']' S? '>'" in an unescaped
9460 * section and not in a ']]>' sequence which are conditional
9461 * sections (whoever argued to keep that crap in XML deserve
9462 * a place in hell !).
9463 */
9464 int base, i;
9465 xmlChar *buf;
9466 xmlChar quote = 0;
9467
9468 base = ctxt->input->cur - ctxt->input->base;
9469 if (base < 0) return(0);
9470 if (ctxt->checkIndex > base)
9471 base = ctxt->checkIndex;
9472 buf = ctxt->input->buf->buffer->content;
9473 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9474 base++) {
9475 if (quote != 0) {
9476 if (buf[base] == quote)
9477 quote = 0;
9478 continue;
9479 }
9480 if (buf[base] == '"') {
9481 quote = '"';
9482 continue;
9483 }
9484 if (buf[base] == '\'') {
9485 quote = '\'';
9486 continue;
9487 }
9488 if (buf[base] == ']') {
9489 if ((unsigned int) base +1 >=
9490 ctxt->input->buf->buffer->use)
9491 break;
9492 if (buf[base + 1] == ']') {
9493 /* conditional crap, skip both ']' ! */
9494 base++;
9495 continue;
9496 }
9497 for (i = 0;
9498 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9499 i++) {
9500 if (buf[base + i] == '>')
9501 goto found_end_int_subset;
9502 }
9503 break;
9504 }
9505 }
9506 /*
9507 * We didn't found the end of the Internal subset
9508 */
9509 if (quote == 0)
9510 ctxt->checkIndex = base;
9511#ifdef DEBUG_PUSH
9512 if (next == 0)
9513 xmlGenericError(xmlGenericErrorContext,
9514 "PP: lookup of int subset end filed\n");
9515#endif
9516 goto done;
9517
9518found_end_int_subset:
9519 xmlParseInternalSubset(ctxt);
9520 ctxt->inSubset = 2;
9521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9522 (ctxt->sax->externalSubset != NULL))
9523 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9524 ctxt->extSubSystem, ctxt->extSubURI);
9525 ctxt->inSubset = 0;
9526 ctxt->instate = XML_PARSER_PROLOG;
9527 ctxt->checkIndex = 0;
9528#ifdef DEBUG_PUSH
9529 xmlGenericError(xmlGenericErrorContext,
9530 "PP: entering PROLOG\n");
9531#endif
9532 break;
9533 }
9534 case XML_PARSER_COMMENT:
9535 xmlGenericError(xmlGenericErrorContext,
9536 "PP: internal error, state == COMMENT\n");
9537 ctxt->instate = XML_PARSER_CONTENT;
9538#ifdef DEBUG_PUSH
9539 xmlGenericError(xmlGenericErrorContext,
9540 "PP: entering CONTENT\n");
9541#endif
9542 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009543 case XML_PARSER_IGNORE:
9544 xmlGenericError(xmlGenericErrorContext,
9545 "PP: internal error, state == IGNORE");
9546 ctxt->instate = XML_PARSER_DTD;
9547#ifdef DEBUG_PUSH
9548 xmlGenericError(xmlGenericErrorContext,
9549 "PP: entering DTD\n");
9550#endif
9551 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009552 case XML_PARSER_PI:
9553 xmlGenericError(xmlGenericErrorContext,
9554 "PP: internal error, state == PI\n");
9555 ctxt->instate = XML_PARSER_CONTENT;
9556#ifdef DEBUG_PUSH
9557 xmlGenericError(xmlGenericErrorContext,
9558 "PP: entering CONTENT\n");
9559#endif
9560 break;
9561 case XML_PARSER_ENTITY_DECL:
9562 xmlGenericError(xmlGenericErrorContext,
9563 "PP: internal error, state == ENTITY_DECL\n");
9564 ctxt->instate = XML_PARSER_DTD;
9565#ifdef DEBUG_PUSH
9566 xmlGenericError(xmlGenericErrorContext,
9567 "PP: entering DTD\n");
9568#endif
9569 break;
9570 case XML_PARSER_ENTITY_VALUE:
9571 xmlGenericError(xmlGenericErrorContext,
9572 "PP: internal error, state == ENTITY_VALUE\n");
9573 ctxt->instate = XML_PARSER_CONTENT;
9574#ifdef DEBUG_PUSH
9575 xmlGenericError(xmlGenericErrorContext,
9576 "PP: entering DTD\n");
9577#endif
9578 break;
9579 case XML_PARSER_ATTRIBUTE_VALUE:
9580 xmlGenericError(xmlGenericErrorContext,
9581 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9582 ctxt->instate = XML_PARSER_START_TAG;
9583#ifdef DEBUG_PUSH
9584 xmlGenericError(xmlGenericErrorContext,
9585 "PP: entering START_TAG\n");
9586#endif
9587 break;
9588 case XML_PARSER_SYSTEM_LITERAL:
9589 xmlGenericError(xmlGenericErrorContext,
9590 "PP: internal error, state == SYSTEM_LITERAL\n");
9591 ctxt->instate = XML_PARSER_START_TAG;
9592#ifdef DEBUG_PUSH
9593 xmlGenericError(xmlGenericErrorContext,
9594 "PP: entering START_TAG\n");
9595#endif
9596 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009597 case XML_PARSER_PUBLIC_LITERAL:
9598 xmlGenericError(xmlGenericErrorContext,
9599 "PP: internal error, state == PUBLIC_LITERAL\n");
9600 ctxt->instate = XML_PARSER_START_TAG;
9601#ifdef DEBUG_PUSH
9602 xmlGenericError(xmlGenericErrorContext,
9603 "PP: entering START_TAG\n");
9604#endif
9605 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009606 }
9607 }
9608done:
9609#ifdef DEBUG_PUSH
9610 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9611#endif
9612 return(ret);
9613}
9614
9615/**
Owen Taylor3473f882001-02-23 17:55:21 +00009616 * xmlParseChunk:
9617 * @ctxt: an XML parser context
9618 * @chunk: an char array
9619 * @size: the size in byte of the chunk
9620 * @terminate: last chunk indicator
9621 *
9622 * Parse a Chunk of memory
9623 *
9624 * Returns zero if no error, the xmlParserErrors otherwise.
9625 */
9626int
9627xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9628 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009629 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9630 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009631 if (ctxt->instate == XML_PARSER_START)
9632 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009633 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9634 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9635 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9636 int cur = ctxt->input->cur - ctxt->input->base;
9637
9638 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9639 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9640 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009641 ctxt->input->end =
9642 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009643#ifdef DEBUG_PUSH
9644 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9645#endif
9646
Owen Taylor3473f882001-02-23 17:55:21 +00009647 } else if (ctxt->instate != XML_PARSER_EOF) {
9648 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9649 xmlParserInputBufferPtr in = ctxt->input->buf;
9650 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9651 (in->raw != NULL)) {
9652 int nbchars;
9653
9654 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9655 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009656 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009657 xmlGenericError(xmlGenericErrorContext,
9658 "xmlParseChunk: encoder error\n");
9659 return(XML_ERR_INVALID_ENCODING);
9660 }
9661 }
9662 }
9663 }
9664 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009665 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9666 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009667 if (terminate) {
9668 /*
9669 * Check for termination
9670 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009671 int avail = 0;
9672 if (ctxt->input->buf == NULL)
9673 avail = ctxt->input->length -
9674 (ctxt->input->cur - ctxt->input->base);
9675 else
9676 avail = ctxt->input->buf->buffer->use -
9677 (ctxt->input->cur - ctxt->input->base);
9678
Owen Taylor3473f882001-02-23 17:55:21 +00009679 if ((ctxt->instate != XML_PARSER_EOF) &&
9680 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009681 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009682 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009683 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009684 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009685 }
Owen Taylor3473f882001-02-23 17:55:21 +00009686 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009687 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009688 ctxt->sax->endDocument(ctxt->userData);
9689 }
9690 ctxt->instate = XML_PARSER_EOF;
9691 }
9692 return((xmlParserErrors) ctxt->errNo);
9693}
9694
9695/************************************************************************
9696 * *
9697 * I/O front end functions to the parser *
9698 * *
9699 ************************************************************************/
9700
9701/**
9702 * xmlStopParser:
9703 * @ctxt: an XML parser context
9704 *
9705 * Blocks further parser processing
9706 */
9707void
9708xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009709 if (ctxt == NULL)
9710 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009711 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009712 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009713 if (ctxt->input != NULL)
9714 ctxt->input->cur = BAD_CAST"";
9715}
9716
9717/**
9718 * xmlCreatePushParserCtxt:
9719 * @sax: a SAX handler
9720 * @user_data: The user data returned on SAX callbacks
9721 * @chunk: a pointer to an array of chars
9722 * @size: number of chars in the array
9723 * @filename: an optional file name or URI
9724 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009725 * Create a parser context for using the XML parser in push mode.
9726 * If @buffer and @size are non-NULL, the data is used to detect
9727 * the encoding. The remaining characters will be parsed so they
9728 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009729 * To allow content encoding detection, @size should be >= 4
9730 * The value of @filename is used for fetching external entities
9731 * and error/warning reports.
9732 *
9733 * Returns the new parser context or NULL
9734 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009735
Owen Taylor3473f882001-02-23 17:55:21 +00009736xmlParserCtxtPtr
9737xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9738 const char *chunk, int size, const char *filename) {
9739 xmlParserCtxtPtr ctxt;
9740 xmlParserInputPtr inputStream;
9741 xmlParserInputBufferPtr buf;
9742 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9743
9744 /*
9745 * plug some encoding conversion routines
9746 */
9747 if ((chunk != NULL) && (size >= 4))
9748 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9749
9750 buf = xmlAllocParserInputBuffer(enc);
9751 if (buf == NULL) return(NULL);
9752
9753 ctxt = xmlNewParserCtxt();
9754 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009755 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009756 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009757 return(NULL);
9758 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009759 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9760 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009761 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009762 xmlFreeParserInputBuffer(buf);
9763 xmlFreeParserCtxt(ctxt);
9764 return(NULL);
9765 }
Owen Taylor3473f882001-02-23 17:55:21 +00009766 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009767#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009768 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009769#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009770 xmlFree(ctxt->sax);
9771 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9772 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009773 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009774 xmlFreeParserInputBuffer(buf);
9775 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009776 return(NULL);
9777 }
9778 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9779 if (user_data != NULL)
9780 ctxt->userData = user_data;
9781 }
9782 if (filename == NULL) {
9783 ctxt->directory = NULL;
9784 } else {
9785 ctxt->directory = xmlParserGetDirectory(filename);
9786 }
9787
9788 inputStream = xmlNewInputStream(ctxt);
9789 if (inputStream == NULL) {
9790 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009791 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009792 return(NULL);
9793 }
9794
9795 if (filename == NULL)
9796 inputStream->filename = NULL;
9797 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009798 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009799 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009800 inputStream->buf = buf;
9801 inputStream->base = inputStream->buf->buffer->content;
9802 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009803 inputStream->end =
9804 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009805
9806 inputPush(ctxt, inputStream);
9807
9808 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9809 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009810 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9811 int cur = ctxt->input->cur - ctxt->input->base;
9812
Owen Taylor3473f882001-02-23 17:55:21 +00009813 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009814
9815 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9816 ctxt->input->cur = ctxt->input->base + cur;
9817 ctxt->input->end =
9818 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009819#ifdef DEBUG_PUSH
9820 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9821#endif
9822 }
9823
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009824 if (enc != XML_CHAR_ENCODING_NONE) {
9825 xmlSwitchEncoding(ctxt, enc);
9826 }
9827
Owen Taylor3473f882001-02-23 17:55:21 +00009828 return(ctxt);
9829}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009830#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009831
9832/**
9833 * xmlCreateIOParserCtxt:
9834 * @sax: a SAX handler
9835 * @user_data: The user data returned on SAX callbacks
9836 * @ioread: an I/O read function
9837 * @ioclose: an I/O close function
9838 * @ioctx: an I/O handler
9839 * @enc: the charset encoding if known
9840 *
9841 * Create a parser context for using the XML parser with an existing
9842 * I/O stream
9843 *
9844 * Returns the new parser context or NULL
9845 */
9846xmlParserCtxtPtr
9847xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9848 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9849 void *ioctx, xmlCharEncoding enc) {
9850 xmlParserCtxtPtr ctxt;
9851 xmlParserInputPtr inputStream;
9852 xmlParserInputBufferPtr buf;
9853
9854 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9855 if (buf == NULL) return(NULL);
9856
9857 ctxt = xmlNewParserCtxt();
9858 if (ctxt == NULL) {
9859 xmlFree(buf);
9860 return(NULL);
9861 }
9862 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009863#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009864 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009865#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009866 xmlFree(ctxt->sax);
9867 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9868 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009869 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009870 xmlFree(ctxt);
9871 return(NULL);
9872 }
9873 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9874 if (user_data != NULL)
9875 ctxt->userData = user_data;
9876 }
9877
9878 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9879 if (inputStream == NULL) {
9880 xmlFreeParserCtxt(ctxt);
9881 return(NULL);
9882 }
9883 inputPush(ctxt, inputStream);
9884
9885 return(ctxt);
9886}
9887
Daniel Veillard4432df22003-09-28 18:58:27 +00009888#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009889/************************************************************************
9890 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009891 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009892 * *
9893 ************************************************************************/
9894
9895/**
9896 * xmlIOParseDTD:
9897 * @sax: the SAX handler block or NULL
9898 * @input: an Input Buffer
9899 * @enc: the charset encoding if known
9900 *
9901 * Load and parse a DTD
9902 *
9903 * Returns the resulting xmlDtdPtr or NULL in case of error.
9904 * @input will be freed at parsing end.
9905 */
9906
9907xmlDtdPtr
9908xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9909 xmlCharEncoding enc) {
9910 xmlDtdPtr ret = NULL;
9911 xmlParserCtxtPtr ctxt;
9912 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009913 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009914
9915 if (input == NULL)
9916 return(NULL);
9917
9918 ctxt = xmlNewParserCtxt();
9919 if (ctxt == NULL) {
9920 return(NULL);
9921 }
9922
9923 /*
9924 * Set-up the SAX context
9925 */
9926 if (sax != NULL) {
9927 if (ctxt->sax != NULL)
9928 xmlFree(ctxt->sax);
9929 ctxt->sax = sax;
9930 ctxt->userData = NULL;
9931 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009932 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009933
9934 /*
9935 * generate a parser input from the I/O handler
9936 */
9937
Daniel Veillard43caefb2003-12-07 19:32:22 +00009938 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +00009939 if (pinput == NULL) {
9940 if (sax != NULL) ctxt->sax = NULL;
9941 xmlFreeParserCtxt(ctxt);
9942 return(NULL);
9943 }
9944
9945 /*
9946 * plug some encoding conversion routines here.
9947 */
9948 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +00009949 if (enc != XML_CHAR_ENCODING_NONE) {
9950 xmlSwitchEncoding(ctxt, enc);
9951 }
Owen Taylor3473f882001-02-23 17:55:21 +00009952
9953 pinput->filename = NULL;
9954 pinput->line = 1;
9955 pinput->col = 1;
9956 pinput->base = ctxt->input->cur;
9957 pinput->cur = ctxt->input->cur;
9958 pinput->free = NULL;
9959
9960 /*
9961 * let's parse that entity knowing it's an external subset.
9962 */
9963 ctxt->inSubset = 2;
9964 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9965 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9966 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009967
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009968 if ((enc == XML_CHAR_ENCODING_NONE) &&
9969 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00009970 /*
9971 * Get the 4 first bytes and decode the charset
9972 * if enc != XML_CHAR_ENCODING_NONE
9973 * plug some encoding conversion routines.
9974 */
9975 start[0] = RAW;
9976 start[1] = NXT(1);
9977 start[2] = NXT(2);
9978 start[3] = NXT(3);
9979 enc = xmlDetectCharEncoding(start, 4);
9980 if (enc != XML_CHAR_ENCODING_NONE) {
9981 xmlSwitchEncoding(ctxt, enc);
9982 }
9983 }
9984
Owen Taylor3473f882001-02-23 17:55:21 +00009985 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9986
9987 if (ctxt->myDoc != NULL) {
9988 if (ctxt->wellFormed) {
9989 ret = ctxt->myDoc->extSubset;
9990 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009991 if (ret != NULL) {
9992 xmlNodePtr tmp;
9993
9994 ret->doc = NULL;
9995 tmp = ret->children;
9996 while (tmp != NULL) {
9997 tmp->doc = NULL;
9998 tmp = tmp->next;
9999 }
10000 }
Owen Taylor3473f882001-02-23 17:55:21 +000010001 } else {
10002 ret = NULL;
10003 }
10004 xmlFreeDoc(ctxt->myDoc);
10005 ctxt->myDoc = NULL;
10006 }
10007 if (sax != NULL) ctxt->sax = NULL;
10008 xmlFreeParserCtxt(ctxt);
10009
10010 return(ret);
10011}
10012
10013/**
10014 * xmlSAXParseDTD:
10015 * @sax: the SAX handler block
10016 * @ExternalID: a NAME* containing the External ID of the DTD
10017 * @SystemID: a NAME* containing the URL to the DTD
10018 *
10019 * Load and parse an external subset.
10020 *
10021 * Returns the resulting xmlDtdPtr or NULL in case of error.
10022 */
10023
10024xmlDtdPtr
10025xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10026 const xmlChar *SystemID) {
10027 xmlDtdPtr ret = NULL;
10028 xmlParserCtxtPtr ctxt;
10029 xmlParserInputPtr input = NULL;
10030 xmlCharEncoding enc;
10031
10032 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10033
10034 ctxt = xmlNewParserCtxt();
10035 if (ctxt == NULL) {
10036 return(NULL);
10037 }
10038
10039 /*
10040 * Set-up the SAX context
10041 */
10042 if (sax != NULL) {
10043 if (ctxt->sax != NULL)
10044 xmlFree(ctxt->sax);
10045 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010046 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010047 }
10048
10049 /*
10050 * Ask the Entity resolver to load the damn thing
10051 */
10052
10053 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010054 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010055 if (input == NULL) {
10056 if (sax != NULL) ctxt->sax = NULL;
10057 xmlFreeParserCtxt(ctxt);
10058 return(NULL);
10059 }
10060
10061 /*
10062 * plug some encoding conversion routines here.
10063 */
10064 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010065 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10066 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10067 xmlSwitchEncoding(ctxt, enc);
10068 }
Owen Taylor3473f882001-02-23 17:55:21 +000010069
10070 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010071 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010072 input->line = 1;
10073 input->col = 1;
10074 input->base = ctxt->input->cur;
10075 input->cur = ctxt->input->cur;
10076 input->free = NULL;
10077
10078 /*
10079 * let's parse that entity knowing it's an external subset.
10080 */
10081 ctxt->inSubset = 2;
10082 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10083 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10084 ExternalID, SystemID);
10085 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10086
10087 if (ctxt->myDoc != NULL) {
10088 if (ctxt->wellFormed) {
10089 ret = ctxt->myDoc->extSubset;
10090 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010091 if (ret != NULL) {
10092 xmlNodePtr tmp;
10093
10094 ret->doc = NULL;
10095 tmp = ret->children;
10096 while (tmp != NULL) {
10097 tmp->doc = NULL;
10098 tmp = tmp->next;
10099 }
10100 }
Owen Taylor3473f882001-02-23 17:55:21 +000010101 } else {
10102 ret = NULL;
10103 }
10104 xmlFreeDoc(ctxt->myDoc);
10105 ctxt->myDoc = NULL;
10106 }
10107 if (sax != NULL) ctxt->sax = NULL;
10108 xmlFreeParserCtxt(ctxt);
10109
10110 return(ret);
10111}
10112
Daniel Veillard4432df22003-09-28 18:58:27 +000010113
Owen Taylor3473f882001-02-23 17:55:21 +000010114/**
10115 * xmlParseDTD:
10116 * @ExternalID: a NAME* containing the External ID of the DTD
10117 * @SystemID: a NAME* containing the URL to the DTD
10118 *
10119 * Load and parse an external subset.
10120 *
10121 * Returns the resulting xmlDtdPtr or NULL in case of error.
10122 */
10123
10124xmlDtdPtr
10125xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10126 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10127}
Daniel Veillard4432df22003-09-28 18:58:27 +000010128#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010129
10130/************************************************************************
10131 * *
10132 * Front ends when parsing an Entity *
10133 * *
10134 ************************************************************************/
10135
10136/**
Owen Taylor3473f882001-02-23 17:55:21 +000010137 * xmlParseCtxtExternalEntity:
10138 * @ctx: the existing parsing context
10139 * @URL: the URL for the entity to load
10140 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010141 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010142 *
10143 * Parse an external general entity within an existing parsing context
10144 * An external general parsed entity is well-formed if it matches the
10145 * production labeled extParsedEnt.
10146 *
10147 * [78] extParsedEnt ::= TextDecl? content
10148 *
10149 * Returns 0 if the entity is well formed, -1 in case of args problem and
10150 * the parser error code otherwise
10151 */
10152
10153int
10154xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010155 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010156 xmlParserCtxtPtr ctxt;
10157 xmlDocPtr newDoc;
10158 xmlSAXHandlerPtr oldsax = NULL;
10159 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010160 xmlChar start[4];
10161 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010162
10163 if (ctx->depth > 40) {
10164 return(XML_ERR_ENTITY_LOOP);
10165 }
10166
Daniel Veillardcda96922001-08-21 10:56:31 +000010167 if (lst != NULL)
10168 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010169 if ((URL == NULL) && (ID == NULL))
10170 return(-1);
10171 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10172 return(-1);
10173
10174
10175 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10176 if (ctxt == NULL) return(-1);
10177 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010178 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010179 oldsax = ctxt->sax;
10180 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010181 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010182 newDoc = xmlNewDoc(BAD_CAST "1.0");
10183 if (newDoc == NULL) {
10184 xmlFreeParserCtxt(ctxt);
10185 return(-1);
10186 }
10187 if (ctx->myDoc != NULL) {
10188 newDoc->intSubset = ctx->myDoc->intSubset;
10189 newDoc->extSubset = ctx->myDoc->extSubset;
10190 }
10191 if (ctx->myDoc->URL != NULL) {
10192 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10193 }
10194 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10195 if (newDoc->children == NULL) {
10196 ctxt->sax = oldsax;
10197 xmlFreeParserCtxt(ctxt);
10198 newDoc->intSubset = NULL;
10199 newDoc->extSubset = NULL;
10200 xmlFreeDoc(newDoc);
10201 return(-1);
10202 }
10203 nodePush(ctxt, newDoc->children);
10204 if (ctx->myDoc == NULL) {
10205 ctxt->myDoc = newDoc;
10206 } else {
10207 ctxt->myDoc = ctx->myDoc;
10208 newDoc->children->doc = ctx->myDoc;
10209 }
10210
Daniel Veillard87a764e2001-06-20 17:41:10 +000010211 /*
10212 * Get the 4 first bytes and decode the charset
10213 * if enc != XML_CHAR_ENCODING_NONE
10214 * plug some encoding conversion routines.
10215 */
10216 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010217 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10218 start[0] = RAW;
10219 start[1] = NXT(1);
10220 start[2] = NXT(2);
10221 start[3] = NXT(3);
10222 enc = xmlDetectCharEncoding(start, 4);
10223 if (enc != XML_CHAR_ENCODING_NONE) {
10224 xmlSwitchEncoding(ctxt, enc);
10225 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010226 }
10227
Owen Taylor3473f882001-02-23 17:55:21 +000010228 /*
10229 * Parse a possible text declaration first
10230 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010231 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010232 xmlParseTextDecl(ctxt);
10233 }
10234
10235 /*
10236 * Doing validity checking on chunk doesn't make sense
10237 */
10238 ctxt->instate = XML_PARSER_CONTENT;
10239 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010240 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010241 ctxt->loadsubset = ctx->loadsubset;
10242 ctxt->depth = ctx->depth + 1;
10243 ctxt->replaceEntities = ctx->replaceEntities;
10244 if (ctxt->validate) {
10245 ctxt->vctxt.error = ctx->vctxt.error;
10246 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010247 } else {
10248 ctxt->vctxt.error = NULL;
10249 ctxt->vctxt.warning = NULL;
10250 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010251 ctxt->vctxt.nodeTab = NULL;
10252 ctxt->vctxt.nodeNr = 0;
10253 ctxt->vctxt.nodeMax = 0;
10254 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010255 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10256 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010257 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10258 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10259 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010260 ctxt->dictNames = ctx->dictNames;
10261 ctxt->attsDefault = ctx->attsDefault;
10262 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010263
10264 xmlParseContent(ctxt);
10265
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010266 ctx->validate = ctxt->validate;
10267 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010268 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010269 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010270 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010271 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010272 }
10273 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010274 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010275 }
10276
10277 if (!ctxt->wellFormed) {
10278 if (ctxt->errNo == 0)
10279 ret = 1;
10280 else
10281 ret = ctxt->errNo;
10282 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010283 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010284 xmlNodePtr cur;
10285
10286 /*
10287 * Return the newly created nodeset after unlinking it from
10288 * they pseudo parent.
10289 */
10290 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010291 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010292 while (cur != NULL) {
10293 cur->parent = NULL;
10294 cur = cur->next;
10295 }
10296 newDoc->children->children = NULL;
10297 }
10298 ret = 0;
10299 }
10300 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010301 ctxt->dict = NULL;
10302 ctxt->attsDefault = NULL;
10303 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010304 xmlFreeParserCtxt(ctxt);
10305 newDoc->intSubset = NULL;
10306 newDoc->extSubset = NULL;
10307 xmlFreeDoc(newDoc);
10308
10309 return(ret);
10310}
10311
10312/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010313 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010314 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010315 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010316 * @sax: the SAX handler bloc (possibly NULL)
10317 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10318 * @depth: Used for loop detection, use 0
10319 * @URL: the URL for the entity to load
10320 * @ID: the System ID for the entity to load
10321 * @list: the return value for the set of parsed nodes
10322 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010323 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010324 *
10325 * Returns 0 if the entity is well formed, -1 in case of args problem and
10326 * the parser error code otherwise
10327 */
10328
Daniel Veillard7d515752003-09-26 19:12:37 +000010329static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010330xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10331 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010332 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010333 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010334 xmlParserCtxtPtr ctxt;
10335 xmlDocPtr newDoc;
10336 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010337 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010338 xmlChar start[4];
10339 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010340
10341 if (depth > 40) {
10342 return(XML_ERR_ENTITY_LOOP);
10343 }
10344
10345
10346
10347 if (list != NULL)
10348 *list = NULL;
10349 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010350 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010351 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010352 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010353
10354
10355 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010356 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010357 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010358 if (oldctxt != NULL) {
10359 ctxt->_private = oldctxt->_private;
10360 ctxt->loadsubset = oldctxt->loadsubset;
10361 ctxt->validate = oldctxt->validate;
10362 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010363 ctxt->record_info = oldctxt->record_info;
10364 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10365 ctxt->node_seq.length = oldctxt->node_seq.length;
10366 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010367 } else {
10368 /*
10369 * Doing validity checking on chunk without context
10370 * doesn't make sense
10371 */
10372 ctxt->_private = NULL;
10373 ctxt->validate = 0;
10374 ctxt->external = 2;
10375 ctxt->loadsubset = 0;
10376 }
Owen Taylor3473f882001-02-23 17:55:21 +000010377 if (sax != NULL) {
10378 oldsax = ctxt->sax;
10379 ctxt->sax = sax;
10380 if (user_data != NULL)
10381 ctxt->userData = user_data;
10382 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010383 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010384 newDoc = xmlNewDoc(BAD_CAST "1.0");
10385 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010386 ctxt->node_seq.maximum = 0;
10387 ctxt->node_seq.length = 0;
10388 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010389 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010390 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010391 }
10392 if (doc != NULL) {
10393 newDoc->intSubset = doc->intSubset;
10394 newDoc->extSubset = doc->extSubset;
10395 }
10396 if (doc->URL != NULL) {
10397 newDoc->URL = xmlStrdup(doc->URL);
10398 }
10399 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10400 if (newDoc->children == NULL) {
10401 if (sax != NULL)
10402 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010403 ctxt->node_seq.maximum = 0;
10404 ctxt->node_seq.length = 0;
10405 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010406 xmlFreeParserCtxt(ctxt);
10407 newDoc->intSubset = NULL;
10408 newDoc->extSubset = NULL;
10409 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010410 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010411 }
10412 nodePush(ctxt, newDoc->children);
10413 if (doc == NULL) {
10414 ctxt->myDoc = newDoc;
10415 } else {
10416 ctxt->myDoc = doc;
10417 newDoc->children->doc = doc;
10418 }
10419
Daniel Veillard87a764e2001-06-20 17:41:10 +000010420 /*
10421 * Get the 4 first bytes and decode the charset
10422 * if enc != XML_CHAR_ENCODING_NONE
10423 * plug some encoding conversion routines.
10424 */
10425 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010426 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10427 start[0] = RAW;
10428 start[1] = NXT(1);
10429 start[2] = NXT(2);
10430 start[3] = NXT(3);
10431 enc = xmlDetectCharEncoding(start, 4);
10432 if (enc != XML_CHAR_ENCODING_NONE) {
10433 xmlSwitchEncoding(ctxt, enc);
10434 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010435 }
10436
Owen Taylor3473f882001-02-23 17:55:21 +000010437 /*
10438 * Parse a possible text declaration first
10439 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010440 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010441 xmlParseTextDecl(ctxt);
10442 }
10443
Owen Taylor3473f882001-02-23 17:55:21 +000010444 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010445 ctxt->depth = depth;
10446
10447 xmlParseContent(ctxt);
10448
Daniel Veillard561b7f82002-03-20 21:55:57 +000010449 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010450 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010451 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010452 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010453 }
10454 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010455 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010456 }
10457
10458 if (!ctxt->wellFormed) {
10459 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010460 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010461 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010462 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010463 } else {
10464 if (list != NULL) {
10465 xmlNodePtr cur;
10466
10467 /*
10468 * Return the newly created nodeset after unlinking it from
10469 * they pseudo parent.
10470 */
10471 cur = newDoc->children->children;
10472 *list = cur;
10473 while (cur != NULL) {
10474 cur->parent = NULL;
10475 cur = cur->next;
10476 }
10477 newDoc->children->children = NULL;
10478 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010479 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010480 }
10481 if (sax != NULL)
10482 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010483 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10484 oldctxt->node_seq.length = ctxt->node_seq.length;
10485 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010486 ctxt->node_seq.maximum = 0;
10487 ctxt->node_seq.length = 0;
10488 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010489 xmlFreeParserCtxt(ctxt);
10490 newDoc->intSubset = NULL;
10491 newDoc->extSubset = NULL;
10492 xmlFreeDoc(newDoc);
10493
10494 return(ret);
10495}
10496
Daniel Veillard81273902003-09-30 00:43:48 +000010497#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010498/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010499 * xmlParseExternalEntity:
10500 * @doc: the document the chunk pertains to
10501 * @sax: the SAX handler bloc (possibly NULL)
10502 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10503 * @depth: Used for loop detection, use 0
10504 * @URL: the URL for the entity to load
10505 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010506 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010507 *
10508 * Parse an external general entity
10509 * An external general parsed entity is well-formed if it matches the
10510 * production labeled extParsedEnt.
10511 *
10512 * [78] extParsedEnt ::= TextDecl? content
10513 *
10514 * Returns 0 if the entity is well formed, -1 in case of args problem and
10515 * the parser error code otherwise
10516 */
10517
10518int
10519xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010520 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010521 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010522 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010523}
10524
10525/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010526 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010527 * @doc: the document the chunk pertains to
10528 * @sax: the SAX handler bloc (possibly NULL)
10529 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10530 * @depth: Used for loop detection, use 0
10531 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010532 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010533 *
10534 * Parse a well-balanced chunk of an XML document
10535 * called by the parser
10536 * The allowed sequence for the Well Balanced Chunk is the one defined by
10537 * the content production in the XML grammar:
10538 *
10539 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10540 *
10541 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10542 * the parser error code otherwise
10543 */
10544
10545int
10546xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010547 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010548 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10549 depth, string, lst, 0 );
10550}
Daniel Veillard81273902003-09-30 00:43:48 +000010551#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010552
10553/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010554 * xmlParseBalancedChunkMemoryInternal:
10555 * @oldctxt: the existing parsing context
10556 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10557 * @user_data: the user data field for the parser context
10558 * @lst: the return value for the set of parsed nodes
10559 *
10560 *
10561 * Parse a well-balanced chunk of an XML document
10562 * called by the parser
10563 * The allowed sequence for the Well Balanced Chunk is the one defined by
10564 * the content production in the XML grammar:
10565 *
10566 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10567 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010568 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10569 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010570 *
10571 * In case recover is set to 1, the nodelist will not be empty even if
10572 * the parsed chunk is not well balanced.
10573 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010574static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010575xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10576 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10577 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010578 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010579 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010580 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010581 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010582 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010583
10584 if (oldctxt->depth > 40) {
10585 return(XML_ERR_ENTITY_LOOP);
10586 }
10587
10588
10589 if (lst != NULL)
10590 *lst = NULL;
10591 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010592 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010593
10594 size = xmlStrlen(string);
10595
10596 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010597 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010598 if (user_data != NULL)
10599 ctxt->userData = user_data;
10600 else
10601 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010602 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10603 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010604 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10605 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10606 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010607
10608 oldsax = ctxt->sax;
10609 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010610 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010611 ctxt->replaceEntities = oldctxt->replaceEntities;
10612 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010613
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010614 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010615 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010616 newDoc = xmlNewDoc(BAD_CAST "1.0");
10617 if (newDoc == NULL) {
10618 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010619 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010620 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010621 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010622 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010623 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010624 } else {
10625 ctxt->myDoc = oldctxt->myDoc;
10626 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010627 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010628 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010629 BAD_CAST "pseudoroot", NULL);
10630 if (ctxt->myDoc->children == NULL) {
10631 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010632 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010633 xmlFreeParserCtxt(ctxt);
10634 if (newDoc != NULL)
10635 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010636 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010637 }
10638 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010639 ctxt->instate = XML_PARSER_CONTENT;
10640 ctxt->depth = oldctxt->depth + 1;
10641
Daniel Veillard328f48c2002-11-15 15:24:34 +000010642 ctxt->validate = 0;
10643 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010644 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10645 /*
10646 * ID/IDREF registration will be done in xmlValidateElement below
10647 */
10648 ctxt->loadsubset |= XML_SKIP_IDS;
10649 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010650 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010651 ctxt->attsDefault = oldctxt->attsDefault;
10652 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010653
Daniel Veillard68e9e742002-11-16 15:35:11 +000010654 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010655 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010656 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010657 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010658 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010659 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010660 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010661 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010662 }
10663
10664 if (!ctxt->wellFormed) {
10665 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010666 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010667 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010668 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010669 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010670 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010671 }
10672
William M. Brack7b9154b2003-09-27 19:23:50 +000010673 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010674 xmlNodePtr cur;
10675
10676 /*
10677 * Return the newly created nodeset after unlinking it from
10678 * they pseudo parent.
10679 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010680 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010681 *lst = cur;
10682 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010683#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010684 if (oldctxt->validate && oldctxt->wellFormed &&
10685 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10686 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10687 oldctxt->myDoc, cur);
10688 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010689#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010690 cur->parent = NULL;
10691 cur = cur->next;
10692 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010693 ctxt->myDoc->children->children = NULL;
10694 }
10695 if (ctxt->myDoc != NULL) {
10696 xmlFreeNode(ctxt->myDoc->children);
10697 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010698 }
10699
10700 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010701 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010702 ctxt->attsDefault = NULL;
10703 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010704 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010705 if (newDoc != NULL)
10706 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010707
10708 return(ret);
10709}
10710
Daniel Veillard81273902003-09-30 00:43:48 +000010711#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010712/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010713 * xmlParseBalancedChunkMemoryRecover:
10714 * @doc: the document the chunk pertains to
10715 * @sax: the SAX handler bloc (possibly NULL)
10716 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10717 * @depth: Used for loop detection, use 0
10718 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10719 * @lst: the return value for the set of parsed nodes
10720 * @recover: return nodes even if the data is broken (use 0)
10721 *
10722 *
10723 * Parse a well-balanced chunk of an XML document
10724 * called by the parser
10725 * The allowed sequence for the Well Balanced Chunk is the one defined by
10726 * the content production in the XML grammar:
10727 *
10728 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10729 *
10730 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10731 * the parser error code otherwise
10732 *
10733 * In case recover is set to 1, the nodelist will not be empty even if
10734 * the parsed chunk is not well balanced.
10735 */
10736int
10737xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10738 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10739 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010740 xmlParserCtxtPtr ctxt;
10741 xmlDocPtr newDoc;
10742 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010743 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010744 int size;
10745 int ret = 0;
10746
10747 if (depth > 40) {
10748 return(XML_ERR_ENTITY_LOOP);
10749 }
10750
10751
Daniel Veillardcda96922001-08-21 10:56:31 +000010752 if (lst != NULL)
10753 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010754 if (string == NULL)
10755 return(-1);
10756
10757 size = xmlStrlen(string);
10758
10759 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10760 if (ctxt == NULL) return(-1);
10761 ctxt->userData = ctxt;
10762 if (sax != NULL) {
10763 oldsax = ctxt->sax;
10764 ctxt->sax = sax;
10765 if (user_data != NULL)
10766 ctxt->userData = user_data;
10767 }
10768 newDoc = xmlNewDoc(BAD_CAST "1.0");
10769 if (newDoc == NULL) {
10770 xmlFreeParserCtxt(ctxt);
10771 return(-1);
10772 }
10773 if (doc != NULL) {
10774 newDoc->intSubset = doc->intSubset;
10775 newDoc->extSubset = doc->extSubset;
10776 }
10777 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10778 if (newDoc->children == NULL) {
10779 if (sax != NULL)
10780 ctxt->sax = oldsax;
10781 xmlFreeParserCtxt(ctxt);
10782 newDoc->intSubset = NULL;
10783 newDoc->extSubset = NULL;
10784 xmlFreeDoc(newDoc);
10785 return(-1);
10786 }
10787 nodePush(ctxt, newDoc->children);
10788 if (doc == NULL) {
10789 ctxt->myDoc = newDoc;
10790 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010791 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010792 newDoc->children->doc = doc;
10793 }
10794 ctxt->instate = XML_PARSER_CONTENT;
10795 ctxt->depth = depth;
10796
10797 /*
10798 * Doing validity checking on chunk doesn't make sense
10799 */
10800 ctxt->validate = 0;
10801 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010802 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010803
Daniel Veillardb39bc392002-10-26 19:29:51 +000010804 if ( doc != NULL ){
10805 content = doc->children;
10806 doc->children = NULL;
10807 xmlParseContent(ctxt);
10808 doc->children = content;
10809 }
10810 else {
10811 xmlParseContent(ctxt);
10812 }
Owen Taylor3473f882001-02-23 17:55:21 +000010813 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010814 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010815 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010816 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010817 }
10818 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010819 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010820 }
10821
10822 if (!ctxt->wellFormed) {
10823 if (ctxt->errNo == 0)
10824 ret = 1;
10825 else
10826 ret = ctxt->errNo;
10827 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010828 ret = 0;
10829 }
10830
10831 if (lst != NULL && (ret == 0 || recover == 1)) {
10832 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010833
10834 /*
10835 * Return the newly created nodeset after unlinking it from
10836 * they pseudo parent.
10837 */
10838 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010839 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010840 while (cur != NULL) {
10841 cur->parent = NULL;
10842 cur = cur->next;
10843 }
10844 newDoc->children->children = NULL;
10845 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010846
Owen Taylor3473f882001-02-23 17:55:21 +000010847 if (sax != NULL)
10848 ctxt->sax = oldsax;
10849 xmlFreeParserCtxt(ctxt);
10850 newDoc->intSubset = NULL;
10851 newDoc->extSubset = NULL;
10852 xmlFreeDoc(newDoc);
10853
10854 return(ret);
10855}
10856
10857/**
10858 * xmlSAXParseEntity:
10859 * @sax: the SAX handler block
10860 * @filename: the filename
10861 *
10862 * parse an XML external entity out of context and build a tree.
10863 * It use the given SAX function block to handle the parsing callback.
10864 * If sax is NULL, fallback to the default DOM tree building routines.
10865 *
10866 * [78] extParsedEnt ::= TextDecl? content
10867 *
10868 * This correspond to a "Well Balanced" chunk
10869 *
10870 * Returns the resulting document tree
10871 */
10872
10873xmlDocPtr
10874xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10875 xmlDocPtr ret;
10876 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010877
10878 ctxt = xmlCreateFileParserCtxt(filename);
10879 if (ctxt == NULL) {
10880 return(NULL);
10881 }
10882 if (sax != NULL) {
10883 if (ctxt->sax != NULL)
10884 xmlFree(ctxt->sax);
10885 ctxt->sax = sax;
10886 ctxt->userData = NULL;
10887 }
10888
Owen Taylor3473f882001-02-23 17:55:21 +000010889 xmlParseExtParsedEnt(ctxt);
10890
10891 if (ctxt->wellFormed)
10892 ret = ctxt->myDoc;
10893 else {
10894 ret = NULL;
10895 xmlFreeDoc(ctxt->myDoc);
10896 ctxt->myDoc = NULL;
10897 }
10898 if (sax != NULL)
10899 ctxt->sax = NULL;
10900 xmlFreeParserCtxt(ctxt);
10901
10902 return(ret);
10903}
10904
10905/**
10906 * xmlParseEntity:
10907 * @filename: the filename
10908 *
10909 * parse an XML external entity out of context and build a tree.
10910 *
10911 * [78] extParsedEnt ::= TextDecl? content
10912 *
10913 * This correspond to a "Well Balanced" chunk
10914 *
10915 * Returns the resulting document tree
10916 */
10917
10918xmlDocPtr
10919xmlParseEntity(const char *filename) {
10920 return(xmlSAXParseEntity(NULL, filename));
10921}
Daniel Veillard81273902003-09-30 00:43:48 +000010922#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010923
10924/**
10925 * xmlCreateEntityParserCtxt:
10926 * @URL: the entity URL
10927 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010928 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010929 *
10930 * Create a parser context for an external entity
10931 * Automatic support for ZLIB/Compress compressed document is provided
10932 * by default if found at compile-time.
10933 *
10934 * Returns the new parser context or NULL
10935 */
10936xmlParserCtxtPtr
10937xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10938 const xmlChar *base) {
10939 xmlParserCtxtPtr ctxt;
10940 xmlParserInputPtr inputStream;
10941 char *directory = NULL;
10942 xmlChar *uri;
10943
10944 ctxt = xmlNewParserCtxt();
10945 if (ctxt == NULL) {
10946 return(NULL);
10947 }
10948
10949 uri = xmlBuildURI(URL, base);
10950
10951 if (uri == NULL) {
10952 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10953 if (inputStream == NULL) {
10954 xmlFreeParserCtxt(ctxt);
10955 return(NULL);
10956 }
10957
10958 inputPush(ctxt, inputStream);
10959
10960 if ((ctxt->directory == NULL) && (directory == NULL))
10961 directory = xmlParserGetDirectory((char *)URL);
10962 if ((ctxt->directory == NULL) && (directory != NULL))
10963 ctxt->directory = directory;
10964 } else {
10965 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10966 if (inputStream == NULL) {
10967 xmlFree(uri);
10968 xmlFreeParserCtxt(ctxt);
10969 return(NULL);
10970 }
10971
10972 inputPush(ctxt, inputStream);
10973
10974 if ((ctxt->directory == NULL) && (directory == NULL))
10975 directory = xmlParserGetDirectory((char *)uri);
10976 if ((ctxt->directory == NULL) && (directory != NULL))
10977 ctxt->directory = directory;
10978 xmlFree(uri);
10979 }
Owen Taylor3473f882001-02-23 17:55:21 +000010980 return(ctxt);
10981}
10982
10983/************************************************************************
10984 * *
10985 * Front ends when parsing from a file *
10986 * *
10987 ************************************************************************/
10988
10989/**
Daniel Veillard61b93382003-11-03 14:28:31 +000010990 * xmlCreateURLParserCtxt:
10991 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010992 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000010993 *
Daniel Veillard61b93382003-11-03 14:28:31 +000010994 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000010995 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000010996 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000010997 *
10998 * Returns the new parser context or NULL
10999 */
11000xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011001xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011002{
11003 xmlParserCtxtPtr ctxt;
11004 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011005 char *directory = NULL;
11006
Owen Taylor3473f882001-02-23 17:55:21 +000011007 ctxt = xmlNewParserCtxt();
11008 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011009 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011010 return(NULL);
11011 }
11012
Daniel Veillard61b93382003-11-03 14:28:31 +000011013 if (options != 0)
11014 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011015
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011016 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011017 if (inputStream == NULL) {
11018 xmlFreeParserCtxt(ctxt);
11019 return(NULL);
11020 }
11021
Owen Taylor3473f882001-02-23 17:55:21 +000011022 inputPush(ctxt, inputStream);
11023 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011024 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011025 if ((ctxt->directory == NULL) && (directory != NULL))
11026 ctxt->directory = directory;
11027
11028 return(ctxt);
11029}
11030
Daniel Veillard61b93382003-11-03 14:28:31 +000011031/**
11032 * xmlCreateFileParserCtxt:
11033 * @filename: the filename
11034 *
11035 * Create a parser context for a file content.
11036 * Automatic support for ZLIB/Compress compressed document is provided
11037 * by default if found at compile-time.
11038 *
11039 * Returns the new parser context or NULL
11040 */
11041xmlParserCtxtPtr
11042xmlCreateFileParserCtxt(const char *filename)
11043{
11044 return(xmlCreateURLParserCtxt(filename, 0));
11045}
11046
Daniel Veillard81273902003-09-30 00:43:48 +000011047#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011048/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011049 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011050 * @sax: the SAX handler block
11051 * @filename: the filename
11052 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11053 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011054 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011055 *
11056 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11057 * compressed document is provided by default if found at compile-time.
11058 * It use the given SAX function block to handle the parsing callback.
11059 * If sax is NULL, fallback to the default DOM tree building routines.
11060 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011061 * User data (void *) is stored within the parser context in the
11062 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011063 *
Owen Taylor3473f882001-02-23 17:55:21 +000011064 * Returns the resulting document tree
11065 */
11066
11067xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011068xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11069 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011070 xmlDocPtr ret;
11071 xmlParserCtxtPtr ctxt;
11072 char *directory = NULL;
11073
Daniel Veillard635ef722001-10-29 11:48:19 +000011074 xmlInitParser();
11075
Owen Taylor3473f882001-02-23 17:55:21 +000011076 ctxt = xmlCreateFileParserCtxt(filename);
11077 if (ctxt == NULL) {
11078 return(NULL);
11079 }
11080 if (sax != NULL) {
11081 if (ctxt->sax != NULL)
11082 xmlFree(ctxt->sax);
11083 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011084 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011085 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011086 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011087 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011088 }
Owen Taylor3473f882001-02-23 17:55:21 +000011089
11090 if ((ctxt->directory == NULL) && (directory == NULL))
11091 directory = xmlParserGetDirectory(filename);
11092 if ((ctxt->directory == NULL) && (directory != NULL))
11093 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11094
Daniel Veillarddad3f682002-11-17 16:47:27 +000011095 ctxt->recovery = recovery;
11096
Owen Taylor3473f882001-02-23 17:55:21 +000011097 xmlParseDocument(ctxt);
11098
William M. Brackc07329e2003-09-08 01:57:30 +000011099 if ((ctxt->wellFormed) || recovery) {
11100 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011101 if (ret != NULL) {
11102 if (ctxt->input->buf->compressed > 0)
11103 ret->compression = 9;
11104 else
11105 ret->compression = ctxt->input->buf->compressed;
11106 }
William M. Brackc07329e2003-09-08 01:57:30 +000011107 }
Owen Taylor3473f882001-02-23 17:55:21 +000011108 else {
11109 ret = NULL;
11110 xmlFreeDoc(ctxt->myDoc);
11111 ctxt->myDoc = NULL;
11112 }
11113 if (sax != NULL)
11114 ctxt->sax = NULL;
11115 xmlFreeParserCtxt(ctxt);
11116
11117 return(ret);
11118}
11119
11120/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011121 * xmlSAXParseFile:
11122 * @sax: the SAX handler block
11123 * @filename: the filename
11124 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11125 * documents
11126 *
11127 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11128 * compressed document is provided by default if found at compile-time.
11129 * It use the given SAX function block to handle the parsing callback.
11130 * If sax is NULL, fallback to the default DOM tree building routines.
11131 *
11132 * Returns the resulting document tree
11133 */
11134
11135xmlDocPtr
11136xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11137 int recovery) {
11138 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11139}
11140
11141/**
Owen Taylor3473f882001-02-23 17:55:21 +000011142 * xmlRecoverDoc:
11143 * @cur: a pointer to an array of xmlChar
11144 *
11145 * parse an XML in-memory document and build a tree.
11146 * In the case the document is not Well Formed, a tree is built anyway
11147 *
11148 * Returns the resulting document tree
11149 */
11150
11151xmlDocPtr
11152xmlRecoverDoc(xmlChar *cur) {
11153 return(xmlSAXParseDoc(NULL, cur, 1));
11154}
11155
11156/**
11157 * xmlParseFile:
11158 * @filename: the filename
11159 *
11160 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11161 * compressed document is provided by default if found at compile-time.
11162 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011163 * Returns the resulting document tree if the file was wellformed,
11164 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011165 */
11166
11167xmlDocPtr
11168xmlParseFile(const char *filename) {
11169 return(xmlSAXParseFile(NULL, filename, 0));
11170}
11171
11172/**
11173 * xmlRecoverFile:
11174 * @filename: the filename
11175 *
11176 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11177 * compressed document is provided by default if found at compile-time.
11178 * In the case the document is not Well Formed, a tree is built anyway
11179 *
11180 * Returns the resulting document tree
11181 */
11182
11183xmlDocPtr
11184xmlRecoverFile(const char *filename) {
11185 return(xmlSAXParseFile(NULL, filename, 1));
11186}
11187
11188
11189/**
11190 * xmlSetupParserForBuffer:
11191 * @ctxt: an XML parser context
11192 * @buffer: a xmlChar * buffer
11193 * @filename: a file name
11194 *
11195 * Setup the parser context to parse a new buffer; Clears any prior
11196 * contents from the parser context. The buffer parameter must not be
11197 * NULL, but the filename parameter can be
11198 */
11199void
11200xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11201 const char* filename)
11202{
11203 xmlParserInputPtr input;
11204
11205 input = xmlNewInputStream(ctxt);
11206 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011207 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011208 xmlFree(ctxt);
11209 return;
11210 }
11211
11212 xmlClearParserCtxt(ctxt);
11213 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011214 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011215 input->base = buffer;
11216 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011217 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011218 inputPush(ctxt, input);
11219}
11220
11221/**
11222 * xmlSAXUserParseFile:
11223 * @sax: a SAX handler
11224 * @user_data: The user data returned on SAX callbacks
11225 * @filename: a file name
11226 *
11227 * parse an XML file and call the given SAX handler routines.
11228 * Automatic support for ZLIB/Compress compressed document is provided
11229 *
11230 * Returns 0 in case of success or a error number otherwise
11231 */
11232int
11233xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11234 const char *filename) {
11235 int ret = 0;
11236 xmlParserCtxtPtr ctxt;
11237
11238 ctxt = xmlCreateFileParserCtxt(filename);
11239 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011240#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011241 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011242#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011243 xmlFree(ctxt->sax);
11244 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011245 xmlDetectSAX2(ctxt);
11246
Owen Taylor3473f882001-02-23 17:55:21 +000011247 if (user_data != NULL)
11248 ctxt->userData = user_data;
11249
11250 xmlParseDocument(ctxt);
11251
11252 if (ctxt->wellFormed)
11253 ret = 0;
11254 else {
11255 if (ctxt->errNo != 0)
11256 ret = ctxt->errNo;
11257 else
11258 ret = -1;
11259 }
11260 if (sax != NULL)
11261 ctxt->sax = NULL;
11262 xmlFreeParserCtxt(ctxt);
11263
11264 return ret;
11265}
Daniel Veillard81273902003-09-30 00:43:48 +000011266#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011267
11268/************************************************************************
11269 * *
11270 * Front ends when parsing from memory *
11271 * *
11272 ************************************************************************/
11273
11274/**
11275 * xmlCreateMemoryParserCtxt:
11276 * @buffer: a pointer to a char array
11277 * @size: the size of the array
11278 *
11279 * Create a parser context for an XML in-memory document.
11280 *
11281 * Returns the new parser context or NULL
11282 */
11283xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011284xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011285 xmlParserCtxtPtr ctxt;
11286 xmlParserInputPtr input;
11287 xmlParserInputBufferPtr buf;
11288
11289 if (buffer == NULL)
11290 return(NULL);
11291 if (size <= 0)
11292 return(NULL);
11293
11294 ctxt = xmlNewParserCtxt();
11295 if (ctxt == NULL)
11296 return(NULL);
11297
Daniel Veillard53350552003-09-18 13:35:51 +000011298 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011299 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011300 if (buf == NULL) {
11301 xmlFreeParserCtxt(ctxt);
11302 return(NULL);
11303 }
Owen Taylor3473f882001-02-23 17:55:21 +000011304
11305 input = xmlNewInputStream(ctxt);
11306 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011307 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011308 xmlFreeParserCtxt(ctxt);
11309 return(NULL);
11310 }
11311
11312 input->filename = NULL;
11313 input->buf = buf;
11314 input->base = input->buf->buffer->content;
11315 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011316 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011317
11318 inputPush(ctxt, input);
11319 return(ctxt);
11320}
11321
Daniel Veillard81273902003-09-30 00:43:48 +000011322#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011323/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011324 * xmlSAXParseMemoryWithData:
11325 * @sax: the SAX handler block
11326 * @buffer: an pointer to a char array
11327 * @size: the size of the array
11328 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11329 * documents
11330 * @data: the userdata
11331 *
11332 * parse an XML in-memory block and use the given SAX function block
11333 * to handle the parsing callback. If sax is NULL, fallback to the default
11334 * DOM tree building routines.
11335 *
11336 * User data (void *) is stored within the parser context in the
11337 * context's _private member, so it is available nearly everywhere in libxml
11338 *
11339 * Returns the resulting document tree
11340 */
11341
11342xmlDocPtr
11343xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11344 int size, int recovery, void *data) {
11345 xmlDocPtr ret;
11346 xmlParserCtxtPtr ctxt;
11347
11348 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11349 if (ctxt == NULL) return(NULL);
11350 if (sax != NULL) {
11351 if (ctxt->sax != NULL)
11352 xmlFree(ctxt->sax);
11353 ctxt->sax = sax;
11354 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011355 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011356 if (data!=NULL) {
11357 ctxt->_private=data;
11358 }
11359
Daniel Veillardadba5f12003-04-04 16:09:01 +000011360 ctxt->recovery = recovery;
11361
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011362 xmlParseDocument(ctxt);
11363
11364 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11365 else {
11366 ret = NULL;
11367 xmlFreeDoc(ctxt->myDoc);
11368 ctxt->myDoc = NULL;
11369 }
11370 if (sax != NULL)
11371 ctxt->sax = NULL;
11372 xmlFreeParserCtxt(ctxt);
11373
11374 return(ret);
11375}
11376
11377/**
Owen Taylor3473f882001-02-23 17:55:21 +000011378 * xmlSAXParseMemory:
11379 * @sax: the SAX handler block
11380 * @buffer: an pointer to a char array
11381 * @size: the size of the array
11382 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11383 * documents
11384 *
11385 * parse an XML in-memory block and use the given SAX function block
11386 * to handle the parsing callback. If sax is NULL, fallback to the default
11387 * DOM tree building routines.
11388 *
11389 * Returns the resulting document tree
11390 */
11391xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011392xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11393 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011394 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011395}
11396
11397/**
11398 * xmlParseMemory:
11399 * @buffer: an pointer to a char array
11400 * @size: the size of the array
11401 *
11402 * parse an XML in-memory block and build a tree.
11403 *
11404 * Returns the resulting document tree
11405 */
11406
Daniel Veillard50822cb2001-07-26 20:05:51 +000011407xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011408 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11409}
11410
11411/**
11412 * xmlRecoverMemory:
11413 * @buffer: an pointer to a char array
11414 * @size: the size of the array
11415 *
11416 * parse an XML in-memory block and build a tree.
11417 * In the case the document is not Well Formed, a tree is built anyway
11418 *
11419 * Returns the resulting document tree
11420 */
11421
Daniel Veillard50822cb2001-07-26 20:05:51 +000011422xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011423 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11424}
11425
11426/**
11427 * xmlSAXUserParseMemory:
11428 * @sax: a SAX handler
11429 * @user_data: The user data returned on SAX callbacks
11430 * @buffer: an in-memory XML document input
11431 * @size: the length of the XML document in bytes
11432 *
11433 * A better SAX parsing routine.
11434 * parse an XML in-memory buffer and call the given SAX handler routines.
11435 *
11436 * Returns 0 in case of success or a error number otherwise
11437 */
11438int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011439 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011440 int ret = 0;
11441 xmlParserCtxtPtr ctxt;
11442 xmlSAXHandlerPtr oldsax = NULL;
11443
Daniel Veillard9e923512002-08-14 08:48:52 +000011444 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011445 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11446 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011447 oldsax = ctxt->sax;
11448 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011449 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011450 if (user_data != NULL)
11451 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011452
11453 xmlParseDocument(ctxt);
11454
11455 if (ctxt->wellFormed)
11456 ret = 0;
11457 else {
11458 if (ctxt->errNo != 0)
11459 ret = ctxt->errNo;
11460 else
11461 ret = -1;
11462 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011463 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011464 xmlFreeParserCtxt(ctxt);
11465
11466 return ret;
11467}
Daniel Veillard81273902003-09-30 00:43:48 +000011468#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011469
11470/**
11471 * xmlCreateDocParserCtxt:
11472 * @cur: a pointer to an array of xmlChar
11473 *
11474 * Creates a parser context for an XML in-memory document.
11475 *
11476 * Returns the new parser context or NULL
11477 */
11478xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011479xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011480 int len;
11481
11482 if (cur == NULL)
11483 return(NULL);
11484 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011485 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011486}
11487
Daniel Veillard81273902003-09-30 00:43:48 +000011488#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011489/**
11490 * xmlSAXParseDoc:
11491 * @sax: the SAX handler block
11492 * @cur: a pointer to an array of xmlChar
11493 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11494 * documents
11495 *
11496 * parse an XML in-memory document and build a tree.
11497 * It use the given SAX function block to handle the parsing callback.
11498 * If sax is NULL, fallback to the default DOM tree building routines.
11499 *
11500 * Returns the resulting document tree
11501 */
11502
11503xmlDocPtr
11504xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11505 xmlDocPtr ret;
11506 xmlParserCtxtPtr ctxt;
11507
11508 if (cur == NULL) return(NULL);
11509
11510
11511 ctxt = xmlCreateDocParserCtxt(cur);
11512 if (ctxt == NULL) return(NULL);
11513 if (sax != NULL) {
11514 ctxt->sax = sax;
11515 ctxt->userData = NULL;
11516 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011517 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011518
11519 xmlParseDocument(ctxt);
11520 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11521 else {
11522 ret = NULL;
11523 xmlFreeDoc(ctxt->myDoc);
11524 ctxt->myDoc = NULL;
11525 }
11526 if (sax != NULL)
11527 ctxt->sax = NULL;
11528 xmlFreeParserCtxt(ctxt);
11529
11530 return(ret);
11531}
11532
11533/**
11534 * xmlParseDoc:
11535 * @cur: a pointer to an array of xmlChar
11536 *
11537 * parse an XML in-memory document and build a tree.
11538 *
11539 * Returns the resulting document tree
11540 */
11541
11542xmlDocPtr
11543xmlParseDoc(xmlChar *cur) {
11544 return(xmlSAXParseDoc(NULL, cur, 0));
11545}
Daniel Veillard81273902003-09-30 00:43:48 +000011546#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011547
Daniel Veillard81273902003-09-30 00:43:48 +000011548#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011549/************************************************************************
11550 * *
11551 * Specific function to keep track of entities references *
11552 * and used by the XSLT debugger *
11553 * *
11554 ************************************************************************/
11555
11556static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11557
11558/**
11559 * xmlAddEntityReference:
11560 * @ent : A valid entity
11561 * @firstNode : A valid first node for children of entity
11562 * @lastNode : A valid last node of children entity
11563 *
11564 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11565 */
11566static void
11567xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11568 xmlNodePtr lastNode)
11569{
11570 if (xmlEntityRefFunc != NULL) {
11571 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11572 }
11573}
11574
11575
11576/**
11577 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011578 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011579 *
11580 * Set the function to call call back when a xml reference has been made
11581 */
11582void
11583xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11584{
11585 xmlEntityRefFunc = func;
11586}
Daniel Veillard81273902003-09-30 00:43:48 +000011587#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011588
11589/************************************************************************
11590 * *
11591 * Miscellaneous *
11592 * *
11593 ************************************************************************/
11594
11595#ifdef LIBXML_XPATH_ENABLED
11596#include <libxml/xpath.h>
11597#endif
11598
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011599extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011600static int xmlParserInitialized = 0;
11601
11602/**
11603 * xmlInitParser:
11604 *
11605 * Initialization function for the XML parser.
11606 * This is not reentrant. Call once before processing in case of
11607 * use in multithreaded programs.
11608 */
11609
11610void
11611xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011612 if (xmlParserInitialized != 0)
11613 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011614
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011615 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11616 (xmlGenericError == NULL))
11617 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011618 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011619 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011620 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011621 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011622 xmlDefaultSAXHandlerInit();
11623 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011624#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011625 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011626#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011627#ifdef LIBXML_HTML_ENABLED
11628 htmlInitAutoClose();
11629 htmlDefaultSAXHandlerInit();
11630#endif
11631#ifdef LIBXML_XPATH_ENABLED
11632 xmlXPathInit();
11633#endif
11634 xmlParserInitialized = 1;
11635}
11636
11637/**
11638 * xmlCleanupParser:
11639 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011640 * Cleanup function for the XML library. It tries to reclaim all
11641 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011642 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011643 * function should not prevent reusing the library but one should
11644 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011645 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011646 */
11647
11648void
11649xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011650 if (!xmlParserInitialized)
11651 return;
11652
Owen Taylor3473f882001-02-23 17:55:21 +000011653 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011654#ifdef LIBXML_CATALOG_ENABLED
11655 xmlCatalogCleanup();
11656#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011657 xmlCleanupInputCallbacks();
11658#ifdef LIBXML_OUTPUT_ENABLED
11659 xmlCleanupOutputCallbacks();
11660#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011661 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011662 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011663 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011664 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011665 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011666}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011667
11668/************************************************************************
11669 * *
11670 * New set (2.6.0) of simpler and more flexible APIs *
11671 * *
11672 ************************************************************************/
11673
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; a NULL "dict" means the string is always freed.
 * A NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement (safe inside an unbraced if/else); the caller
 * supplies the terminating semicolon. @str is evaluated more than
 * once, so it must not have side effects.
 */
#define DICT_FREE(str)							\
    do {								\
        if ((str) && ((!dict) ||					\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))		\
            xmlFree((char *)(str));					\
    } while (0)
11685
11686/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011687 * xmlCtxtReset:
11688 * @ctxt: an XML parser context
11689 *
11690 * Reset a parser context
11691 */
11692void
11693xmlCtxtReset(xmlParserCtxtPtr ctxt)
11694{
11695 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011696 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011697
11698 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11699 xmlFreeInputStream(input);
11700 }
11701 ctxt->inputNr = 0;
11702 ctxt->input = NULL;
11703
11704 ctxt->spaceNr = 0;
11705 ctxt->spaceTab[0] = -1;
11706 ctxt->space = &ctxt->spaceTab[0];
11707
11708
11709 ctxt->nodeNr = 0;
11710 ctxt->node = NULL;
11711
11712 ctxt->nameNr = 0;
11713 ctxt->name = NULL;
11714
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011715 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011716 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011717 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011718 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011719 DICT_FREE(ctxt->directory);
11720 ctxt->directory = NULL;
11721 DICT_FREE(ctxt->extSubURI);
11722 ctxt->extSubURI = NULL;
11723 DICT_FREE(ctxt->extSubSystem);
11724 ctxt->extSubSystem = NULL;
11725 if (ctxt->myDoc != NULL)
11726 xmlFreeDoc(ctxt->myDoc);
11727 ctxt->myDoc = NULL;
11728
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011729 ctxt->standalone = -1;
11730 ctxt->hasExternalSubset = 0;
11731 ctxt->hasPErefs = 0;
11732 ctxt->html = 0;
11733 ctxt->external = 0;
11734 ctxt->instate = XML_PARSER_START;
11735 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011736
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011737 ctxt->wellFormed = 1;
11738 ctxt->nsWellFormed = 1;
11739 ctxt->valid = 1;
11740 ctxt->vctxt.userData = ctxt;
11741 ctxt->vctxt.error = xmlParserValidityError;
11742 ctxt->vctxt.warning = xmlParserValidityWarning;
11743 ctxt->record_info = 0;
11744 ctxt->nbChars = 0;
11745 ctxt->checkIndex = 0;
11746 ctxt->inSubset = 0;
11747 ctxt->errNo = XML_ERR_OK;
11748 ctxt->depth = 0;
11749 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11750 ctxt->catalogs = NULL;
11751 xmlInitNodeInfoSeq(&ctxt->node_seq);
11752
11753 if (ctxt->attsDefault != NULL) {
11754 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11755 ctxt->attsDefault = NULL;
11756 }
11757 if (ctxt->attsSpecial != NULL) {
11758 xmlHashFree(ctxt->attsSpecial, NULL);
11759 ctxt->attsSpecial = NULL;
11760 }
11761
Daniel Veillard4432df22003-09-28 18:58:27 +000011762#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011763 if (ctxt->catalogs != NULL)
11764 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011765#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011766 if (ctxt->lastError.code != XML_ERR_OK)
11767 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011768}
11769
11770/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011771 * xmlCtxtResetPush:
11772 * @ctxt: an XML parser context
11773 * @chunk: a pointer to an array of chars
11774 * @size: number of chars in the array
11775 * @filename: an optional file name or URI
11776 * @encoding: the document encoding, or NULL
11777 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011778 * Reset a push parser context
11779 *
11780 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011781 */
11782int
11783xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11784 int size, const char *filename, const char *encoding)
11785{
11786 xmlParserInputPtr inputStream;
11787 xmlParserInputBufferPtr buf;
11788 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11789
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011790 if (ctxt == NULL)
11791 return(1);
11792
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011793 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11794 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11795
11796 buf = xmlAllocParserInputBuffer(enc);
11797 if (buf == NULL)
11798 return(1);
11799
11800 if (ctxt == NULL) {
11801 xmlFreeParserInputBuffer(buf);
11802 return(1);
11803 }
11804
11805 xmlCtxtReset(ctxt);
11806
11807 if (ctxt->pushTab == NULL) {
11808 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11809 sizeof(xmlChar *));
11810 if (ctxt->pushTab == NULL) {
11811 xmlErrMemory(ctxt, NULL);
11812 xmlFreeParserInputBuffer(buf);
11813 return(1);
11814 }
11815 }
11816
11817 if (filename == NULL) {
11818 ctxt->directory = NULL;
11819 } else {
11820 ctxt->directory = xmlParserGetDirectory(filename);
11821 }
11822
11823 inputStream = xmlNewInputStream(ctxt);
11824 if (inputStream == NULL) {
11825 xmlFreeParserInputBuffer(buf);
11826 return(1);
11827 }
11828
11829 if (filename == NULL)
11830 inputStream->filename = NULL;
11831 else
11832 inputStream->filename = (char *)
11833 xmlCanonicPath((const xmlChar *) filename);
11834 inputStream->buf = buf;
11835 inputStream->base = inputStream->buf->buffer->content;
11836 inputStream->cur = inputStream->buf->buffer->content;
11837 inputStream->end =
11838 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11839
11840 inputPush(ctxt, inputStream);
11841
11842 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11843 (ctxt->input->buf != NULL)) {
11844 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11845 int cur = ctxt->input->cur - ctxt->input->base;
11846
11847 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11848
11849 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11850 ctxt->input->cur = ctxt->input->base + cur;
11851 ctxt->input->end =
11852 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11853 use];
11854#ifdef DEBUG_PUSH
11855 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11856#endif
11857 }
11858
11859 if (encoding != NULL) {
11860 xmlCharEncodingHandlerPtr hdlr;
11861
11862 hdlr = xmlFindCharEncodingHandler(encoding);
11863 if (hdlr != NULL) {
11864 xmlSwitchToEncoding(ctxt, hdlr);
11865 } else {
11866 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11867 "Unsupported encoding %s\n", BAD_CAST encoding);
11868 }
11869 } else if (enc != XML_CHAR_ENCODING_NONE) {
11870 xmlSwitchEncoding(ctxt, enc);
11871 }
11872
11873 return(0);
11874}
11875
11876/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011877 * xmlCtxtUseOptions:
11878 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011879 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011880 *
11881 * Applies the options to the parser context
11882 *
11883 * Returns 0 in case of success, the set of unknown or unimplemented options
11884 * in case of error.
11885 */
11886int
11887xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11888{
11889 if (options & XML_PARSE_RECOVER) {
11890 ctxt->recovery = 1;
11891 options -= XML_PARSE_RECOVER;
11892 } else
11893 ctxt->recovery = 0;
11894 if (options & XML_PARSE_DTDLOAD) {
11895 ctxt->loadsubset = XML_DETECT_IDS;
11896 options -= XML_PARSE_DTDLOAD;
11897 } else
11898 ctxt->loadsubset = 0;
11899 if (options & XML_PARSE_DTDATTR) {
11900 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
11901 options -= XML_PARSE_DTDATTR;
11902 }
11903 if (options & XML_PARSE_NOENT) {
11904 ctxt->replaceEntities = 1;
11905 /* ctxt->loadsubset |= XML_DETECT_IDS; */
11906 options -= XML_PARSE_NOENT;
11907 } else
11908 ctxt->replaceEntities = 0;
11909 if (options & XML_PARSE_NOWARNING) {
11910 ctxt->sax->warning = NULL;
11911 options -= XML_PARSE_NOWARNING;
11912 }
11913 if (options & XML_PARSE_NOERROR) {
11914 ctxt->sax->error = NULL;
11915 ctxt->sax->fatalError = NULL;
11916 options -= XML_PARSE_NOERROR;
11917 }
11918 if (options & XML_PARSE_PEDANTIC) {
11919 ctxt->pedantic = 1;
11920 options -= XML_PARSE_PEDANTIC;
11921 } else
11922 ctxt->pedantic = 0;
11923 if (options & XML_PARSE_NOBLANKS) {
11924 ctxt->keepBlanks = 0;
11925 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
11926 options -= XML_PARSE_NOBLANKS;
11927 } else
11928 ctxt->keepBlanks = 1;
11929 if (options & XML_PARSE_DTDVALID) {
11930 ctxt->validate = 1;
11931 if (options & XML_PARSE_NOWARNING)
11932 ctxt->vctxt.warning = NULL;
11933 if (options & XML_PARSE_NOERROR)
11934 ctxt->vctxt.error = NULL;
11935 options -= XML_PARSE_DTDVALID;
11936 } else
11937 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000011938#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011939 if (options & XML_PARSE_SAX1) {
11940 ctxt->sax->startElement = xmlSAX2StartElement;
11941 ctxt->sax->endElement = xmlSAX2EndElement;
11942 ctxt->sax->startElementNs = NULL;
11943 ctxt->sax->endElementNs = NULL;
11944 ctxt->sax->initialized = 1;
11945 options -= XML_PARSE_SAX1;
11946 }
Daniel Veillard81273902003-09-30 00:43:48 +000011947#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011948 if (options & XML_PARSE_NODICT) {
11949 ctxt->dictNames = 0;
11950 options -= XML_PARSE_NODICT;
11951 } else {
11952 ctxt->dictNames = 1;
11953 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000011954 if (options & XML_PARSE_NOCDATA) {
11955 ctxt->sax->cdataBlock = NULL;
11956 options -= XML_PARSE_NOCDATA;
11957 }
11958 if (options & XML_PARSE_NSCLEAN) {
11959 ctxt->options |= XML_PARSE_NSCLEAN;
11960 options -= XML_PARSE_NSCLEAN;
11961 }
Daniel Veillard61b93382003-11-03 14:28:31 +000011962 if (options & XML_PARSE_NONET) {
11963 ctxt->options |= XML_PARSE_NONET;
11964 options -= XML_PARSE_NONET;
11965 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000011966 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011967 return (options);
11968}
11969
11970/**
11971 * xmlDoRead:
11972 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000011973 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011974 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011975 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011976 * @reuse: keep the context for reuse
11977 *
11978 * Common front-end for the xmlRead functions
11979 *
11980 * Returns the resulting document tree or NULL
11981 */
11982static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000011983xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
11984 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011985{
11986 xmlDocPtr ret;
11987
11988 xmlCtxtUseOptions(ctxt, options);
11989 if (encoding != NULL) {
11990 xmlCharEncodingHandlerPtr hdlr;
11991
11992 hdlr = xmlFindCharEncodingHandler(encoding);
11993 if (hdlr != NULL)
11994 xmlSwitchToEncoding(ctxt, hdlr);
11995 }
Daniel Veillard60942de2003-09-25 21:05:58 +000011996 if ((URL != NULL) && (ctxt->input != NULL) &&
11997 (ctxt->input->filename == NULL))
11998 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011999 xmlParseDocument(ctxt);
12000 if ((ctxt->wellFormed) || ctxt->recovery)
12001 ret = ctxt->myDoc;
12002 else {
12003 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012004 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012005 if ((ctxt->dictNames) &&
12006 (ctxt->myDoc->dict == ctxt->dict))
12007 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012008 xmlFreeDoc(ctxt->myDoc);
12009 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012010 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012011 ctxt->myDoc = NULL;
12012 if (!reuse) {
12013 if ((ctxt->dictNames) &&
12014 (ret != NULL) &&
12015 (ret->dict == ctxt->dict))
12016 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012017 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012018 } else {
12019 /* Must duplicate the reference to the dictionary */
12020 if ((ctxt->dictNames) &&
12021 (ret != NULL) &&
12022 (ret->dict == ctxt->dict))
12023 xmlDictReference(ctxt->dict);
12024 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012025
12026 return (ret);
12027}
12028
12029/**
12030 * xmlReadDoc:
12031 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012032 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012033 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012034 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012035 *
12036 * parse an XML in-memory document and build a tree.
12037 *
12038 * Returns the resulting document tree
12039 */
12040xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012041xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012042{
12043 xmlParserCtxtPtr ctxt;
12044
12045 if (cur == NULL)
12046 return (NULL);
12047
12048 ctxt = xmlCreateDocParserCtxt(cur);
12049 if (ctxt == NULL)
12050 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012051 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012052}
12053
12054/**
12055 * xmlReadFile:
12056 * @filename: a file or URL
12057 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012058 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012059 *
12060 * parse an XML file from the filesystem or the network.
12061 *
12062 * Returns the resulting document tree
12063 */
12064xmlDocPtr
12065xmlReadFile(const char *filename, const char *encoding, int options)
12066{
12067 xmlParserCtxtPtr ctxt;
12068
Daniel Veillard61b93382003-11-03 14:28:31 +000012069 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012070 if (ctxt == NULL)
12071 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012072 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012073}
12074
12075/**
12076 * xmlReadMemory:
12077 * @buffer: a pointer to a char array
12078 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012079 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012080 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012081 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012082 *
12083 * parse an XML in-memory document and build a tree.
12084 *
12085 * Returns the resulting document tree
12086 */
12087xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012088xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012089{
12090 xmlParserCtxtPtr ctxt;
12091
12092 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12093 if (ctxt == NULL)
12094 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012095 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012096}
12097
12098/**
12099 * xmlReadFd:
12100 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012101 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012102 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012103 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012104 *
12105 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012106 * NOTE that the file descriptor will not be closed when the
12107 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012108 *
12109 * Returns the resulting document tree
12110 */
12111xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012112xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012113{
12114 xmlParserCtxtPtr ctxt;
12115 xmlParserInputBufferPtr input;
12116 xmlParserInputPtr stream;
12117
12118 if (fd < 0)
12119 return (NULL);
12120
12121 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12122 if (input == NULL)
12123 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012124 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012125 ctxt = xmlNewParserCtxt();
12126 if (ctxt == NULL) {
12127 xmlFreeParserInputBuffer(input);
12128 return (NULL);
12129 }
12130 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12131 if (stream == NULL) {
12132 xmlFreeParserInputBuffer(input);
12133 xmlFreeParserCtxt(ctxt);
12134 return (NULL);
12135 }
12136 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012137 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012138}
12139
12140/**
12141 * xmlReadIO:
12142 * @ioread: an I/O read function
12143 * @ioclose: an I/O close function
12144 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012145 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012146 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012147 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012148 *
12149 * parse an XML document from I/O functions and source and build a tree.
12150 *
12151 * Returns the resulting document tree
12152 */
12153xmlDocPtr
12154xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012155 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012156{
12157 xmlParserCtxtPtr ctxt;
12158 xmlParserInputBufferPtr input;
12159 xmlParserInputPtr stream;
12160
12161 if (ioread == NULL)
12162 return (NULL);
12163
12164 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12165 XML_CHAR_ENCODING_NONE);
12166 if (input == NULL)
12167 return (NULL);
12168 ctxt = xmlNewParserCtxt();
12169 if (ctxt == NULL) {
12170 xmlFreeParserInputBuffer(input);
12171 return (NULL);
12172 }
12173 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12174 if (stream == NULL) {
12175 xmlFreeParserInputBuffer(input);
12176 xmlFreeParserCtxt(ctxt);
12177 return (NULL);
12178 }
12179 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012180 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012181}
12182
12183/**
12184 * xmlCtxtReadDoc:
12185 * @ctxt: an XML parser context
12186 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012187 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012188 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012189 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012190 *
12191 * parse an XML in-memory document and build a tree.
12192 * This reuses the existing @ctxt parser context
12193 *
12194 * Returns the resulting document tree
12195 */
12196xmlDocPtr
12197xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012198 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012199{
12200 xmlParserInputPtr stream;
12201
12202 if (cur == NULL)
12203 return (NULL);
12204 if (ctxt == NULL)
12205 return (NULL);
12206
12207 xmlCtxtReset(ctxt);
12208
12209 stream = xmlNewStringInputStream(ctxt, cur);
12210 if (stream == NULL) {
12211 return (NULL);
12212 }
12213 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012214 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012215}
12216
12217/**
12218 * xmlCtxtReadFile:
12219 * @ctxt: an XML parser context
12220 * @filename: a file or URL
12221 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012222 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012223 *
12224 * parse an XML file from the filesystem or the network.
12225 * This reuses the existing @ctxt parser context
12226 *
12227 * Returns the resulting document tree
12228 */
12229xmlDocPtr
12230xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12231 const char *encoding, int options)
12232{
12233 xmlParserInputPtr stream;
12234
12235 if (filename == NULL)
12236 return (NULL);
12237 if (ctxt == NULL)
12238 return (NULL);
12239
12240 xmlCtxtReset(ctxt);
12241
12242 stream = xmlNewInputFromFile(ctxt, filename);
12243 if (stream == NULL) {
12244 return (NULL);
12245 }
12246 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012247 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012248}
12249
12250/**
12251 * xmlCtxtReadMemory:
12252 * @ctxt: an XML parser context
12253 * @buffer: a pointer to a char array
12254 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012255 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012256 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012257 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012258 *
12259 * parse an XML in-memory document and build a tree.
12260 * This reuses the existing @ctxt parser context
12261 *
12262 * Returns the resulting document tree
12263 */
12264xmlDocPtr
12265xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012266 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012267{
12268 xmlParserInputBufferPtr input;
12269 xmlParserInputPtr stream;
12270
12271 if (ctxt == NULL)
12272 return (NULL);
12273 if (buffer == NULL)
12274 return (NULL);
12275
12276 xmlCtxtReset(ctxt);
12277
12278 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12279 if (input == NULL) {
12280 return(NULL);
12281 }
12282
12283 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12284 if (stream == NULL) {
12285 xmlFreeParserInputBuffer(input);
12286 return(NULL);
12287 }
12288
12289 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012290 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012291}
12292
12293/**
12294 * xmlCtxtReadFd:
12295 * @ctxt: an XML parser context
12296 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012297 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012298 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012299 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012300 *
12301 * parse an XML from a file descriptor and build a tree.
12302 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012303 * NOTE that the file descriptor will not be closed when the
12304 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012305 *
12306 * Returns the resulting document tree
12307 */
12308xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012309xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12310 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012311{
12312 xmlParserInputBufferPtr input;
12313 xmlParserInputPtr stream;
12314
12315 if (fd < 0)
12316 return (NULL);
12317 if (ctxt == NULL)
12318 return (NULL);
12319
12320 xmlCtxtReset(ctxt);
12321
12322
12323 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12324 if (input == NULL)
12325 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012326 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012327 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12328 if (stream == NULL) {
12329 xmlFreeParserInputBuffer(input);
12330 return (NULL);
12331 }
12332 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012333 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012334}
12335
12336/**
12337 * xmlCtxtReadIO:
12338 * @ctxt: an XML parser context
12339 * @ioread: an I/O read function
12340 * @ioclose: an I/O close function
12341 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012342 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012343 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012344 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012345 *
12346 * parse an XML document from I/O functions and source and build a tree.
12347 * This reuses the existing @ctxt parser context
12348 *
12349 * Returns the resulting document tree
12350 */
12351xmlDocPtr
12352xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12353 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012354 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012355 const char *encoding, int options)
12356{
12357 xmlParserInputBufferPtr input;
12358 xmlParserInputPtr stream;
12359
12360 if (ioread == NULL)
12361 return (NULL);
12362 if (ctxt == NULL)
12363 return (NULL);
12364
12365 xmlCtxtReset(ctxt);
12366
12367 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12368 XML_CHAR_ENCODING_NONE);
12369 if (input == NULL)
12370 return (NULL);
12371 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12372 if (stream == NULL) {
12373 xmlFreeParserInputBuffer(input);
12374 return (NULL);
12375 }
12376 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012377 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012378}