blob: 49ed8577d3d06839b34f91a3d9cf33404f08ce41 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As far as possible each function is associated with the production of
 * the XML specification it implements. A few productions defining the
 * different character ranges are actually implemented in either
 * parserInternals.h or parserInternals.c.
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000080/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000081 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000082 *
83 * arbitrary depth limit for the XML documents that we allow to
84 * process. This is not a limitation of the parser but a safety
85 * boundary feature.
86 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000087unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
Owen Taylor3473f882001-02-23 17:55:21 +000096/*
Owen Taylor3473f882001-02-23 17:55:21 +000097 * List of XML prefixed PI allowed by W3C specs
98 */
99
Daniel Veillardb44025c2001-10-11 22:55:55 +0000100static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000101 "xml-stylesheet",
102 NULL
103};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000857 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
1197/*
1198 * Macros for accessing the content. Those should be used only by the parser,
1199 * and not exported.
1200 *
1201 * Dirty macros, i.e. one often need to make assumption on the context to
1202 * use them
1203 *
1204 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1205 * To be used with extreme caution since operations consuming
1206 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001219 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1220 *
1221 * NEXT Skip to the next character, this does the proper decoding
1222 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001223 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001224 * CUR_CHAR(l) returns the current unicode character (int), set l
1225 * to the number of xmlChars used for the encoding [0-5].
1226 * CUR_SCHAR same but operate on a string instead of the context
1227 * COPY_BUF copy the current unicode char to the target buffer, increment
1228 * the index
1229 * GROW, SHRINK handling of input buffers
1230 */
1231
/* Current byte, n'th next byte and current position of the input */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1 ... cn.
 * The bytes are read as unsigned char.  The pointer argument is
 * parenthesized so that the cast applies to the whole expression passed
 * by the caller and not only to its first operand.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )

/*
 * SKIP(val): advance the input by val bytes, updating the character and
 * column counters, then handle a '%' or the end of the current input
 * found at the new position.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1262
Daniel Veillarda880b122003-04-21 21:36:41 +00001263#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001264 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1265 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001266 xmlSHRINK (ctxt);
1267
1268static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1269 xmlParserInputShrink(ctxt->input);
1270 if ((*ctxt->input->cur == 0) &&
1271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1272 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001273 }
Owen Taylor3473f882001-02-23 17:55:21 +00001274
Daniel Veillarda880b122003-04-21 21:36:41 +00001275#define GROW if ((ctxt->progressive == 0) && \
1276 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001277 xmlGROW (ctxt);
1278
1279static void xmlGROW (xmlParserCtxtPtr ctxt) {
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((*ctxt->input->cur == 0) &&
1282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1283 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001284}
Owen Taylor3473f882001-02-23 17:55:21 +00001285
/* Skip the blank characters at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the input by exactly one xmlChar, updating the column and
 * character counters, and ask for more data when the new current byte
 * is 0.  Line counting is not done here.
 * Expands to a brace block, not to a do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance the input by l xmlChars counted as one character:
 * a '\n' at the current position bumps the line and resets the column to 1,
 * anything else bumps the column.  A '%' found at the new position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context (resp. of the string s); l is passed by address */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a length l of 1 is stored as a single xmlChar, other lengths go through
 * xmlCopyCharMultiByte().
 * Expands to an unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
1313/**
1314 * xmlSkipBlankChars:
1315 * @ctxt: the XML parser context
1316 *
1317 * skip all blanks character found at that point in the input streams.
1318 * It pops up finished entities in the process if allowable at that point.
1319 *
1320 * Returns the number of space chars skipped
1321 */
1322
1323int
1324xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001325 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001326
1327 /*
1328 * It's Okay to use CUR/NEXT here since all the blanks are on
1329 * the ASCII range.
1330 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001331 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1332 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001333 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001334 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001336 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001337 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001338 if (*cur == '\n') {
1339 ctxt->input->line++; ctxt->input->col = 1;
1340 }
1341 cur++;
1342 res++;
1343 if (*cur == 0) {
1344 ctxt->input->cur = cur;
1345 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1346 cur = ctxt->input->cur;
1347 }
1348 }
1349 ctxt->input->cur = cur;
1350 } else {
1351 int cur;
1352 do {
1353 cur = CUR;
1354 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1355 NEXT;
1356 cur = CUR;
1357 res++;
1358 }
1359 while ((cur == 0) && (ctxt->inputNr > 1) &&
1360 (ctxt->instate != XML_PARSER_COMMENT)) {
1361 xmlPopInput(ctxt);
1362 cur = CUR;
1363 }
1364 /*
1365 * Need to handle support of entities branching here
1366 */
1367 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1368 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1369 }
Owen Taylor3473f882001-02-23 17:55:21 +00001370 return(res);
1371}
1372
1373/************************************************************************
1374 * *
1375 * Commodity functions to handle entities *
1376 * *
1377 ************************************************************************/
1378
1379/**
1380 * xmlPopInput:
1381 * @ctxt: an XML parser context
1382 *
1383 * xmlPopInput: the current input pointed by ctxt->input came to an end
1384 * pop it and return the next char.
1385 *
1386 * Returns the current xmlChar in the parser context
1387 */
1388xmlChar
1389xmlPopInput(xmlParserCtxtPtr ctxt) {
1390 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1391 if (xmlParserDebugEntities)
1392 xmlGenericError(xmlGenericErrorContext,
1393 "Popping input %d\n", ctxt->inputNr);
1394 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001395 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1397 return(xmlPopInput(ctxt));
1398 return(CUR);
1399}
1400
1401/**
1402 * xmlPushInput:
1403 * @ctxt: an XML parser context
1404 * @input: an XML parser input fragment (entity, XML fragment ...).
1405 *
1406 * xmlPushInput: switch to a new input stream which is stacked on top
1407 * of the previous one(s).
1408 */
1409void
1410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1411 if (input == NULL) return;
1412
1413 if (xmlParserDebugEntities) {
1414 if ((ctxt->input != NULL) && (ctxt->input->filename))
1415 xmlGenericError(xmlGenericErrorContext,
1416 "%s(%d): ", ctxt->input->filename,
1417 ctxt->input->line);
1418 xmlGenericError(xmlGenericErrorContext,
1419 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1420 }
1421 inputPush(ctxt, input);
1422 GROW;
1423}
1424
1425/**
1426 * xmlParseCharRef:
1427 * @ctxt: an XML parser context
1428 *
1429 * parse Reference declarations
1430 *
1431 * [66] CharRef ::= '&#' [0-9]+ ';' |
1432 * '&#x' [0-9a-fA-F]+ ';'
1433 *
1434 * [ WFC: Legal Character ]
1435 * Characters referred to using character references must match the
1436 * production for Char.
1437 *
1438 * Returns the value parsed (as an int), 0 in case of error
1439 */
1440int
1441xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001442 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 int count = 0;
1444
Owen Taylor3473f882001-02-23 17:55:21 +00001445 /*
1446 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1447 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001448 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001449 (NXT(2) == 'x')) {
1450 SKIP(3);
1451 GROW;
1452 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001453 if (count++ > 20) {
1454 count = 0;
1455 GROW;
1456 }
1457 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = val * 16 + (CUR - '0');
1459 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1460 val = val * 16 + (CUR - 'a') + 10;
1461 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1462 val = val * 16 + (CUR - 'A') + 10;
1463 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001465 val = 0;
1466 break;
1467 }
1468 NEXT;
1469 count++;
1470 }
1471 if (RAW == ';') {
1472 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001473 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001474 ctxt->nbChars ++;
1475 ctxt->input->cur++;
1476 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001477 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001478 SKIP(2);
1479 GROW;
1480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001481 if (count++ > 20) {
1482 count = 0;
1483 GROW;
1484 }
1485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001486 val = val * 10 + (CUR - '0');
1487 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001489 val = 0;
1490 break;
1491 }
1492 NEXT;
1493 count++;
1494 }
1495 if (RAW == ';') {
1496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001497 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001498 ctxt->nbChars ++;
1499 ctxt->input->cur++;
1500 }
1501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 }
1504
1505 /*
1506 * [ WFC: Legal Character ]
1507 * Characters referred to using character references must match the
1508 * production for Char.
1509 */
William M. Brack871611b2003-10-18 04:53:14 +00001510 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 return(val);
1512 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1514 "xmlParseCharRef: invalid xmlChar value %d\n",
1515 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 }
1517 return(0);
1518}
1519
1520/**
1521 * xmlParseStringCharRef:
1522 * @ctxt: an XML parser context
1523 * @str: a pointer to an index in the string
1524 *
1525 * parse Reference declarations, variant parsing from a string rather
1526 * than an an input flow.
1527 *
1528 * [66] CharRef ::= '&#' [0-9]+ ';' |
1529 * '&#x' [0-9a-fA-F]+ ';'
1530 *
1531 * [ WFC: Legal Character ]
1532 * Characters referred to using character references must match the
1533 * production for Char.
1534 *
1535 * Returns the value parsed (as an int), 0 in case of error, str will be
1536 * updated to the current value of the index
1537 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538static int
Owen Taylor3473f882001-02-23 17:55:21 +00001539xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1540 const xmlChar *ptr;
1541 xmlChar cur;
1542 int val = 0;
1543
1544 if ((str == NULL) || (*str == NULL)) return(0);
1545 ptr = *str;
1546 cur = *ptr;
1547 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1548 ptr += 3;
1549 cur = *ptr;
1550 while (cur != ';') { /* Non input consuming loop */
1551 if ((cur >= '0') && (cur <= '9'))
1552 val = val * 16 + (cur - '0');
1553 else if ((cur >= 'a') && (cur <= 'f'))
1554 val = val * 16 + (cur - 'a') + 10;
1555 else if ((cur >= 'A') && (cur <= 'F'))
1556 val = val * 16 + (cur - 'A') + 10;
1557 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001558 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001559 val = 0;
1560 break;
1561 }
1562 ptr++;
1563 cur = *ptr;
1564 }
1565 if (cur == ';')
1566 ptr++;
1567 } else if ((cur == '&') && (ptr[1] == '#')){
1568 ptr += 2;
1569 cur = *ptr;
1570 while (cur != ';') { /* Non input consuming loops */
1571 if ((cur >= '0') && (cur <= '9'))
1572 val = val * 10 + (cur - '0');
1573 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001574 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001575 val = 0;
1576 break;
1577 }
1578 ptr++;
1579 cur = *ptr;
1580 }
1581 if (cur == ';')
1582 ptr++;
1583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001584 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001585 return(0);
1586 }
1587 *str = ptr;
1588
1589 /*
1590 * [ WFC: Legal Character ]
1591 * Characters referred to using character references must match the
1592 * production for Char.
1593 */
William M. Brack871611b2003-10-18 04:53:14 +00001594 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 return(val);
1596 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001597 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1598 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1599 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 return(0);
1602}
1603
1604/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 * xmlNewBlanksWrapperInputStream:
1606 * @ctxt: an XML parser context
1607 * @entity: an Entity pointer
1608 *
1609 * Create a new input stream for wrapping
1610 * blanks around a PEReference
1611 *
1612 * Returns the new input stream or NULL
1613 */
1614
1615static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1616
Daniel Veillardf4862f02002-09-10 11:13:43 +00001617static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001618xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1619 xmlParserInputPtr input;
1620 xmlChar *buffer;
1621 size_t length;
1622 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1624 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001625 return(NULL);
1626 }
1627 if (xmlParserDebugEntities)
1628 xmlGenericError(xmlGenericErrorContext,
1629 "new blanks wrapper for entity: %s\n", entity->name);
1630 input = xmlNewInputStream(ctxt);
1631 if (input == NULL) {
1632 return(NULL);
1633 }
1634 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001635 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001636 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001637 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001638 return(NULL);
1639 }
1640 buffer [0] = ' ';
1641 buffer [1] = '%';
1642 buffer [length-3] = ';';
1643 buffer [length-2] = ' ';
1644 buffer [length-1] = 0;
1645 memcpy(buffer + 2, entity->name, length - 5);
1646 input->free = deallocblankswrapper;
1647 input->base = buffer;
1648 input->cur = buffer;
1649 input->length = length;
1650 input->end = &buffer[length];
1651 return(input);
1652}
1653
1654/**
Owen Taylor3473f882001-02-23 17:55:21 +00001655 * xmlParserHandlePEReference:
1656 * @ctxt: the parser context
1657 *
1658 * [69] PEReference ::= '%' Name ';'
1659 *
1660 * [ WFC: No Recursion ]
1661 * A parsed entity must not contain a recursive
1662 * reference to itself, either directly or indirectly.
1663 *
1664 * [ WFC: Entity Declared ]
1665 * In a document without any DTD, a document with only an internal DTD
1666 * subset which contains no parameter entity references, or a document
1667 * with "standalone='yes'", ... ... The declaration of a parameter
1668 * entity must precede any reference to it...
1669 *
1670 * [ VC: Entity Declared ]
1671 * In a document with an external subset or external parameter entities
1672 * with "standalone='no'", ... ... The declaration of a parameter entity
1673 * must precede any reference to it...
1674 *
1675 * [ WFC: In DTD ]
1676 * Parameter-entity references may only appear in the DTD.
1677 * NOTE: misleading but this is handled.
1678 *
1679 * A PEReference may have been detected in the current input stream
1680 * the handling is done accordingly to
1681 * http://www.w3.org/TR/REC-xml#entproc
1682 * i.e.
1683 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001684 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001685 */
1686void
1687xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 xmlEntityPtr entity = NULL;
1690 xmlParserInputPtr input;
1691
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (RAW != '%') return;
1693 switch(ctxt->instate) {
1694 case XML_PARSER_CDATA_SECTION:
1695 return;
1696 case XML_PARSER_COMMENT:
1697 return;
1698 case XML_PARSER_START_TAG:
1699 return;
1700 case XML_PARSER_END_TAG:
1701 return;
1702 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 return;
1705 case XML_PARSER_PROLOG:
1706 case XML_PARSER_START:
1707 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 return;
1710 case XML_PARSER_ENTITY_DECL:
1711 case XML_PARSER_CONTENT:
1712 case XML_PARSER_ATTRIBUTE_VALUE:
1713 case XML_PARSER_PI:
1714 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001715 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001716 /* we just ignore it there */
1717 return;
1718 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001719 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001720 return;
1721 case XML_PARSER_ENTITY_VALUE:
1722 /*
1723 * NOTE: in the case of entity values, we don't do the
1724 * substitution here since we need the literal
1725 * entity value to be able to save the internal
1726 * subset of the document.
1727 * This will be handled by xmlStringDecodeEntities
1728 */
1729 return;
1730 case XML_PARSER_DTD:
1731 /*
1732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1733 * In the internal DTD subset, parameter-entity references
1734 * can occur only where markup declarations can occur, not
1735 * within markup declarations.
1736 * In that case this is handled in xmlParseMarkupDecl
1737 */
1738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1739 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001740 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001741 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001742 break;
1743 case XML_PARSER_IGNORE:
1744 return;
1745 }
1746
1747 NEXT;
1748 name = xmlParseName(ctxt);
1749 if (xmlParserDebugEntities)
1750 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001751 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001753 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001754 } else {
1755 if (RAW == ';') {
1756 NEXT;
1757 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1758 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1759 if (entity == NULL) {
1760
1761 /*
1762 * [ WFC: Entity Declared ]
1763 * In a document without any DTD, a document with only an
1764 * internal DTD subset which contains no parameter entity
1765 * references, or a document with "standalone='yes'", ...
1766 * ... The declaration of a parameter entity must precede
1767 * any reference to it...
1768 */
1769 if ((ctxt->standalone == 1) ||
1770 ((ctxt->hasExternalSubset == 0) &&
1771 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001773 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001774 } else {
1775 /*
1776 * [ VC: Entity Declared ]
1777 * In a document with an external subset or external
1778 * parameter entities with "standalone='no'", ...
1779 * ... The declaration of a parameter entity must precede
1780 * any reference to it...
1781 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001782 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1783 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1784 "PEReference: %%%s; not found\n",
1785 name);
1786 } else
1787 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1788 "PEReference: %%%s; not found\n",
1789 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 ctxt->valid = 0;
1791 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 } else if (ctxt->input->free != deallocblankswrapper) {
1793 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1794 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 } else {
1796 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1797 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001798 xmlChar start[4];
1799 xmlCharEncoding enc;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 /*
1802 * handle the extra spaces added before and after
1803 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 */
1806 input = xmlNewEntityInputStream(ctxt, entity);
1807 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001808
1809 /*
1810 * Get the 4 first bytes and decode the charset
1811 * if enc != XML_CHAR_ENCODING_NONE
1812 * plug some encoding conversion routines.
1813 */
1814 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001815 if (entity->length >= 4) {
1816 start[0] = RAW;
1817 start[1] = NXT(1);
1818 start[2] = NXT(2);
1819 start[3] = NXT(3);
1820 enc = xmlDetectCharEncoding(start, 4);
1821 if (enc != XML_CHAR_ENCODING_NONE) {
1822 xmlSwitchEncoding(ctxt, enc);
1823 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001824 }
1825
Owen Taylor3473f882001-02-23 17:55:21 +00001826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001827 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1828 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlParseTextDecl(ctxt);
1830 }
Owen Taylor3473f882001-02-23 17:55:21 +00001831 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001832 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1833 "PEReference: %s is not a parameter entity\n",
1834 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001835 }
1836 }
1837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001838 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 }
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841}
1842
/*
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates @buffer accordingly.
 * The calling function must provide an integer variable named
 * <buffer>_size and a "mem_error" label, which is jumped to when the
 * reallocation fails. In that case @buffer still holds the previous
 * pointer, though <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    grown = (xmlChar *)							\
	    xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL) goto mem_error;					\
    buffer = grown;							\
}
1854
1855/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001856 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001857 * @ctxt: the parser context
1858 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001859 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001860 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1861 * @end: an end marker xmlChar, 0 if none
1862 * @end2: an end marker xmlChar, 0 if none
1863 * @end3: an end marker xmlChar, 0 if none
1864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001865 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001866 *
1867 * [67] Reference ::= EntityRef | CharRef
1868 *
1869 * [69] PEReference ::= '%' Name ';'
1870 *
1871 * Returns A newly allocated string with the substitution done. The caller
1872 * must deallocate it !
1873 */
1874xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1876 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001877 xmlChar *buffer = NULL;
1878 int buffer_size = 0;
1879
1880 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001881 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001882 xmlEntityPtr ent;
1883 int c,l;
1884 int nbchars = 0;
1885
Daniel Veillarde57ec792003-09-10 10:50:59 +00001886 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001887 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001888 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001889
1890 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001891 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001892 return(NULL);
1893 }
1894
1895 /*
1896 * allocate a translation buffer.
1897 */
1898 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001899 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001900 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001901
1902 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001903 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001904 * we are operating on already parsed values.
1905 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001906 if (str < last)
1907 c = CUR_SCHAR(str, l);
1908 else
1909 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001910 while ((c != 0) && (c != end) && /* non input consuming loop */
1911 (c != end2) && (c != end3)) {
1912
1913 if (c == 0) break;
1914 if ((c == '&') && (str[1] == '#')) {
1915 int val = xmlParseStringCharRef(ctxt, &str);
1916 if (val != 0) {
1917 COPY_BUF(0,buffer,nbchars,val);
1918 }
1919 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1920 if (xmlParserDebugEntities)
1921 xmlGenericError(xmlGenericErrorContext,
1922 "String decoding Entity Reference: %.30s\n",
1923 str);
1924 ent = xmlParseStringEntityRef(ctxt, &str);
1925 if ((ent != NULL) &&
1926 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1927 if (ent->content != NULL) {
1928 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1929 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001930 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1931 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001932 }
1933 } else if ((ent != NULL) && (ent->content != NULL)) {
1934 xmlChar *rep;
1935
1936 ctxt->depth++;
1937 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1938 0, 0, 0);
1939 ctxt->depth--;
1940 if (rep != NULL) {
1941 current = rep;
1942 while (*current != 0) { /* non input consuming loop */
1943 buffer[nbchars++] = *current++;
1944 if (nbchars >
1945 buffer_size - XML_PARSER_BUFFER_SIZE) {
1946 growBuffer(buffer);
1947 }
1948 }
1949 xmlFree(rep);
1950 }
1951 } else if (ent != NULL) {
1952 int i = xmlStrlen(ent->name);
1953 const xmlChar *cur = ent->name;
1954
1955 buffer[nbchars++] = '&';
1956 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1957 growBuffer(buffer);
1958 }
1959 for (;i > 0;i--)
1960 buffer[nbchars++] = *cur++;
1961 buffer[nbchars++] = ';';
1962 }
1963 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1964 if (xmlParserDebugEntities)
1965 xmlGenericError(xmlGenericErrorContext,
1966 "String decoding PE Reference: %.30s\n", str);
1967 ent = xmlParseStringPEReference(ctxt, &str);
1968 if (ent != NULL) {
1969 xmlChar *rep;
1970
1971 ctxt->depth++;
1972 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1973 0, 0, 0);
1974 ctxt->depth--;
1975 if (rep != NULL) {
1976 current = rep;
1977 while (*current != 0) { /* non input consuming loop */
1978 buffer[nbchars++] = *current++;
1979 if (nbchars >
1980 buffer_size - XML_PARSER_BUFFER_SIZE) {
1981 growBuffer(buffer);
1982 }
1983 }
1984 xmlFree(rep);
1985 }
1986 }
1987 } else {
1988 COPY_BUF(l,buffer,nbchars,c);
1989 str += l;
1990 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1991 growBuffer(buffer);
1992 }
1993 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001994 if (str < last)
1995 c = CUR_SCHAR(str, l);
1996 else
1997 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001998 }
1999 buffer[nbchars++] = 0;
2000 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002001
2002mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002003 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002004 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002005}
2006
Daniel Veillarde57ec792003-09-10 10:50:59 +00002007/**
2008 * xmlStringDecodeEntities:
2009 * @ctxt: the parser context
2010 * @str: the input string
2011 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2012 * @end: an end marker xmlChar, 0 if none
2013 * @end2: an end marker xmlChar, 0 if none
2014 * @end3: an end marker xmlChar, 0 if none
2015 *
2016 * Takes a entity string content and process to do the adequate substitutions.
2017 *
2018 * [67] Reference ::= EntityRef | CharRef
2019 *
2020 * [69] PEReference ::= '%' Name ';'
2021 *
2022 * Returns A newly allocated string with the substitution done. The caller
2023 * must deallocate it !
2024 */
2025xmlChar *
2026xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2027 xmlChar end, xmlChar end2, xmlChar end3) {
2028 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2029 end, end2, end3));
2030}
Owen Taylor3473f882001-02-23 17:55:21 +00002031
2032/************************************************************************
2033 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002034 * Commodity functions, cleanup needed ? *
2035 * *
2036 ************************************************************************/
2037
2038/**
2039 * areBlanks:
2040 * @ctxt: an XML parser context
2041 * @str: a xmlChar *
2042 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002043 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002044 *
2045 * Is this a sequence of blank chars that one can ignore ?
2046 *
2047 * Returns 1 if ignorable 0 otherwise.
2048 */
2049
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002050static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2051 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002052 int i, ret;
2053 xmlNodePtr lastChild;
2054
Daniel Veillard05c13a22001-09-09 08:38:09 +00002055 /*
2056 * Don't spend time trying to differentiate them, the same callback is
2057 * used !
2058 */
2059 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002060 return(0);
2061
Owen Taylor3473f882001-02-23 17:55:21 +00002062 /*
2063 * Check for xml:space value.
2064 */
2065 if (*(ctxt->space) == 1)
2066 return(0);
2067
2068 /*
2069 * Check that the string is made of blanks
2070 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002071 if (blank_chars == 0) {
2072 for (i = 0;i < len;i++)
2073 if (!(IS_BLANK_CH(str[i]))) return(0);
2074 }
Owen Taylor3473f882001-02-23 17:55:21 +00002075
2076 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002077 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002078 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002079 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002080 if (ctxt->myDoc != NULL) {
2081 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2082 if (ret == 0) return(1);
2083 if (ret == 1) return(0);
2084 }
2085
2086 /*
2087 * Otherwise, heuristic :-\
2088 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002089 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002090 if ((ctxt->node->children == NULL) &&
2091 (RAW == '<') && (NXT(1) == '/')) return(0);
2092
2093 lastChild = xmlGetLastChild(ctxt->node);
2094 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002095 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2096 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002097 } else if (xmlNodeIsText(lastChild))
2098 return(0);
2099 else if ((ctxt->node->children != NULL) &&
2100 (xmlNodeIsText(ctxt->node->children)))
2101 return(0);
2102 return(1);
2103}
2104
Owen Taylor3473f882001-02-23 17:55:21 +00002105/************************************************************************
2106 * *
2107 * Extra stuff for namespace support *
2108 * Relates to http://www.w3.org/TR/WD-xml-names *
2109 * *
2110 ************************************************************************/
2111
2112/**
2113 * xmlSplitQName:
2114 * @ctxt: an XML parser context
2115 * @name: an XML parser context
2116 * @prefix: a xmlChar **
2117 *
2118 * parse an UTF8 encoded XML qualified name string
2119 *
2120 * [NS 5] QName ::= (Prefix ':')? LocalPart
2121 *
2122 * [NS 6] Prefix ::= NCName
2123 *
2124 * [NS 7] LocalPart ::= NCName
2125 *
2126 * Returns the local part, and prefix is updated
2127 * to get the Prefix if any.
2128 */
2129
2130xmlChar *
2131xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2132 xmlChar buf[XML_MAX_NAMELEN + 5];
2133 xmlChar *buffer = NULL;
2134 int len = 0;
2135 int max = XML_MAX_NAMELEN;
2136 xmlChar *ret = NULL;
2137 const xmlChar *cur = name;
2138 int c;
2139
2140 *prefix = NULL;
2141
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002142 if (cur == NULL) return(NULL);
2143
Owen Taylor3473f882001-02-23 17:55:21 +00002144#ifndef XML_XML_NAMESPACE
2145 /* xml: prefix is not really a namespace */
2146 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2147 (cur[2] == 'l') && (cur[3] == ':'))
2148 return(xmlStrdup(name));
2149#endif
2150
Daniel Veillard597bc482003-07-24 16:08:28 +00002151 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002152 if (cur[0] == ':')
2153 return(xmlStrdup(name));
2154
2155 c = *cur++;
2156 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2157 buf[len++] = c;
2158 c = *cur++;
2159 }
2160 if (len >= max) {
2161 /*
2162 * Okay someone managed to make a huge name, so he's ready to pay
2163 * for the processing speed.
2164 */
2165 max = len * 2;
2166
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002167 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002168 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002169 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002170 return(NULL);
2171 }
2172 memcpy(buffer, buf, len);
2173 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2174 if (len + 10 > max) {
2175 max *= 2;
2176 buffer = (xmlChar *) xmlRealloc(buffer,
2177 max * sizeof(xmlChar));
2178 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002179 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
2181 }
2182 }
2183 buffer[len++] = c;
2184 c = *cur++;
2185 }
2186 buffer[len] = 0;
2187 }
2188
Daniel Veillard597bc482003-07-24 16:08:28 +00002189 /* nasty but well=formed
2190 if ((c == ':') && (*cur == 0)) {
2191 return(xmlStrdup(name));
2192 } */
2193
Owen Taylor3473f882001-02-23 17:55:21 +00002194 if (buffer == NULL)
2195 ret = xmlStrndup(buf, len);
2196 else {
2197 ret = buffer;
2198 buffer = NULL;
2199 max = XML_MAX_NAMELEN;
2200 }
2201
2202
2203 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002204 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002205 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002206 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002207 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002208 }
Owen Taylor3473f882001-02-23 17:55:21 +00002209 len = 0;
2210
Daniel Veillardbb284f42002-10-16 18:02:47 +00002211 /*
2212 * Check that the first character is proper to start
2213 * a new name
2214 */
2215 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2216 ((c >= 0x41) && (c <= 0x5A)) ||
2217 (c == '_') || (c == ':'))) {
2218 int l;
2219 int first = CUR_SCHAR(cur, l);
2220
2221 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002222 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002223 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002224 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002225 }
2226 }
2227 cur++;
2228
Owen Taylor3473f882001-02-23 17:55:21 +00002229 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2230 buf[len++] = c;
2231 c = *cur++;
2232 }
2233 if (len >= max) {
2234 /*
2235 * Okay someone managed to make a huge name, so he's ready to pay
2236 * for the processing speed.
2237 */
2238 max = len * 2;
2239
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002240 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002241 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002242 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002243 return(NULL);
2244 }
2245 memcpy(buffer, buf, len);
2246 while (c != 0) { /* tested bigname2.xml */
2247 if (len + 10 > max) {
2248 max *= 2;
2249 buffer = (xmlChar *) xmlRealloc(buffer,
2250 max * sizeof(xmlChar));
2251 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002252 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002253 return(NULL);
2254 }
2255 }
2256 buffer[len++] = c;
2257 c = *cur++;
2258 }
2259 buffer[len] = 0;
2260 }
2261
2262 if (buffer == NULL)
2263 ret = xmlStrndup(buf, len);
2264 else {
2265 ret = buffer;
2266 }
2267 }
2268
2269 return(ret);
2270}
2271
2272/************************************************************************
2273 * *
2274 * The parser itself *
2275 * Relates to http://www.w3.org/TR/REC-xml *
2276 * *
2277 ************************************************************************/
2278
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002279static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002280static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002281 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002282
Owen Taylor3473f882001-02-23 17:55:21 +00002283/**
2284 * xmlParseName:
2285 * @ctxt: an XML parser context
2286 *
2287 * parse an XML name.
2288 *
2289 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2290 * CombiningChar | Extender
2291 *
2292 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2293 *
2294 * [6] Names ::= Name (S Name)*
2295 *
2296 * Returns the Name parsed or NULL
2297 */
2298
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002299const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002300xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002301 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002302 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002303 int count = 0;
2304
2305 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002306
2307 /*
2308 * Accelerator for simple ASCII names
2309 */
2310 in = ctxt->input->cur;
2311 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2312 ((*in >= 0x41) && (*in <= 0x5A)) ||
2313 (*in == '_') || (*in == ':')) {
2314 in++;
2315 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2316 ((*in >= 0x41) && (*in <= 0x5A)) ||
2317 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002318 (*in == '_') || (*in == '-') ||
2319 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002320 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002321 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002322 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002323 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002324 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002325 ctxt->nbChars += count;
2326 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002327 if (ret == NULL)
2328 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002329 return(ret);
2330 }
2331 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002332 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002333}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002334
Daniel Veillard46de64e2002-05-29 08:21:33 +00002335/**
2336 * xmlParseNameAndCompare:
2337 * @ctxt: an XML parser context
2338 *
2339 * parse an XML name and compares for match
2340 * (specialized for endtag parsing)
2341 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002342 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2343 * and the name for mismatch
2344 */
2345
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002346static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002347xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002348 register const xmlChar *cmp = other;
2349 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002350 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002351
2352 GROW;
2353
2354 in = ctxt->input->cur;
2355 while (*in != 0 && *in == *cmp) {
2356 ++in;
2357 ++cmp;
2358 }
William M. Brack76e95df2003-10-18 16:20:14 +00002359 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002360 /* success */
2361 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002362 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002363 }
2364 /* failure (or end of input buffer), check with full function */
2365 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002366 /* strings coming from the dictionnary direct compare possible */
2367 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002368 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002369 }
2370 return ret;
2371}
2372
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002373static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002374xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002375 int len = 0, l;
2376 int c;
2377 int count = 0;
2378
2379 /*
2380 * Handler for more complex cases
2381 */
2382 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002383 c = CUR_CHAR(l);
2384 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2385 (!IS_LETTER(c) && (c != '_') &&
2386 (c != ':'))) {
2387 return(NULL);
2388 }
2389
2390 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002391 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002392 (c == '.') || (c == '-') ||
2393 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002394 (IS_COMBINING(c)) ||
2395 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002396 if (count++ > 100) {
2397 count = 0;
2398 GROW;
2399 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002400 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002401 NEXTL(l);
2402 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002403 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002404 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002405}
2406
2407/**
2408 * xmlParseStringName:
2409 * @ctxt: an XML parser context
2410 * @str: a pointer to the string pointer (IN/OUT)
2411 *
2412 * parse an XML name.
2413 *
2414 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2415 * CombiningChar | Extender
2416 *
2417 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2418 *
2419 * [6] Names ::= Name (S Name)*
2420 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002421 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002422 * is updated to the current location in the string.
2423 */
2424
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002425static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002426xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2427 xmlChar buf[XML_MAX_NAMELEN + 5];
2428 const xmlChar *cur = *str;
2429 int len = 0, l;
2430 int c;
2431
2432 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002433 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002434 (c != ':')) {
2435 return(NULL);
2436 }
2437
William M. Brack871611b2003-10-18 04:53:14 +00002438 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002439 (c == '.') || (c == '-') ||
2440 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002441 (IS_COMBINING(c)) ||
2442 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002443 COPY_BUF(l,buf,len,c);
2444 cur += l;
2445 c = CUR_SCHAR(cur, l);
2446 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2447 /*
2448 * Okay someone managed to make a huge name, so he's ready to pay
2449 * for the processing speed.
2450 */
2451 xmlChar *buffer;
2452 int max = len * 2;
2453
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002454 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002455 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002456 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002457 return(NULL);
2458 }
2459 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002460 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002461 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002462 (c == '.') || (c == '-') ||
2463 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002464 (IS_COMBINING(c)) ||
2465 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002466 if (len + 10 > max) {
2467 max *= 2;
2468 buffer = (xmlChar *) xmlRealloc(buffer,
2469 max * sizeof(xmlChar));
2470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 }
2475 COPY_BUF(l,buffer,len,c);
2476 cur += l;
2477 c = CUR_SCHAR(cur, l);
2478 }
2479 buffer[len] = 0;
2480 *str = cur;
2481 return(buffer);
2482 }
2483 }
2484 *str = cur;
2485 return(xmlStrndup(buf, len));
2486}
2487
2488/**
2489 * xmlParseNmtoken:
2490 * @ctxt: an XML parser context
2491 *
2492 * parse an XML Nmtoken.
2493 *
2494 * [7] Nmtoken ::= (NameChar)+
2495 *
2496 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2497 *
2498 * Returns the Nmtoken parsed or NULL
2499 */
2500
2501xmlChar *
2502xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2503 xmlChar buf[XML_MAX_NAMELEN + 5];
2504 int len = 0, l;
2505 int c;
2506 int count = 0;
2507
2508 GROW;
2509 c = CUR_CHAR(l);
2510
William M. Brack871611b2003-10-18 04:53:14 +00002511 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002512 (c == '.') || (c == '-') ||
2513 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002514 (IS_COMBINING(c)) ||
2515 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002516 if (count++ > 100) {
2517 count = 0;
2518 GROW;
2519 }
2520 COPY_BUF(l,buf,len,c);
2521 NEXTL(l);
2522 c = CUR_CHAR(l);
2523 if (len >= XML_MAX_NAMELEN) {
2524 /*
2525 * Okay someone managed to make a huge token, so he's ready to pay
2526 * for the processing speed.
2527 */
2528 xmlChar *buffer;
2529 int max = len * 2;
2530
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002531 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002533 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002534 return(NULL);
2535 }
2536 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002537 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002538 (c == '.') || (c == '-') ||
2539 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002540 (IS_COMBINING(c)) ||
2541 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002542 if (count++ > 100) {
2543 count = 0;
2544 GROW;
2545 }
2546 if (len + 10 > max) {
2547 max *= 2;
2548 buffer = (xmlChar *) xmlRealloc(buffer,
2549 max * sizeof(xmlChar));
2550 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002551 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 return(NULL);
2553 }
2554 }
2555 COPY_BUF(l,buffer,len,c);
2556 NEXTL(l);
2557 c = CUR_CHAR(l);
2558 }
2559 buffer[len] = 0;
2560 return(buffer);
2561 }
2562 }
2563 if (len == 0)
2564 return(NULL);
2565 return(xmlStrndup(buf, len));
2566}
2567
2568/**
2569 * xmlParseEntityValue:
2570 * @ctxt: an XML parser context
2571 * @orig: if non-NULL store a copy of the original entity value
2572 *
2573 * parse a value for ENTITY declarations
2574 *
2575 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2576 * "'" ([^%&'] | PEReference | Reference)* "'"
2577 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002578 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002579 */
2580
2581xmlChar *
2582xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2583 xmlChar *buf = NULL;
2584 int len = 0;
2585 int size = XML_PARSER_BUFFER_SIZE;
2586 int c, l;
2587 xmlChar stop;
2588 xmlChar *ret = NULL;
2589 const xmlChar *cur = NULL;
2590 xmlParserInputPtr input;
2591
2592 if (RAW == '"') stop = '"';
2593 else if (RAW == '\'') stop = '\'';
2594 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002595 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002596 return(NULL);
2597 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002598 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002599 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002600 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 return(NULL);
2602 }
2603
2604 /*
2605 * The content of the entity definition is copied in a buffer.
2606 */
2607
2608 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2609 input = ctxt->input;
2610 GROW;
2611 NEXT;
2612 c = CUR_CHAR(l);
2613 /*
2614 * NOTE: 4.4.5 Included in Literal
2615 * When a parameter entity reference appears in a literal entity
2616 * value, ... a single or double quote character in the replacement
2617 * text is always treated as a normal data character and will not
2618 * terminate the literal.
2619 * In practice it means we stop the loop only when back at parsing
2620 * the initial entity and the quote is found
2621 */
William M. Brack871611b2003-10-18 04:53:14 +00002622 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002623 (ctxt->input != input))) {
2624 if (len + 5 >= size) {
2625 size *= 2;
2626 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2627 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002628 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002629 return(NULL);
2630 }
2631 }
2632 COPY_BUF(l,buf,len,c);
2633 NEXTL(l);
2634 /*
2635 * Pop-up of finished entities.
2636 */
2637 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2638 xmlPopInput(ctxt);
2639
2640 GROW;
2641 c = CUR_CHAR(l);
2642 if (c == 0) {
2643 GROW;
2644 c = CUR_CHAR(l);
2645 }
2646 }
2647 buf[len] = 0;
2648
2649 /*
2650 * Raise problem w.r.t. '&' and '%' being used in non-entities
2651 * reference constructs. Note Charref will be handled in
2652 * xmlStringDecodeEntities()
2653 */
2654 cur = buf;
2655 while (*cur != 0) { /* non input consuming */
2656 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2657 xmlChar *name;
2658 xmlChar tmp = *cur;
2659
2660 cur++;
2661 name = xmlParseStringName(ctxt, &cur);
2662 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002663 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002664 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002665 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002666 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002667 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2668 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002669 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002670 }
2671 if (name != NULL)
2672 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002673 if (*cur == 0)
2674 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002675 }
2676 cur++;
2677 }
2678
2679 /*
2680 * Then PEReference entities are substituted.
2681 */
2682 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002683 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002684 xmlFree(buf);
2685 } else {
2686 NEXT;
2687 /*
2688 * NOTE: 4.4.7 Bypassed
2689 * When a general entity reference appears in the EntityValue in
2690 * an entity declaration, it is bypassed and left as is.
2691 * so XML_SUBSTITUTE_REF is not set here.
2692 */
2693 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2694 0, 0, 0);
2695 if (orig != NULL)
2696 *orig = buf;
2697 else
2698 xmlFree(buf);
2699 }
2700
2701 return(ret);
2702}
2703
2704/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002705 * xmlParseAttValueComplex:
2706 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002707 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002708 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002709 *
2710 * parse a value for an attribute, this is the fallback function
2711 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002712 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002713 *
2714 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2715 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002716static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002717xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002718 xmlChar limit = 0;
2719 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002720 int len = 0;
2721 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002722 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002723 xmlChar *current = NULL;
2724 xmlEntityPtr ent;
2725
Owen Taylor3473f882001-02-23 17:55:21 +00002726 if (NXT(0) == '"') {
2727 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2728 limit = '"';
2729 NEXT;
2730 } else if (NXT(0) == '\'') {
2731 limit = '\'';
2732 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2733 NEXT;
2734 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002735 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002736 return(NULL);
2737 }
2738
2739 /*
2740 * allocate a translation buffer.
2741 */
2742 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002743 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002744 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002745
2746 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002747 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002748 */
2749 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002750 while ((NXT(0) != limit) && /* checked */
2751 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002752 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002753 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002754 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002755 if (NXT(1) == '#') {
2756 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002757
Owen Taylor3473f882001-02-23 17:55:21 +00002758 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002759 if (ctxt->replaceEntities) {
2760 if (len > buf_size - 10) {
2761 growBuffer(buf);
2762 }
2763 buf[len++] = '&';
2764 } else {
2765 /*
2766 * The reparsing will be done in xmlStringGetNodeList()
2767 * called by the attribute() function in SAX.c
2768 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002769 if (len > buf_size - 10) {
2770 growBuffer(buf);
2771 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002772 buf[len++] = '&';
2773 buf[len++] = '#';
2774 buf[len++] = '3';
2775 buf[len++] = '8';
2776 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002777 }
2778 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002779 if (len > buf_size - 10) {
2780 growBuffer(buf);
2781 }
Owen Taylor3473f882001-02-23 17:55:21 +00002782 len += xmlCopyChar(0, &buf[len], val);
2783 }
2784 } else {
2785 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002786 if ((ent != NULL) &&
2787 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2788 if (len > buf_size - 10) {
2789 growBuffer(buf);
2790 }
2791 if ((ctxt->replaceEntities == 0) &&
2792 (ent->content[0] == '&')) {
2793 buf[len++] = '&';
2794 buf[len++] = '#';
2795 buf[len++] = '3';
2796 buf[len++] = '8';
2797 buf[len++] = ';';
2798 } else {
2799 buf[len++] = ent->content[0];
2800 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002801 } else if ((ent != NULL) &&
2802 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002803 xmlChar *rep;
2804
2805 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2806 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002807 XML_SUBSTITUTE_REF,
2808 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002809 if (rep != NULL) {
2810 current = rep;
2811 while (*current != 0) { /* non input consuming */
2812 buf[len++] = *current++;
2813 if (len > buf_size - 10) {
2814 growBuffer(buf);
2815 }
2816 }
2817 xmlFree(rep);
2818 }
2819 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002820 if (len > buf_size - 10) {
2821 growBuffer(buf);
2822 }
Owen Taylor3473f882001-02-23 17:55:21 +00002823 if (ent->content != NULL)
2824 buf[len++] = ent->content[0];
2825 }
2826 } else if (ent != NULL) {
2827 int i = xmlStrlen(ent->name);
2828 const xmlChar *cur = ent->name;
2829
2830 /*
2831 * This may look absurd but is needed to detect
2832 * entities problems
2833 */
2834 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2835 (ent->content != NULL)) {
2836 xmlChar *rep;
2837 rep = xmlStringDecodeEntities(ctxt, ent->content,
2838 XML_SUBSTITUTE_REF, 0, 0, 0);
2839 if (rep != NULL)
2840 xmlFree(rep);
2841 }
2842
2843 /*
2844 * Just output the reference
2845 */
2846 buf[len++] = '&';
2847 if (len > buf_size - i - 10) {
2848 growBuffer(buf);
2849 }
2850 for (;i > 0;i--)
2851 buf[len++] = *cur++;
2852 buf[len++] = ';';
2853 }
2854 }
2855 } else {
2856 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002857 if ((len != 0) || (!normalize)) {
2858 if ((!normalize) || (!in_space)) {
2859 COPY_BUF(l,buf,len,0x20);
2860 if (len > buf_size - 10) {
2861 growBuffer(buf);
2862 }
2863 }
2864 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002865 }
2866 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002867 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002868 COPY_BUF(l,buf,len,c);
2869 if (len > buf_size - 10) {
2870 growBuffer(buf);
2871 }
2872 }
2873 NEXTL(l);
2874 }
2875 GROW;
2876 c = CUR_CHAR(l);
2877 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002878 if ((in_space) && (normalize)) {
2879 while (buf[len - 1] == 0x20) len--;
2880 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002881 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002882 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002883 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002884 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002885 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2886 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002887 } else
2888 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002889 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002890 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002891
2892mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002893 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002894 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002895}
2896
2897/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002898 * xmlParseAttValue:
2899 * @ctxt: an XML parser context
2900 *
2901 * parse a value for an attribute
2902 * Note: the parser won't do substitution of entities here, this
2903 * will be handled later in xmlStringGetNodeList
2904 *
2905 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2906 * "'" ([^<&'] | Reference)* "'"
2907 *
2908 * 3.3.3 Attribute-Value Normalization:
2909 * Before the value of an attribute is passed to the application or
2910 * checked for validity, the XML processor must normalize it as follows:
2911 * - a character reference is processed by appending the referenced
2912 * character to the attribute value
2913 * - an entity reference is processed by recursively processing the
2914 * replacement text of the entity
2915 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2916 * appending #x20 to the normalized value, except that only a single
2917 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2918 * parsed entity or the literal entity value of an internal parsed entity
2919 * - other characters are processed by appending them to the normalized value
2920 * If the declared value is not CDATA, then the XML processor must further
2921 * process the normalized attribute value by discarding any leading and
2922 * trailing space (#x20) characters, and by replacing sequences of space
2923 * (#x20) characters by a single space (#x20) character.
2924 * All attributes for which no declaration has been read should be treated
2925 * by a non-validating parser as if declared CDATA.
2926 *
2927 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2928 */
2929
2930
2931xmlChar *
2932xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002933 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002934}
2935
2936/**
Owen Taylor3473f882001-02-23 17:55:21 +00002937 * xmlParseSystemLiteral:
2938 * @ctxt: an XML parser context
2939 *
2940 * parse an XML Literal
2941 *
2942 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2943 *
2944 * Returns the SystemLiteral parsed or NULL
2945 */
2946
2947xmlChar *
2948xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2949 xmlChar *buf = NULL;
2950 int len = 0;
2951 int size = XML_PARSER_BUFFER_SIZE;
2952 int cur, l;
2953 xmlChar stop;
2954 int state = ctxt->instate;
2955 int count = 0;
2956
2957 SHRINK;
2958 if (RAW == '"') {
2959 NEXT;
2960 stop = '"';
2961 } else if (RAW == '\'') {
2962 NEXT;
2963 stop = '\'';
2964 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002965 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002966 return(NULL);
2967 }
2968
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002969 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002970 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002971 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002972 return(NULL);
2973 }
2974 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2975 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00002976 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002977 if (len + 5 >= size) {
2978 size *= 2;
2979 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2980 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002981 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002982 ctxt->instate = (xmlParserInputState) state;
2983 return(NULL);
2984 }
2985 }
2986 count++;
2987 if (count > 50) {
2988 GROW;
2989 count = 0;
2990 }
2991 COPY_BUF(l,buf,len,cur);
2992 NEXTL(l);
2993 cur = CUR_CHAR(l);
2994 if (cur == 0) {
2995 GROW;
2996 SHRINK;
2997 cur = CUR_CHAR(l);
2998 }
2999 }
3000 buf[len] = 0;
3001 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003002 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003003 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003004 } else {
3005 NEXT;
3006 }
3007 return(buf);
3008}
3009
3010/**
3011 * xmlParsePubidLiteral:
3012 * @ctxt: an XML parser context
3013 *
3014 * parse an XML public literal
3015 *
3016 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3017 *
3018 * Returns the PubidLiteral parsed or NULL.
3019 */
3020
3021xmlChar *
3022xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3023 xmlChar *buf = NULL;
3024 int len = 0;
3025 int size = XML_PARSER_BUFFER_SIZE;
3026 xmlChar cur;
3027 xmlChar stop;
3028 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003029 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003030
3031 SHRINK;
3032 if (RAW == '"') {
3033 NEXT;
3034 stop = '"';
3035 } else if (RAW == '\'') {
3036 NEXT;
3037 stop = '\'';
3038 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003039 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003040 return(NULL);
3041 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003042 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003044 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003045 return(NULL);
3046 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003047 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003048 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003049 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003050 if (len + 1 >= size) {
3051 size *= 2;
3052 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3053 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003054 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003055 return(NULL);
3056 }
3057 }
3058 buf[len++] = cur;
3059 count++;
3060 if (count > 50) {
3061 GROW;
3062 count = 0;
3063 }
3064 NEXT;
3065 cur = CUR;
3066 if (cur == 0) {
3067 GROW;
3068 SHRINK;
3069 cur = CUR;
3070 }
3071 }
3072 buf[len] = 0;
3073 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003074 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003075 } else {
3076 NEXT;
3077 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003078 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003079 return(buf);
3080}
3081
Daniel Veillard48b2f892001-02-25 16:11:03 +00003082void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003083/**
3084 * xmlParseCharData:
3085 * @ctxt: an XML parser context
3086 * @cdata: int indicating whether we are within a CDATA section
3087 *
3088 * parse a CharData section.
3089 * if we are within a CDATA section ']]>' marks an end of section.
3090 *
3091 * The right angle bracket (>) may be represented using the string "&gt;",
3092 * and must, for compatibility, be escaped using "&gt;" or a character
3093 * reference when it appears in the string "]]>" in content, when that
3094 * string is not marking the end of a CDATA section.
3095 *
3096 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3097 */
3098
3099void
3100xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003101 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003102 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003103 int line = ctxt->input->line;
3104 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003105
3106 SHRINK;
3107 GROW;
3108 /*
3109 * Accelerated common case where input don't need to be
3110 * modified before passing it to the handler.
3111 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003112 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003113 in = ctxt->input->cur;
3114 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003115get_more_space:
3116 while (*in == 0x20) in++;
3117 if (*in == 0xA) {
3118 ctxt->input->line++;
3119 in++;
3120 while (*in == 0xA) {
3121 ctxt->input->line++;
3122 in++;
3123 }
3124 goto get_more_space;
3125 }
3126 if (*in == '<') {
3127 nbchar = in - ctxt->input->cur;
3128 if (nbchar > 0) {
3129 const xmlChar *tmp = ctxt->input->cur;
3130 ctxt->input->cur = in;
3131
3132 if (ctxt->sax->ignorableWhitespace !=
3133 ctxt->sax->characters) {
3134 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3135 ctxt->sax->ignorableWhitespace(ctxt->userData,
3136 tmp, nbchar);
3137 } else if (ctxt->sax->characters != NULL)
3138 ctxt->sax->characters(ctxt->userData,
3139 tmp, nbchar);
3140 } else if (ctxt->sax->characters != NULL) {
3141 ctxt->sax->characters(ctxt->userData,
3142 tmp, nbchar);
3143 }
3144 }
3145 return;
3146 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003147get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003148 while (((*in > ']') && (*in <= 0x7F)) ||
3149 ((*in > '&') && (*in < '<')) ||
3150 ((*in > '<') && (*in < ']')) ||
3151 ((*in >= 0x20) && (*in < '&')) ||
3152 (*in == 0x09))
3153 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003154 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003156 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003157 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003158 ctxt->input->line++;
3159 in++;
3160 }
3161 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003162 }
3163 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003164 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003165 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003166 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003167 return;
3168 }
3169 in++;
3170 goto get_more;
3171 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003172 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003173 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003174 if ((ctxt->sax->ignorableWhitespace !=
3175 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003176 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003177 const xmlChar *tmp = ctxt->input->cur;
3178 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003179
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003180 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003181 ctxt->sax->ignorableWhitespace(ctxt->userData,
3182 tmp, nbchar);
3183 } else if (ctxt->sax->characters != NULL)
3184 ctxt->sax->characters(ctxt->userData,
3185 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003186 line = ctxt->input->line;
3187 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003188 } else {
3189 if (ctxt->sax->characters != NULL)
3190 ctxt->sax->characters(ctxt->userData,
3191 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003192 line = ctxt->input->line;
3193 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003194 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003195 }
3196 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003197 if (*in == 0xD) {
3198 in++;
3199 if (*in == 0xA) {
3200 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003201 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003202 ctxt->input->line++;
3203 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003204 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003205 in--;
3206 }
3207 if (*in == '<') {
3208 return;
3209 }
3210 if (*in == '&') {
3211 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003212 }
3213 SHRINK;
3214 GROW;
3215 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003216 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003217 nbchar = 0;
3218 }
Daniel Veillard50582112001-03-26 22:52:16 +00003219 ctxt->input->line = line;
3220 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003221 xmlParseCharDataComplex(ctxt, cdata);
3222}
3223
Daniel Veillard01c13b52002-12-10 15:19:08 +00003224/**
3225 * xmlParseCharDataComplex:
3226 * @ctxt: an XML parser context
3227 * @cdata: int indicating whether we are within a CDATA section
3228 *
3229 * parse a CharData section.this is the fallback function
3230 * of xmlParseCharData() when the parsing requires handling
3231 * of non-ASCII characters.
3232 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003233void
3234xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003235 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3236 int nbchar = 0;
3237 int cur, l;
3238 int count = 0;
3239
3240 SHRINK;
3241 GROW;
3242 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003243 while ((cur != '<') && /* checked */
3244 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003245 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003246 if ((cur == ']') && (NXT(1) == ']') &&
3247 (NXT(2) == '>')) {
3248 if (cdata) break;
3249 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003250 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003251 }
3252 }
3253 COPY_BUF(l,buf,nbchar,cur);
3254 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003255 buf[nbchar] = 0;
3256
Owen Taylor3473f882001-02-23 17:55:21 +00003257 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003258 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003259 */
3260 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003261 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (ctxt->sax->ignorableWhitespace != NULL)
3263 ctxt->sax->ignorableWhitespace(ctxt->userData,
3264 buf, nbchar);
3265 } else {
3266 if (ctxt->sax->characters != NULL)
3267 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3268 }
3269 }
3270 nbchar = 0;
3271 }
3272 count++;
3273 if (count > 50) {
3274 GROW;
3275 count = 0;
3276 }
3277 NEXTL(l);
3278 cur = CUR_CHAR(l);
3279 }
3280 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003281 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003282 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003283 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003284 */
3285 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003286 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003287 if (ctxt->sax->ignorableWhitespace != NULL)
3288 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3289 } else {
3290 if (ctxt->sax->characters != NULL)
3291 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3292 }
3293 }
3294 }
3295}
3296
3297/**
3298 * xmlParseExternalID:
3299 * @ctxt: an XML parser context
3300 * @publicID: a xmlChar** receiving PubidLiteral
3301 * @strict: indicate whether we should restrict parsing to only
3302 * production [75], see NOTE below
3303 *
3304 * Parse an External ID or a Public ID
3305 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003306 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003307 * 'PUBLIC' S PubidLiteral S SystemLiteral
3308 *
3309 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3310 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3311 *
3312 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3313 *
3314 * Returns the function returns SystemLiteral and in the second
3315 * case publicID receives PubidLiteral, is strict is off
3316 * it is possible to return NULL and have publicID set.
3317 */
3318
3319xmlChar *
3320xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3321 xmlChar *URI = NULL;
3322
3323 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003324
3325 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003326 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003327 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003328 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3330 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003331 }
3332 SKIP_BLANKS;
3333 URI = xmlParseSystemLiteral(ctxt);
3334 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003336 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003337 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003338 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003339 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003341 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003342 }
3343 SKIP_BLANKS;
3344 *publicID = xmlParsePubidLiteral(ctxt);
3345 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003346 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003347 }
3348 if (strict) {
3349 /*
3350 * We don't handle [83] so "S SystemLiteral" is required.
3351 */
William M. Brack76e95df2003-10-18 16:20:14 +00003352 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003354 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003355 }
3356 } else {
3357 /*
3358 * We handle [83] so we return immediately, if
3359 * "S SystemLiteral" is not detected. From a purely parsing
3360 * point of view that's a nice mess.
3361 */
3362 const xmlChar *ptr;
3363 GROW;
3364
3365 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003366 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003367
William M. Brack76e95df2003-10-18 16:20:14 +00003368 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003369 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3370 }
3371 SKIP_BLANKS;
3372 URI = xmlParseSystemLiteral(ctxt);
3373 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 }
3376 }
3377 return(URI);
3378}
3379
3380/**
3381 * xmlParseComment:
3382 * @ctxt: an XML parser context
3383 *
3384 * Skip an XML (SGML) comment <!-- .... -->
3385 * The spec says that "For compatibility, the string "--" (double-hyphen)
3386 * must not occur within comments. "
3387 *
3388 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3389 */
3390void
3391xmlParseComment(xmlParserCtxtPtr ctxt) {
3392 xmlChar *buf = NULL;
3393 int len;
3394 int size = XML_PARSER_BUFFER_SIZE;
3395 int q, ql;
3396 int r, rl;
3397 int cur, l;
3398 xmlParserInputState state;
3399 xmlParserInputPtr input = ctxt->input;
3400 int count = 0;
3401
3402 /*
3403 * Check that there is a comment right here.
3404 */
3405 if ((RAW != '<') || (NXT(1) != '!') ||
3406 (NXT(2) != '-') || (NXT(3) != '-')) return;
3407
3408 state = ctxt->instate;
3409 ctxt->instate = XML_PARSER_COMMENT;
3410 SHRINK;
3411 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003412 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003413 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003414 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003415 ctxt->instate = state;
3416 return;
3417 }
3418 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003419 if (q == 0)
3420 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003421 NEXTL(ql);
3422 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003423 if (r == 0)
3424 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003425 NEXTL(rl);
3426 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003427 if (cur == 0)
3428 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003429 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003430 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003431 ((cur != '>') ||
3432 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003433 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003434 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003435 }
3436 if (len + 5 >= size) {
3437 size *= 2;
3438 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3439 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003440 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003441 ctxt->instate = state;
3442 return;
3443 }
3444 }
3445 COPY_BUF(ql,buf,len,q);
3446 q = r;
3447 ql = rl;
3448 r = cur;
3449 rl = l;
3450
3451 count++;
3452 if (count > 50) {
3453 GROW;
3454 count = 0;
3455 }
3456 NEXTL(l);
3457 cur = CUR_CHAR(l);
3458 if (cur == 0) {
3459 SHRINK;
3460 GROW;
3461 cur = CUR_CHAR(l);
3462 }
3463 }
3464 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003465 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003466 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003467 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 xmlFree(buf);
3469 } else {
3470 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003471 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3472 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003473 }
3474 NEXT;
3475 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3476 (!ctxt->disableSAX))
3477 ctxt->sax->comment(ctxt->userData, buf);
3478 xmlFree(buf);
3479 }
3480 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003481 return;
3482not_terminated:
3483 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3484 "Comment not terminated\n", NULL);
3485 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003486}
3487
3488/**
3489 * xmlParsePITarget:
3490 * @ctxt: an XML parser context
3491 *
3492 * parse the name of a PI
3493 *
3494 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3495 *
3496 * Returns the PITarget name or NULL
3497 */
3498
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003499const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003500xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003501 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003502
3503 name = xmlParseName(ctxt);
3504 if ((name != NULL) &&
3505 ((name[0] == 'x') || (name[0] == 'X')) &&
3506 ((name[1] == 'm') || (name[1] == 'M')) &&
3507 ((name[2] == 'l') || (name[2] == 'L'))) {
3508 int i;
3509 if ((name[0] == 'x') && (name[1] == 'm') &&
3510 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003511 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003512 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003513 return(name);
3514 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003515 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003516 return(name);
3517 }
3518 for (i = 0;;i++) {
3519 if (xmlW3CPIs[i] == NULL) break;
3520 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3521 return(name);
3522 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003523 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3524 "xmlParsePITarget: invalid name prefix 'xml'\n",
3525 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003526 }
3527 return(name);
3528}
3529
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form  catalog = "URL"  (single or double
 * quotes) with nothing but blanks around it. A syntax error raises an
 * XML_WAR_CATALOG_PI warning, except for a missing '=' after the
 * 'catalog' keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cursor, *start;
    xmlChar quote;

    /* the 'catalog' keyword, possibly preceded by blanks */
    for (cursor = catalog; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* the '=' sign; when missing the PI is silently ignored */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* the opening quote */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    cursor++;

    /* the URL runs up to the matching quote */
    start = cursor;
    for (; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks are allowed after the closing quote */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3591
Owen Taylor3473f882001-02-23 17:55:21 +00003592/**
3593 * xmlParsePI:
3594 * @ctxt: an XML parser context
3595 *
3596 * parse an XML Processing Instruction.
3597 *
3598 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3599 *
3600 * The processing is transfered to SAX once parsed.
3601 */
3602
3603void
3604xmlParsePI(xmlParserCtxtPtr ctxt) {
3605 xmlChar *buf = NULL;
3606 int len = 0;
3607 int size = XML_PARSER_BUFFER_SIZE;
3608 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003609 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003610 xmlParserInputState state;
3611 int count = 0;
3612
3613 if ((RAW == '<') && (NXT(1) == '?')) {
3614 xmlParserInputPtr input = ctxt->input;
3615 state = ctxt->instate;
3616 ctxt->instate = XML_PARSER_PI;
3617 /*
3618 * this is a Processing Instruction.
3619 */
3620 SKIP(2);
3621 SHRINK;
3622
3623 /*
3624 * Parse the target name and check for special support like
3625 * namespace.
3626 */
3627 target = xmlParsePITarget(ctxt);
3628 if (target != NULL) {
3629 if ((RAW == '?') && (NXT(1) == '>')) {
3630 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003631 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3632 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003633 }
3634 SKIP(2);
3635
3636 /*
3637 * SAX: PI detected.
3638 */
3639 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3640 (ctxt->sax->processingInstruction != NULL))
3641 ctxt->sax->processingInstruction(ctxt->userData,
3642 target, NULL);
3643 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003644 return;
3645 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003646 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003647 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003648 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003649 ctxt->instate = state;
3650 return;
3651 }
3652 cur = CUR;
3653 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003654 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3655 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003656 }
3657 SKIP_BLANKS;
3658 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003659 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003660 ((cur != '?') || (NXT(1) != '>'))) {
3661 if (len + 5 >= size) {
3662 size *= 2;
3663 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3664 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003665 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003666 ctxt->instate = state;
3667 return;
3668 }
3669 }
3670 count++;
3671 if (count > 50) {
3672 GROW;
3673 count = 0;
3674 }
3675 COPY_BUF(l,buf,len,cur);
3676 NEXTL(l);
3677 cur = CUR_CHAR(l);
3678 if (cur == 0) {
3679 SHRINK;
3680 GROW;
3681 cur = CUR_CHAR(l);
3682 }
3683 }
3684 buf[len] = 0;
3685 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003686 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3687 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003688 } else {
3689 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003690 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3691 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003692 }
3693 SKIP(2);
3694
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003695#ifdef LIBXML_CATALOG_ENABLED
3696 if (((state == XML_PARSER_MISC) ||
3697 (state == XML_PARSER_START)) &&
3698 (xmlStrEqual(target, XML_CATALOG_PI))) {
3699 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3700 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3701 (allow == XML_CATA_ALLOW_ALL))
3702 xmlParseCatalogPI(ctxt, buf);
3703 }
3704#endif
3705
3706
Owen Taylor3473f882001-02-23 17:55:21 +00003707 /*
3708 * SAX: PI detected.
3709 */
3710 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3711 (ctxt->sax->processingInstruction != NULL))
3712 ctxt->sax->processingInstruction(ctxt->userData,
3713 target, buf);
3714 }
3715 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003717 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003718 }
3719 ctxt->instate = state;
3720 }
3721}
3722
3723/**
3724 * xmlParseNotationDecl:
3725 * @ctxt: an XML parser context
3726 *
3727 * parse a notation declaration
3728 *
3729 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3730 *
3731 * Hence there is actually 3 choices:
3732 * 'PUBLIC' S PubidLiteral
3733 * 'PUBLIC' S PubidLiteral S SystemLiteral
3734 * and 'SYSTEM' S SystemLiteral
3735 *
3736 * See the NOTE on xmlParseExternalID().
3737 */
3738
3739void
3740xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003741 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 xmlChar *Pubid;
3743 xmlChar *Systemid;
3744
Daniel Veillarda07050d2003-10-19 14:46:32 +00003745 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003746 xmlParserInputPtr input = ctxt->input;
3747 SHRINK;
3748 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003749 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003750 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3751 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003752 return;
3753 }
3754 SKIP_BLANKS;
3755
Daniel Veillard76d66f42001-05-16 21:05:17 +00003756 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003757 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003758 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003759 return;
3760 }
William M. Brack76e95df2003-10-18 16:20:14 +00003761 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003762 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003763 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003764 return;
3765 }
3766 SKIP_BLANKS;
3767
3768 /*
3769 * Parse the IDs.
3770 */
3771 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3772 SKIP_BLANKS;
3773
3774 if (RAW == '>') {
3775 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3777 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003778 }
3779 NEXT;
3780 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3781 (ctxt->sax->notationDecl != NULL))
3782 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003784 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003785 }
Owen Taylor3473f882001-02-23 17:55:21 +00003786 if (Systemid != NULL) xmlFree(Systemid);
3787 if (Pubid != NULL) xmlFree(Pubid);
3788 }
3789}
3790
3791/**
3792 * xmlParseEntityDecl:
3793 * @ctxt: an XML parser context
3794 *
3795 * parse <!ENTITY declarations
3796 *
3797 * [70] EntityDecl ::= GEDecl | PEDecl
3798 *
3799 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3800 *
3801 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3802 *
3803 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3804 *
3805 * [74] PEDef ::= EntityValue | ExternalID
3806 *
3807 * [76] NDataDecl ::= S 'NDATA' S Name
3808 *
3809 * [ VC: Notation Declared ]
3810 * The Name must match the declared name of a notation.
3811 */
3812
3813void
3814xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003815 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003816 xmlChar *value = NULL;
3817 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003818 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003819 int isParameter = 0;
3820 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003821 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003822
3823 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003824 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003825 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003826 SHRINK;
3827 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003828 skipped = SKIP_BLANKS;
3829 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003830 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3831 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003832 }
Owen Taylor3473f882001-02-23 17:55:21 +00003833
3834 if (RAW == '%') {
3835 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003836 skipped = SKIP_BLANKS;
3837 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003838 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3839 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003840 }
Owen Taylor3473f882001-02-23 17:55:21 +00003841 isParameter = 1;
3842 }
3843
Daniel Veillard76d66f42001-05-16 21:05:17 +00003844 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003845 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003846 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3847 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003848 return;
3849 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003850 skipped = SKIP_BLANKS;
3851 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003852 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3853 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003854 }
Owen Taylor3473f882001-02-23 17:55:21 +00003855
Daniel Veillardf5582f12002-06-11 10:08:16 +00003856 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003857 /*
3858 * handle the various case of definitions...
3859 */
3860 if (isParameter) {
3861 if ((RAW == '"') || (RAW == '\'')) {
3862 value = xmlParseEntityValue(ctxt, &orig);
3863 if (value) {
3864 if ((ctxt->sax != NULL) &&
3865 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3866 ctxt->sax->entityDecl(ctxt->userData, name,
3867 XML_INTERNAL_PARAMETER_ENTITY,
3868 NULL, NULL, value);
3869 }
3870 } else {
3871 URI = xmlParseExternalID(ctxt, &literal, 1);
3872 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003873 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003874 }
3875 if (URI) {
3876 xmlURIPtr uri;
3877
3878 uri = xmlParseURI((const char *) URI);
3879 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003880 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3881 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003882 /*
3883 * This really ought to be a well formedness error
3884 * but the XML Core WG decided otherwise c.f. issue
3885 * E26 of the XML erratas.
3886 */
Owen Taylor3473f882001-02-23 17:55:21 +00003887 } else {
3888 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003889 /*
3890 * Okay this is foolish to block those but not
3891 * invalid URIs.
3892 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003893 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003894 } else {
3895 if ((ctxt->sax != NULL) &&
3896 (!ctxt->disableSAX) &&
3897 (ctxt->sax->entityDecl != NULL))
3898 ctxt->sax->entityDecl(ctxt->userData, name,
3899 XML_EXTERNAL_PARAMETER_ENTITY,
3900 literal, URI, NULL);
3901 }
3902 xmlFreeURI(uri);
3903 }
3904 }
3905 }
3906 } else {
3907 if ((RAW == '"') || (RAW == '\'')) {
3908 value = xmlParseEntityValue(ctxt, &orig);
3909 if ((ctxt->sax != NULL) &&
3910 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3911 ctxt->sax->entityDecl(ctxt->userData, name,
3912 XML_INTERNAL_GENERAL_ENTITY,
3913 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003914 /*
3915 * For expat compatibility in SAX mode.
3916 */
3917 if ((ctxt->myDoc == NULL) ||
3918 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3919 if (ctxt->myDoc == NULL) {
3920 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3921 }
3922 if (ctxt->myDoc->intSubset == NULL)
3923 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3924 BAD_CAST "fake", NULL, NULL);
3925
Daniel Veillard1af9a412003-08-20 22:54:39 +00003926 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3927 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003928 }
Owen Taylor3473f882001-02-23 17:55:21 +00003929 } else {
3930 URI = xmlParseExternalID(ctxt, &literal, 1);
3931 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003932 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003933 }
3934 if (URI) {
3935 xmlURIPtr uri;
3936
3937 uri = xmlParseURI((const char *)URI);
3938 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003939 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3940 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003941 /*
3942 * This really ought to be a well formedness error
3943 * but the XML Core WG decided otherwise c.f. issue
3944 * E26 of the XML erratas.
3945 */
Owen Taylor3473f882001-02-23 17:55:21 +00003946 } else {
3947 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003948 /*
3949 * Okay this is foolish to block those but not
3950 * invalid URIs.
3951 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003952 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003953 }
3954 xmlFreeURI(uri);
3955 }
3956 }
William M. Brack76e95df2003-10-18 16:20:14 +00003957 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003958 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3959 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003960 }
3961 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003962 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003963 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00003964 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003965 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3966 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003967 }
3968 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003969 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3971 (ctxt->sax->unparsedEntityDecl != NULL))
3972 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3973 literal, URI, ndata);
3974 } else {
3975 if ((ctxt->sax != NULL) &&
3976 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3977 ctxt->sax->entityDecl(ctxt->userData, name,
3978 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3979 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003980 /*
3981 * For expat compatibility in SAX mode.
3982 * assuming the entity repalcement was asked for
3983 */
3984 if ((ctxt->replaceEntities != 0) &&
3985 ((ctxt->myDoc == NULL) ||
3986 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3987 if (ctxt->myDoc == NULL) {
3988 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3989 }
3990
3991 if (ctxt->myDoc->intSubset == NULL)
3992 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3993 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00003994 xmlSAX2EntityDecl(ctxt, name,
3995 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3996 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003997 }
Owen Taylor3473f882001-02-23 17:55:21 +00003998 }
3999 }
4000 }
4001 SKIP_BLANKS;
4002 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004003 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004004 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004005 } else {
4006 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004007 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4008 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004009 }
4010 NEXT;
4011 }
4012 if (orig != NULL) {
4013 /*
4014 * Ugly mechanism to save the raw entity value.
4015 */
4016 xmlEntityPtr cur = NULL;
4017
4018 if (isParameter) {
4019 if ((ctxt->sax != NULL) &&
4020 (ctxt->sax->getParameterEntity != NULL))
4021 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4022 } else {
4023 if ((ctxt->sax != NULL) &&
4024 (ctxt->sax->getEntity != NULL))
4025 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004026 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004027 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004028 }
Owen Taylor3473f882001-02-23 17:55:21 +00004029 }
4030 if (cur != NULL) {
4031 if (cur->orig != NULL)
4032 xmlFree(orig);
4033 else
4034 cur->orig = orig;
4035 } else
4036 xmlFree(orig);
4037 }
Owen Taylor3473f882001-02-23 17:55:21 +00004038 if (value != NULL) xmlFree(value);
4039 if (URI != NULL) xmlFree(URI);
4040 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 }
4042}
4043
4044/**
4045 * xmlParseDefaultDecl:
4046 * @ctxt: an XML parser context
4047 * @value: Receive a possible fixed default value for the attribute
4048 *
4049 * Parse an attribute default declaration
4050 *
4051 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4052 *
4053 * [ VC: Required Attribute ]
4054 * if the default declaration is the keyword #REQUIRED, then the
4055 * attribute must be specified for all elements of the type in the
4056 * attribute-list declaration.
4057 *
4058 * [ VC: Attribute Default Legal ]
4059 * The declared default value must meet the lexical constraints of
4060 * the declared attribute type c.f. xmlValidateAttributeDecl()
4061 *
4062 * [ VC: Fixed Attribute Default ]
4063 * if an attribute has a default value declared with the #FIXED
4064 * keyword, instances of that attribute must match the default value.
4065 *
4066 * [ WFC: No < in Attribute Values ]
4067 * handled in xmlParseAttValue()
4068 *
4069 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4070 * or XML_ATTRIBUTE_FIXED.
4071 */
4072
4073int
4074xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4075 int val;
4076 xmlChar *ret;
4077
4078 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004079 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004080 SKIP(9);
4081 return(XML_ATTRIBUTE_REQUIRED);
4082 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004083 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004084 SKIP(8);
4085 return(XML_ATTRIBUTE_IMPLIED);
4086 }
4087 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004088 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004089 SKIP(6);
4090 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004091 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004092 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4093 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004094 }
4095 SKIP_BLANKS;
4096 }
4097 ret = xmlParseAttValue(ctxt);
4098 ctxt->instate = XML_PARSER_DTD;
4099 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004100 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004101 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004102 } else
4103 *value = ret;
4104 return(val);
4105}
4106
4107/**
4108 * xmlParseNotationType:
4109 * @ctxt: an XML parser context
4110 *
4111 * parse an Notation attribute type.
4112 *
4113 * Note: the leading 'NOTATION' S part has already being parsed...
4114 *
4115 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4116 *
4117 * [ VC: Notation Attributes ]
4118 * Values of this type must match one of the notation names included
4119 * in the declaration; all notation names in the declaration must be declared.
4120 *
4121 * Returns: the notation attribute tree built while parsing
4122 */
4123
4124xmlEnumerationPtr
4125xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004126 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004127 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4128
4129 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004130 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004131 return(NULL);
4132 }
4133 SHRINK;
4134 do {
4135 NEXT;
4136 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004137 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004138 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004139 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4140 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004141 return(ret);
4142 }
4143 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004144 if (cur == NULL) return(ret);
4145 if (last == NULL) ret = last = cur;
4146 else {
4147 last->next = cur;
4148 last = cur;
4149 }
4150 SKIP_BLANKS;
4151 } while (RAW == '|');
4152 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004153 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004154 if ((last != NULL) && (last != ret))
4155 xmlFreeEnumeration(last);
4156 return(ret);
4157 }
4158 NEXT;
4159 return(ret);
4160}
4161
4162/**
4163 * xmlParseEnumerationType:
4164 * @ctxt: an XML parser context
4165 *
4166 * parse an Enumeration attribute type.
4167 *
4168 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4169 *
4170 * [ VC: Enumeration ]
4171 * Values of this type must match one of the Nmtoken tokens in
4172 * the declaration
4173 *
4174 * Returns: the enumeration attribute tree built while parsing
4175 */
4176
4177xmlEnumerationPtr
4178xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4179 xmlChar *name;
4180 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4181
4182 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004183 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 return(NULL);
4185 }
4186 SHRINK;
4187 do {
4188 NEXT;
4189 SKIP_BLANKS;
4190 name = xmlParseNmtoken(ctxt);
4191 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004192 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004193 return(ret);
4194 }
4195 cur = xmlCreateEnumeration(name);
4196 xmlFree(name);
4197 if (cur == NULL) return(ret);
4198 if (last == NULL) ret = last = cur;
4199 else {
4200 last->next = cur;
4201 last = cur;
4202 }
4203 SKIP_BLANKS;
4204 } while (RAW == '|');
4205 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004206 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004207 return(ret);
4208 }
4209 NEXT;
4210 return(ret);
4211}
4212
4213/**
4214 * xmlParseEnumeratedType:
4215 * @ctxt: an XML parser context
4216 * @tree: the enumeration tree built while parsing
4217 *
4218 * parse an Enumerated attribute type.
4219 *
4220 * [57] EnumeratedType ::= NotationType | Enumeration
4221 *
4222 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4223 *
4224 *
4225 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4226 */
4227
4228int
4229xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004230 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004231 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004232 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004233 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4234 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004235 return(0);
4236 }
4237 SKIP_BLANKS;
4238 *tree = xmlParseNotationType(ctxt);
4239 if (*tree == NULL) return(0);
4240 return(XML_ATTRIBUTE_NOTATION);
4241 }
4242 *tree = xmlParseEnumerationType(ctxt);
4243 if (*tree == NULL) return(0);
4244 return(XML_ATTRIBUTE_ENUMERATION);
4245}
4246
4247/**
4248 * xmlParseAttributeType:
4249 * @ctxt: an XML parser context
4250 * @tree: the enumeration tree built while parsing
4251 *
4252 * parse the Attribute list def for an element
4253 *
4254 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4255 *
4256 * [55] StringType ::= 'CDATA'
4257 *
4258 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4259 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4260 *
4261 * Validity constraints for attribute values syntax are checked in
4262 * xmlValidateAttributeValue()
4263 *
4264 * [ VC: ID ]
4265 * Values of type ID must match the Name production. A name must not
4266 * appear more than once in an XML document as a value of this type;
4267 * i.e., ID values must uniquely identify the elements which bear them.
4268 *
4269 * [ VC: One ID per Element Type ]
4270 * No element type may have more than one ID attribute specified.
4271 *
4272 * [ VC: ID Attribute Default ]
4273 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4274 *
4275 * [ VC: IDREF ]
4276 * Values of type IDREF must match the Name production, and values
4277 * of type IDREFS must match Names; each IDREF Name must match the value
4278 * of an ID attribute on some element in the XML document; i.e. IDREF
4279 * values must match the value of some ID attribute.
4280 *
4281 * [ VC: Entity Name ]
4282 * Values of type ENTITY must match the Name production, values
4283 * of type ENTITIES must match Names; each Entity Name must match the
4284 * name of an unparsed entity declared in the DTD.
4285 *
4286 * [ VC: Name Token ]
4287 * Values of type NMTOKEN must match the Nmtoken production; values
4288 * of type NMTOKENS must match Nmtokens.
4289 *
4290 * Returns the attribute type
4291 */
4292int
4293xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4294 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004295 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004296 SKIP(5);
4297 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004298 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004299 SKIP(6);
4300 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004301 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004302 SKIP(5);
4303 return(XML_ATTRIBUTE_IDREF);
4304 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4305 SKIP(2);
4306 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004307 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004308 SKIP(6);
4309 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004310 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004311 SKIP(8);
4312 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004313 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SKIP(8);
4315 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004316 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004317 SKIP(7);
4318 return(XML_ATTRIBUTE_NMTOKEN);
4319 }
4320 return(xmlParseEnumeratedType(ctxt, tree));
4321}
4322
4323/**
4324 * xmlParseAttributeListDecl:
4325 * @ctxt: an XML parser context
4326 *
4327 * : parse the Attribute list def for an element
4328 *
4329 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4330 *
4331 * [53] AttDef ::= S Name S AttType S DefaultDecl
4332 *
4333 */
4334void
4335xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004336 const xmlChar *elemName;
4337 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004338 xmlEnumerationPtr tree;
4339
Daniel Veillarda07050d2003-10-19 14:46:32 +00004340 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004341 xmlParserInputPtr input = ctxt->input;
4342
4343 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004344 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004345 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004346 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004347 }
4348 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004349 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004350 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004351 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4352 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004353 return;
4354 }
4355 SKIP_BLANKS;
4356 GROW;
4357 while (RAW != '>') {
4358 const xmlChar *check = CUR_PTR;
4359 int type;
4360 int def;
4361 xmlChar *defaultValue = NULL;
4362
4363 GROW;
4364 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004365 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004366 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004367 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4368 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004369 break;
4370 }
4371 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004372 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004373 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004374 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004375 if (defaultValue != NULL)
4376 xmlFree(defaultValue);
4377 break;
4378 }
4379 SKIP_BLANKS;
4380
4381 type = xmlParseAttributeType(ctxt, &tree);
4382 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004383 if (defaultValue != NULL)
4384 xmlFree(defaultValue);
4385 break;
4386 }
4387
4388 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004389 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004390 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4391 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004392 if (defaultValue != NULL)
4393 xmlFree(defaultValue);
4394 if (tree != NULL)
4395 xmlFreeEnumeration(tree);
4396 break;
4397 }
4398 SKIP_BLANKS;
4399
4400 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4401 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004402 if (defaultValue != NULL)
4403 xmlFree(defaultValue);
4404 if (tree != NULL)
4405 xmlFreeEnumeration(tree);
4406 break;
4407 }
4408
4409 GROW;
4410 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004411 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004412 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004413 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004414 if (defaultValue != NULL)
4415 xmlFree(defaultValue);
4416 if (tree != NULL)
4417 xmlFreeEnumeration(tree);
4418 break;
4419 }
4420 SKIP_BLANKS;
4421 }
4422 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4424 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004425 if (defaultValue != NULL)
4426 xmlFree(defaultValue);
4427 if (tree != NULL)
4428 xmlFreeEnumeration(tree);
4429 break;
4430 }
4431 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4432 (ctxt->sax->attributeDecl != NULL))
4433 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4434 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004435 else if (tree != NULL)
4436 xmlFreeEnumeration(tree);
4437
4438 if ((ctxt->sax2) && (defaultValue != NULL) &&
4439 (def != XML_ATTRIBUTE_IMPLIED) &&
4440 (def != XML_ATTRIBUTE_REQUIRED)) {
4441 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4442 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004443 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4444 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4445 }
Owen Taylor3473f882001-02-23 17:55:21 +00004446 if (defaultValue != NULL)
4447 xmlFree(defaultValue);
4448 GROW;
4449 }
4450 if (RAW == '>') {
4451 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004452 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4453 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004454 }
4455 NEXT;
4456 }
Owen Taylor3473f882001-02-23 17:55:21 +00004457 }
4458}
4459
4460/**
4461 * xmlParseElementMixedContentDecl:
4462 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004463 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004464 *
4465 * parse the declaration for a Mixed Element content
4466 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4467 *
4468 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4469 * '(' S? '#PCDATA' S? ')'
4470 *
4471 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4472 *
4473 * [ VC: No Duplicate Types ]
4474 * The same name must not appear more than once in a single
4475 * mixed-content declaration.
4476 *
4477 * returns: the list of the xmlElementContentPtr describing the element choices
4478 */
4479xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004480xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004481 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004482 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004483
4484 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004485 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004486 SKIP(7);
4487 SKIP_BLANKS;
4488 SHRINK;
4489 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004490 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004491 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4492"Element content declaration doesn't start and stop in the same entity\n",
4493 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004494 }
Owen Taylor3473f882001-02-23 17:55:21 +00004495 NEXT;
4496 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4497 if (RAW == '*') {
4498 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4499 NEXT;
4500 }
4501 return(ret);
4502 }
4503 if ((RAW == '(') || (RAW == '|')) {
4504 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4505 if (ret == NULL) return(NULL);
4506 }
4507 while (RAW == '|') {
4508 NEXT;
4509 if (elem == NULL) {
4510 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4511 if (ret == NULL) return(NULL);
4512 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004513 if (cur != NULL)
4514 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004515 cur = ret;
4516 } else {
4517 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4518 if (n == NULL) return(NULL);
4519 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004520 if (n->c1 != NULL)
4521 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004522 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004523 if (n != NULL)
4524 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004525 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004526 }
4527 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004528 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004529 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004530 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004531 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004532 xmlFreeElementContent(cur);
4533 return(NULL);
4534 }
4535 SKIP_BLANKS;
4536 GROW;
4537 }
4538 if ((RAW == ')') && (NXT(1) == '*')) {
4539 if (elem != NULL) {
4540 cur->c2 = xmlNewElementContent(elem,
4541 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004542 if (cur->c2 != NULL)
4543 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004544 }
4545 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004546 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004547 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4548"Element content declaration doesn't start and stop in the same entity\n",
4549 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004550 }
Owen Taylor3473f882001-02-23 17:55:21 +00004551 SKIP(2);
4552 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004553 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004554 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004555 return(NULL);
4556 }
4557
4558 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004559 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004560 }
4561 return(ret);
4562}
4563
4564/**
4565 * xmlParseElementChildrenContentDecl:
4566 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004567 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004568 *
4569 * parse the declaration for a Mixed Element content
4570 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4571 *
4572 *
4573 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4574 *
4575 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4576 *
4577 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4578 *
4579 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4580 *
4581 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4582 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004583 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004584 * opening or closing parentheses in a choice, seq, or Mixed
4585 * construct is contained in the replacement text for a parameter
4586 * entity, both must be contained in the same replacement text. For
4587 * interoperability, if a parameter-entity reference appears in a
4588 * choice, seq, or Mixed construct, its replacement text should not
4589 * be empty, and neither the first nor last non-blank character of
4590 * the replacement text should be a connector (| or ,).
4591 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004592 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004593 * hierarchy.
4594 */
4595xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004596xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004597 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004598 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004599 xmlChar type = 0;
4600
4601 SKIP_BLANKS;
4602 GROW;
4603 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004604 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004605
Owen Taylor3473f882001-02-23 17:55:21 +00004606 /* Recurse on first child */
4607 NEXT;
4608 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004609 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004610 SKIP_BLANKS;
4611 GROW;
4612 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004613 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004614 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004615 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004616 return(NULL);
4617 }
4618 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004619 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004620 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004621 return(NULL);
4622 }
Owen Taylor3473f882001-02-23 17:55:21 +00004623 GROW;
4624 if (RAW == '?') {
4625 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4626 NEXT;
4627 } else if (RAW == '*') {
4628 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4629 NEXT;
4630 } else if (RAW == '+') {
4631 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4632 NEXT;
4633 } else {
4634 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4635 }
Owen Taylor3473f882001-02-23 17:55:21 +00004636 GROW;
4637 }
4638 SKIP_BLANKS;
4639 SHRINK;
4640 while (RAW != ')') {
4641 /*
4642 * Each loop we parse one separator and one element.
4643 */
4644 if (RAW == ',') {
4645 if (type == 0) type = CUR;
4646
4647 /*
4648 * Detect "Name | Name , Name" error
4649 */
4650 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004651 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004652 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004653 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004654 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004655 xmlFreeElementContent(last);
4656 if (ret != NULL)
4657 xmlFreeElementContent(ret);
4658 return(NULL);
4659 }
4660 NEXT;
4661
4662 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4663 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004664 if ((last != NULL) && (last != ret))
4665 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004666 xmlFreeElementContent(ret);
4667 return(NULL);
4668 }
4669 if (last == NULL) {
4670 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004671 if (ret != NULL)
4672 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004673 ret = cur = op;
4674 } else {
4675 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004676 if (op != NULL)
4677 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004678 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004679 if (last != NULL)
4680 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004681 cur =op;
4682 last = NULL;
4683 }
4684 } else if (RAW == '|') {
4685 if (type == 0) type = CUR;
4686
4687 /*
4688 * Detect "Name , Name | Name" error
4689 */
4690 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004691 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004692 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004693 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004694 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004695 xmlFreeElementContent(last);
4696 if (ret != NULL)
4697 xmlFreeElementContent(ret);
4698 return(NULL);
4699 }
4700 NEXT;
4701
4702 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4703 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004704 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004705 xmlFreeElementContent(last);
4706 if (ret != NULL)
4707 xmlFreeElementContent(ret);
4708 return(NULL);
4709 }
4710 if (last == NULL) {
4711 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004712 if (ret != NULL)
4713 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004714 ret = cur = op;
4715 } else {
4716 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004717 if (op != NULL)
4718 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004719 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004720 if (last != NULL)
4721 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004722 cur =op;
4723 last = NULL;
4724 }
4725 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004726 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004727 if (ret != NULL)
4728 xmlFreeElementContent(ret);
4729 return(NULL);
4730 }
4731 GROW;
4732 SKIP_BLANKS;
4733 GROW;
4734 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004735 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004736 /* Recurse on second child */
4737 NEXT;
4738 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004739 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004740 SKIP_BLANKS;
4741 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004742 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004744 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 if (ret != NULL)
4746 xmlFreeElementContent(ret);
4747 return(NULL);
4748 }
4749 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004750 if (RAW == '?') {
4751 last->ocur = XML_ELEMENT_CONTENT_OPT;
4752 NEXT;
4753 } else if (RAW == '*') {
4754 last->ocur = XML_ELEMENT_CONTENT_MULT;
4755 NEXT;
4756 } else if (RAW == '+') {
4757 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4758 NEXT;
4759 } else {
4760 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4761 }
4762 }
4763 SKIP_BLANKS;
4764 GROW;
4765 }
4766 if ((cur != NULL) && (last != NULL)) {
4767 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004768 if (last != NULL)
4769 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004770 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004771 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004772 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4773"Element content declaration doesn't start and stop in the same entity\n",
4774 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004775 }
Owen Taylor3473f882001-02-23 17:55:21 +00004776 NEXT;
4777 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004778 if (ret != NULL)
4779 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004780 NEXT;
4781 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004782 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004783 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004784 cur = ret;
4785 /*
4786 * Some normalization:
4787 * (a | b* | c?)* == (a | b | c)*
4788 */
4789 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4790 if ((cur->c1 != NULL) &&
4791 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4792 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4793 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4794 if ((cur->c2 != NULL) &&
4795 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4796 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4797 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4798 cur = cur->c2;
4799 }
4800 }
Owen Taylor3473f882001-02-23 17:55:21 +00004801 NEXT;
4802 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004803 if (ret != NULL) {
4804 int found = 0;
4805
Daniel Veillarde470df72001-04-18 21:41:07 +00004806 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004807 /*
4808 * Some normalization:
4809 * (a | b*)+ == (a | b)*
4810 * (a | b?)+ == (a | b)*
4811 */
4812 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4813 if ((cur->c1 != NULL) &&
4814 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4815 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4816 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4817 found = 1;
4818 }
4819 if ((cur->c2 != NULL) &&
4820 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4821 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4822 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4823 found = 1;
4824 }
4825 cur = cur->c2;
4826 }
4827 if (found)
4828 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4829 }
Owen Taylor3473f882001-02-23 17:55:21 +00004830 NEXT;
4831 }
4832 return(ret);
4833}
4834
4835/**
4836 * xmlParseElementContentDecl:
4837 * @ctxt: an XML parser context
4838 * @name: the name of the element being defined.
4839 * @result: the Element Content pointer will be stored here if any
4840 *
4841 * parse the declaration for an Element content either Mixed or Children,
4842 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4843 *
4844 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4845 *
4846 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4847 */
4848
4849int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004850xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004851 xmlElementContentPtr *result) {
4852
4853 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004854 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004855 int res;
4856
4857 *result = NULL;
4858
4859 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004860 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004861 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004862 return(-1);
4863 }
4864 NEXT;
4865 GROW;
4866 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004867 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004868 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004869 res = XML_ELEMENT_TYPE_MIXED;
4870 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004871 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004872 res = XML_ELEMENT_TYPE_ELEMENT;
4873 }
Owen Taylor3473f882001-02-23 17:55:21 +00004874 SKIP_BLANKS;
4875 *result = tree;
4876 return(res);
4877}
4878
4879/**
4880 * xmlParseElementDecl:
4881 * @ctxt: an XML parser context
4882 *
4883 * parse an Element declaration.
4884 *
4885 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4886 *
4887 * [ VC: Unique Element Type Declaration ]
4888 * No element type may be declared more than once
4889 *
4890 * Returns the type of the element, or -1 in case of error
4891 */
4892int
4893xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004894 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004895 int ret = -1;
4896 xmlElementContentPtr content = NULL;
4897
4898 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004899 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004900 xmlParserInputPtr input = ctxt->input;
4901
4902 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004903 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004904 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4905 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004906 }
4907 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004908 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004909 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004910 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4911 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004912 return(-1);
4913 }
4914 while ((RAW == 0) && (ctxt->inputNr > 1))
4915 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004916 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4918 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004919 }
4920 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004921 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004922 SKIP(5);
4923 /*
4924 * Element must always be empty.
4925 */
4926 ret = XML_ELEMENT_TYPE_EMPTY;
4927 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4928 (NXT(2) == 'Y')) {
4929 SKIP(3);
4930 /*
4931 * Element is a generic container.
4932 */
4933 ret = XML_ELEMENT_TYPE_ANY;
4934 } else if (RAW == '(') {
4935 ret = xmlParseElementContentDecl(ctxt, name, &content);
4936 } else {
4937 /*
4938 * [ WFC: PEs in Internal Subset ] error handling.
4939 */
4940 if ((RAW == '%') && (ctxt->external == 0) &&
4941 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004942 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004943 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004944 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004945 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004946 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4947 }
Owen Taylor3473f882001-02-23 17:55:21 +00004948 return(-1);
4949 }
4950
4951 SKIP_BLANKS;
4952 /*
4953 * Pop-up of finished entities.
4954 */
4955 while ((RAW == 0) && (ctxt->inputNr > 1))
4956 xmlPopInput(ctxt);
4957 SKIP_BLANKS;
4958
4959 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004960 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004961 } else {
4962 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004963 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4964 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004965 }
4966
4967 NEXT;
4968 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4969 (ctxt->sax->elementDecl != NULL))
4970 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4971 content);
4972 }
4973 if (content != NULL) {
4974 xmlFreeElementContent(content);
4975 }
Owen Taylor3473f882001-02-23 17:55:21 +00004976 }
4977 return(ret);
4978}
4979
4980/**
Owen Taylor3473f882001-02-23 17:55:21 +00004981 * xmlParseConditionalSections
4982 * @ctxt: an XML parser context
4983 *
4984 * [61] conditionalSect ::= includeSect | ignoreSect
4985 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4986 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4987 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4988 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4989 */
4990
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004991static void
Owen Taylor3473f882001-02-23 17:55:21 +00004992xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4993 SKIP(3);
4994 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004995 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004996 SKIP(7);
4997 SKIP_BLANKS;
4998 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004999 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005000 } else {
5001 NEXT;
5002 }
5003 if (xmlParserDebugEntities) {
5004 if ((ctxt->input != NULL) && (ctxt->input->filename))
5005 xmlGenericError(xmlGenericErrorContext,
5006 "%s(%d): ", ctxt->input->filename,
5007 ctxt->input->line);
5008 xmlGenericError(xmlGenericErrorContext,
5009 "Entering INCLUDE Conditional Section\n");
5010 }
5011
5012 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5013 (NXT(2) != '>'))) {
5014 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005015 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005016
5017 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5018 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005019 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005020 NEXT;
5021 } else if (RAW == '%') {
5022 xmlParsePEReference(ctxt);
5023 } else
5024 xmlParseMarkupDecl(ctxt);
5025
5026 /*
5027 * Pop-up of finished entities.
5028 */
5029 while ((RAW == 0) && (ctxt->inputNr > 1))
5030 xmlPopInput(ctxt);
5031
Daniel Veillardfdc91562002-07-01 21:52:03 +00005032 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005033 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005034 break;
5035 }
5036 }
5037 if (xmlParserDebugEntities) {
5038 if ((ctxt->input != NULL) && (ctxt->input->filename))
5039 xmlGenericError(xmlGenericErrorContext,
5040 "%s(%d): ", ctxt->input->filename,
5041 ctxt->input->line);
5042 xmlGenericError(xmlGenericErrorContext,
5043 "Leaving INCLUDE Conditional Section\n");
5044 }
5045
Daniel Veillarda07050d2003-10-19 14:46:32 +00005046 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005047 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005048 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005049 int depth = 0;
5050
5051 SKIP(6);
5052 SKIP_BLANKS;
5053 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005054 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005055 } else {
5056 NEXT;
5057 }
5058 if (xmlParserDebugEntities) {
5059 if ((ctxt->input != NULL) && (ctxt->input->filename))
5060 xmlGenericError(xmlGenericErrorContext,
5061 "%s(%d): ", ctxt->input->filename,
5062 ctxt->input->line);
5063 xmlGenericError(xmlGenericErrorContext,
5064 "Entering IGNORE Conditional Section\n");
5065 }
5066
5067 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005068 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005069 * But disable SAX event generating DTD building in the meantime
5070 */
5071 state = ctxt->disableSAX;
5072 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005073 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005074 ctxt->instate = XML_PARSER_IGNORE;
5075
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005076 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005077 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5078 depth++;
5079 SKIP(3);
5080 continue;
5081 }
5082 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5083 if (--depth >= 0) SKIP(3);
5084 continue;
5085 }
5086 NEXT;
5087 continue;
5088 }
5089
5090 ctxt->disableSAX = state;
5091 ctxt->instate = instate;
5092
5093 if (xmlParserDebugEntities) {
5094 if ((ctxt->input != NULL) && (ctxt->input->filename))
5095 xmlGenericError(xmlGenericErrorContext,
5096 "%s(%d): ", ctxt->input->filename,
5097 ctxt->input->line);
5098 xmlGenericError(xmlGenericErrorContext,
5099 "Leaving IGNORE Conditional Section\n");
5100 }
5101
5102 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005103 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005104 }
5105
5106 if (RAW == 0)
5107 SHRINK;
5108
5109 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005110 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005111 } else {
5112 SKIP(3);
5113 }
5114}
5115
5116/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005117 * xmlParseMarkupDecl:
5118 * @ctxt: an XML parser context
5119 *
5120 * parse Markup declarations
5121 *
5122 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5123 * NotationDecl | PI | Comment
5124 *
5125 * [ VC: Proper Declaration/PE Nesting ]
5126 * Parameter-entity replacement text must be properly nested with
5127 * markup declarations. That is to say, if either the first character
5128 * or the last character of a markup declaration (markupdecl above) is
5129 * contained in the replacement text for a parameter-entity reference,
5130 * both must be contained in the same replacement text.
5131 *
5132 * [ WFC: PEs in Internal Subset ]
5133 * In the internal DTD subset, parameter-entity references can occur
5134 * only where markup declarations can occur, not within markup declarations.
5135 * (This does not apply to references that occur in external parameter
5136 * entities or to the external subset.)
5137 */
5138void
5139xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5140 GROW;
5141 xmlParseElementDecl(ctxt);
5142 xmlParseAttributeListDecl(ctxt);
5143 xmlParseEntityDecl(ctxt);
5144 xmlParseNotationDecl(ctxt);
5145 xmlParsePI(ctxt);
5146 xmlParseComment(ctxt);
5147 /*
5148 * This is only for internal subset. On external entities,
5149 * the replacement is done before parsing stage
5150 */
5151 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5152 xmlParsePEReference(ctxt);
5153
5154 /*
5155 * Conditional sections are allowed from entities included
5156 * by PE References in the internal subset.
5157 */
5158 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5159 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5160 xmlParseConditionalSections(ctxt);
5161 }
5162 }
5163
5164 ctxt->instate = XML_PARSER_DTD;
5165}
5166
5167/**
5168 * xmlParseTextDecl:
5169 * @ctxt: an XML parser context
5170 *
5171 * parse an XML declaration header for external entities
5172 *
5173 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5174 *
5175 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5176 */
5177
5178void
5179xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5180 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005181 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005182
5183 /*
5184 * We know that '<?xml' is here.
5185 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005186 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005187 SKIP(5);
5188 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005189 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005190 return;
5191 }
5192
William M. Brack76e95df2003-10-18 16:20:14 +00005193 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005194 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5195 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005196 }
5197 SKIP_BLANKS;
5198
5199 /*
5200 * We may have the VersionInfo here.
5201 */
5202 version = xmlParseVersionInfo(ctxt);
5203 if (version == NULL)
5204 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005205 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005206 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005207 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5208 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005209 }
5210 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005211 ctxt->input->version = version;
5212
5213 /*
5214 * We must have the encoding declaration
5215 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005216 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005217 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5218 /*
5219 * The XML REC instructs us to stop parsing right here
5220 */
5221 return;
5222 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005223 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5224 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5225 "Missing encoding in text declaration\n");
5226 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005227
5228 SKIP_BLANKS;
5229 if ((RAW == '?') && (NXT(1) == '>')) {
5230 SKIP(2);
5231 } else if (RAW == '>') {
5232 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005233 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 NEXT;
5235 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005236 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005237 MOVETO_ENDTAG(CUR_PTR);
5238 NEXT;
5239 }
5240}
5241
5242/**
Owen Taylor3473f882001-02-23 17:55:21 +00005243 * xmlParseExternalSubset:
5244 * @ctxt: an XML parser context
5245 * @ExternalID: the external identifier
5246 * @SystemID: the system identifier (or URL)
5247 *
5248 * parse Markup declarations from an external subset
5249 *
5250 * [30] extSubset ::= textDecl? extSubsetDecl
5251 *
5252 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5253 */
5254void
5255xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5256 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005257 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005258 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005259 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005260 xmlParseTextDecl(ctxt);
5261 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5262 /*
5263 * The XML REC instructs us to stop parsing right here
5264 */
5265 ctxt->instate = XML_PARSER_EOF;
5266 return;
5267 }
5268 }
5269 if (ctxt->myDoc == NULL) {
5270 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5271 }
5272 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5273 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5274
5275 ctxt->instate = XML_PARSER_DTD;
5276 ctxt->external = 1;
5277 while (((RAW == '<') && (NXT(1) == '?')) ||
5278 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005279 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005280 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005281 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005282
5283 GROW;
5284 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5285 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005286 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005287 NEXT;
5288 } else if (RAW == '%') {
5289 xmlParsePEReference(ctxt);
5290 } else
5291 xmlParseMarkupDecl(ctxt);
5292
5293 /*
5294 * Pop-up of finished entities.
5295 */
5296 while ((RAW == 0) && (ctxt->inputNr > 1))
5297 xmlPopInput(ctxt);
5298
Daniel Veillardfdc91562002-07-01 21:52:03 +00005299 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005300 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005301 break;
5302 }
5303 }
5304
5305 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005306 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005307 }
5308
5309}
5310
5311/**
5312 * xmlParseReference:
5313 * @ctxt: an XML parser context
5314 *
5315 * parse and handle entity references in content, depending on the SAX
5316 * interface, this may end-up in a call to character() if this is a
5317 * CharRef, a predefined entity, if there is no reference() callback.
5318 * or if the parser was asked to switch to that mode.
5319 *
5320 * [67] Reference ::= EntityRef | CharRef
5321 */
5322void
5323xmlParseReference(xmlParserCtxtPtr ctxt) {
5324 xmlEntityPtr ent;
5325 xmlChar *val;
5326 if (RAW != '&') return;
5327
5328 if (NXT(1) == '#') {
5329 int i = 0;
5330 xmlChar out[10];
5331 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005332 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005333
5334 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5335 /*
5336 * So we are using non-UTF-8 buffers
5337 * Check that the char fit on 8bits, if not
5338 * generate a CharRef.
5339 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005340 if (value <= 0xFF) {
5341 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 out[1] = 0;
5343 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5344 (!ctxt->disableSAX))
5345 ctxt->sax->characters(ctxt->userData, out, 1);
5346 } else {
5347 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005348 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005349 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005350 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005351 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5352 (!ctxt->disableSAX))
5353 ctxt->sax->reference(ctxt->userData, out);
5354 }
5355 } else {
5356 /*
5357 * Just encode the value in UTF-8
5358 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005359 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 out[i] = 0;
5361 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5362 (!ctxt->disableSAX))
5363 ctxt->sax->characters(ctxt->userData, out, i);
5364 }
5365 } else {
5366 ent = xmlParseEntityRef(ctxt);
5367 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005368 if (!ctxt->wellFormed)
5369 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005370 if ((ent->name != NULL) &&
5371 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5372 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005373 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005374
5375
5376 /*
5377 * The first reference to the entity trigger a parsing phase
5378 * where the ent->children is filled with the result from
5379 * the parsing.
5380 */
5381 if (ent->children == NULL) {
5382 xmlChar *value;
5383 value = ent->content;
5384
5385 /*
5386 * Check that this entity is well formed
5387 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005388 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005389 (value[1] == 0) && (value[0] == '<') &&
5390 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5391 /*
5392 * DONE: get definite answer on this !!!
5393 * Lots of entity decls are used to declare a single
5394 * char
5395 * <!ENTITY lt "<">
5396 * Which seems to be valid since
5397 * 2.4: The ampersand character (&) and the left angle
5398 * bracket (<) may appear in their literal form only
5399 * when used ... They are also legal within the literal
5400 * entity value of an internal entity declaration;i
5401 * see "4.3.2 Well-Formed Parsed Entities".
5402 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5403 * Looking at the OASIS test suite and James Clark
5404 * tests, this is broken. However the XML REC uses
5405 * it. Is the XML REC not well-formed ????
5406 * This is a hack to avoid this problem
5407 *
5408 * ANSWER: since lt gt amp .. are already defined,
5409 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005410 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005411 * is lousy but acceptable.
5412 */
5413 list = xmlNewDocText(ctxt->myDoc, value);
5414 if (list != NULL) {
5415 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5416 (ent->children == NULL)) {
5417 ent->children = list;
5418 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005419 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005420 list->parent = (xmlNodePtr) ent;
5421 } else {
5422 xmlFreeNodeList(list);
5423 }
5424 } else if (list != NULL) {
5425 xmlFreeNodeList(list);
5426 }
5427 } else {
5428 /*
5429 * 4.3.2: An internal general parsed entity is well-formed
5430 * if its replacement text matches the production labeled
5431 * content.
5432 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005433
5434 void *user_data;
5435 /*
5436 * This is a bit hackish but this seems the best
5437 * way to make sure both SAX and DOM entity support
5438 * behaves okay.
5439 */
5440 if (ctxt->userData == ctxt)
5441 user_data = NULL;
5442 else
5443 user_data = ctxt->userData;
5444
Owen Taylor3473f882001-02-23 17:55:21 +00005445 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5446 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005447 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5448 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005449 ctxt->depth--;
5450 } else if (ent->etype ==
5451 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5452 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005453 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005454 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005455 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 ctxt->depth--;
5457 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005458 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005459 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5460 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005461 }
5462 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005463 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005464 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005465 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005466 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5467 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005468 (ent->children == NULL)) {
5469 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005470 if (ctxt->replaceEntities) {
5471 /*
5472 * Prune it directly in the generated document
5473 * except for single text nodes.
5474 */
5475 if ((list->type == XML_TEXT_NODE) &&
5476 (list->next == NULL)) {
5477 list->parent = (xmlNodePtr) ent;
5478 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005479 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005480 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005481 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005482 while (list != NULL) {
5483 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005484 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005485 if (list->next == NULL)
5486 ent->last = list;
5487 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005488 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005489 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005490#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005491 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5492 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005493#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005494 }
5495 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005496 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005497 while (list != NULL) {
5498 list->parent = (xmlNodePtr) ent;
5499 if (list->next == NULL)
5500 ent->last = list;
5501 list = list->next;
5502 }
Owen Taylor3473f882001-02-23 17:55:21 +00005503 }
5504 } else {
5505 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005506 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005507 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005508 } else if ((ret != XML_ERR_OK) &&
5509 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005510 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005511 } else if (list != NULL) {
5512 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005513 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005514 }
5515 }
5516 }
5517 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5518 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5519 /*
5520 * Create a node.
5521 */
5522 ctxt->sax->reference(ctxt->userData, ent->name);
5523 return;
5524 } else if (ctxt->replaceEntities) {
5525 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5526 /*
5527 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005528 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005529 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005530 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005531 if ((list == NULL) && (ent->owner == 0)) {
5532 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005533 cur = ent->children;
5534 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005535 nw = xmlCopyNode(cur, 1);
5536 if (nw != NULL) {
5537 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005538 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005539 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005540 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005541 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005542 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005543 if (cur == ent->last)
5544 break;
5545 cur = cur->next;
5546 }
Daniel Veillard81273902003-09-30 00:43:48 +00005547#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005548 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005549 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005550#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005551 } else if (list == NULL) {
5552 xmlNodePtr nw = NULL, cur, next, last,
5553 firstChild = NULL;
5554 /*
5555 * Copy the entity child list and make it the new
5556 * entity child list. The goal is to make sure any
5557 * ID or REF referenced will be the one from the
5558 * document content and not the entity copy.
5559 */
5560 cur = ent->children;
5561 ent->children = NULL;
5562 last = ent->last;
5563 ent->last = NULL;
5564 while (cur != NULL) {
5565 next = cur->next;
5566 cur->next = NULL;
5567 cur->parent = NULL;
5568 nw = xmlCopyNode(cur, 1);
5569 if (nw != NULL) {
5570 nw->_private = cur->_private;
5571 if (firstChild == NULL){
5572 firstChild = cur;
5573 }
5574 xmlAddChild((xmlNodePtr) ent, nw);
5575 xmlAddChild(ctxt->node, cur);
5576 }
5577 if (cur == last)
5578 break;
5579 cur = next;
5580 }
5581 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005582#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005583 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5584 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005585#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005586 } else {
5587 /*
5588 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005589 * node with a possible previous text one which
5590 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005591 */
5592 if (ent->children->type == XML_TEXT_NODE)
5593 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5594 if ((ent->last != ent->children) &&
5595 (ent->last->type == XML_TEXT_NODE))
5596 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5597 xmlAddChildList(ctxt->node, ent->children);
5598 }
5599
Owen Taylor3473f882001-02-23 17:55:21 +00005600 /*
5601 * This is to avoid a nasty side effect, see
5602 * characters() in SAX.c
5603 */
5604 ctxt->nodemem = 0;
5605 ctxt->nodelen = 0;
5606 return;
5607 } else {
5608 /*
5609 * Probably running in SAX mode
5610 */
5611 xmlParserInputPtr input;
5612
5613 input = xmlNewEntityInputStream(ctxt, ent);
5614 xmlPushInput(ctxt, input);
5615 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005616 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5617 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005618 xmlParseTextDecl(ctxt);
5619 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5620 /*
5621 * The XML REC instructs us to stop parsing right here
5622 */
5623 ctxt->instate = XML_PARSER_EOF;
5624 return;
5625 }
5626 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005627 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5628 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005629 }
5630 }
5631 return;
5632 }
5633 }
5634 } else {
5635 val = ent->content;
5636 if (val == NULL) return;
5637 /*
5638 * inline the entity.
5639 */
5640 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5641 (!ctxt->disableSAX))
5642 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5643 }
5644 }
5645}
5646
5647/**
5648 * xmlParseEntityRef:
5649 * @ctxt: an XML parser context
5650 *
5651 * parse ENTITY references declarations
5652 *
5653 * [68] EntityRef ::= '&' Name ';'
5654 *
5655 * [ WFC: Entity Declared ]
5656 * In a document without any DTD, a document with only an internal DTD
5657 * subset which contains no parameter entity references, or a document
5658 * with "standalone='yes'", the Name given in the entity reference
5659 * must match that in an entity declaration, except that well-formed
5660 * documents need not declare any of the following entities: amp, lt,
5661 * gt, apos, quot. The declaration of a parameter entity must precede
5662 * any reference to it. Similarly, the declaration of a general entity
5663 * must precede any reference to it which appears in a default value in an
5664 * attribute-list declaration. Note that if entities are declared in the
5665 * external subset or in external parameter entities, a non-validating
5666 * processor is not obligated to read and process their declarations;
5667 * for such documents, the rule that an entity must be declared is a
5668 * well-formedness constraint only if standalone='yes'.
5669 *
5670 * [ WFC: Parsed Entity ]
5671 * An entity reference must not contain the name of an unparsed entity
5672 *
5673 * Returns the xmlEntityPtr if found, or NULL otherwise.
5674 */
5675xmlEntityPtr
5676xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005677 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005678 xmlEntityPtr ent = NULL;
5679
5680 GROW;
5681
5682 if (RAW == '&') {
5683 NEXT;
5684 name = xmlParseName(ctxt);
5685 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005686 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5687 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005688 } else {
5689 if (RAW == ';') {
5690 NEXT;
5691 /*
5692 * Ask first SAX for entity resolution, otherwise try the
5693 * predefined set.
5694 */
5695 if (ctxt->sax != NULL) {
5696 if (ctxt->sax->getEntity != NULL)
5697 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005698 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005699 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005700 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5701 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005702 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005703 }
Owen Taylor3473f882001-02-23 17:55:21 +00005704 }
5705 /*
5706 * [ WFC: Entity Declared ]
5707 * In a document without any DTD, a document with only an
5708 * internal DTD subset which contains no parameter entity
5709 * references, or a document with "standalone='yes'", the
5710 * Name given in the entity reference must match that in an
5711 * entity declaration, except that well-formed documents
5712 * need not declare any of the following entities: amp, lt,
5713 * gt, apos, quot.
5714 * The declaration of a parameter entity must precede any
5715 * reference to it.
5716 * Similarly, the declaration of a general entity must
5717 * precede any reference to it which appears in a default
5718 * value in an attribute-list declaration. Note that if
5719 * entities are declared in the external subset or in
5720 * external parameter entities, a non-validating processor
5721 * is not obligated to read and process their declarations;
5722 * for such documents, the rule that an entity must be
5723 * declared is a well-formedness constraint only if
5724 * standalone='yes'.
5725 */
5726 if (ent == NULL) {
5727 if ((ctxt->standalone == 1) ||
5728 ((ctxt->hasExternalSubset == 0) &&
5729 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005730 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005731 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005732 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005733 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005734 "Entity '%s' not defined\n", name);
5735 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005736 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005737 }
5738
5739 /*
5740 * [ WFC: Parsed Entity ]
5741 * An entity reference must not contain the name of an
5742 * unparsed entity
5743 */
5744 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005746 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005747 }
5748
5749 /*
5750 * [ WFC: No External Entity References ]
5751 * Attribute values cannot contain direct or indirect
5752 * entity references to external entities.
5753 */
5754 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5755 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005756 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5757 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 }
5759 /*
5760 * [ WFC: No < in Attribute Values ]
5761 * The replacement text of any entity referred to directly or
5762 * indirectly in an attribute value (other than "&lt;") must
5763 * not contain a <.
5764 */
5765 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5766 (ent != NULL) &&
5767 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5768 (ent->content != NULL) &&
5769 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005770 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005771 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005772 }
5773
5774 /*
5775 * Internal check, no parameter entities here ...
5776 */
5777 else {
5778 switch (ent->etype) {
5779 case XML_INTERNAL_PARAMETER_ENTITY:
5780 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005781 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5782 "Attempt to reference the parameter entity '%s'\n",
5783 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005784 break;
5785 default:
5786 break;
5787 }
5788 }
5789
5790 /*
5791 * [ WFC: No Recursion ]
5792 * A parsed entity must not contain a recursive reference
5793 * to itself, either directly or indirectly.
5794 * Done somewhere else
5795 */
5796
5797 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005798 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005799 }
Owen Taylor3473f882001-02-23 17:55:21 +00005800 }
5801 }
5802 return(ent);
5803}
5804
5805/**
5806 * xmlParseStringEntityRef:
5807 * @ctxt: an XML parser context
5808 * @str: a pointer to an index in the string
5809 *
5810 * parse ENTITY references declarations, but this version parses it from
5811 * a string value.
5812 *
5813 * [68] EntityRef ::= '&' Name ';'
5814 *
5815 * [ WFC: Entity Declared ]
5816 * In a document without any DTD, a document with only an internal DTD
5817 * subset which contains no parameter entity references, or a document
5818 * with "standalone='yes'", the Name given in the entity reference
5819 * must match that in an entity declaration, except that well-formed
5820 * documents need not declare any of the following entities: amp, lt,
5821 * gt, apos, quot. The declaration of a parameter entity must precede
5822 * any reference to it. Similarly, the declaration of a general entity
5823 * must precede any reference to it which appears in a default value in an
5824 * attribute-list declaration. Note that if entities are declared in the
5825 * external subset or in external parameter entities, a non-validating
5826 * processor is not obligated to read and process their declarations;
5827 * for such documents, the rule that an entity must be declared is a
5828 * well-formedness constraint only if standalone='yes'.
5829 *
5830 * [ WFC: Parsed Entity ]
5831 * An entity reference must not contain the name of an unparsed entity
5832 *
5833 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5834 * is updated to the current location in the string.
5835 */
5836xmlEntityPtr
5837xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5838 xmlChar *name;
5839 const xmlChar *ptr;
5840 xmlChar cur;
5841 xmlEntityPtr ent = NULL;
5842
5843 if ((str == NULL) || (*str == NULL))
5844 return(NULL);
5845 ptr = *str;
5846 cur = *ptr;
5847 if (cur == '&') {
5848 ptr++;
5849 cur = *ptr;
5850 name = xmlParseStringName(ctxt, &ptr);
5851 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005852 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5853 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005854 } else {
5855 if (*ptr == ';') {
5856 ptr++;
5857 /*
5858 * Ask first SAX for entity resolution, otherwise try the
5859 * predefined set.
5860 */
5861 if (ctxt->sax != NULL) {
5862 if (ctxt->sax->getEntity != NULL)
5863 ent = ctxt->sax->getEntity(ctxt->userData, name);
5864 if (ent == NULL)
5865 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005866 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005867 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005868 }
Owen Taylor3473f882001-02-23 17:55:21 +00005869 }
5870 /*
5871 * [ WFC: Entity Declared ]
5872 * In a document without any DTD, a document with only an
5873 * internal DTD subset which contains no parameter entity
5874 * references, or a document with "standalone='yes'", the
5875 * Name given in the entity reference must match that in an
5876 * entity declaration, except that well-formed documents
5877 * need not declare any of the following entities: amp, lt,
5878 * gt, apos, quot.
5879 * The declaration of a parameter entity must precede any
5880 * reference to it.
5881 * Similarly, the declaration of a general entity must
5882 * precede any reference to it which appears in a default
5883 * value in an attribute-list declaration. Note that if
5884 * entities are declared in the external subset or in
5885 * external parameter entities, a non-validating processor
5886 * is not obligated to read and process their declarations;
5887 * for such documents, the rule that an entity must be
5888 * declared is a well-formedness constraint only if
5889 * standalone='yes'.
5890 */
5891 if (ent == NULL) {
5892 if ((ctxt->standalone == 1) ||
5893 ((ctxt->hasExternalSubset == 0) &&
5894 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005895 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005896 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005898 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005899 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005900 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005901 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005902 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005903 }
5904
5905 /*
5906 * [ WFC: Parsed Entity ]
5907 * An entity reference must not contain the name of an
5908 * unparsed entity
5909 */
5910 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005911 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005912 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005913 }
5914
5915 /*
5916 * [ WFC: No External Entity References ]
5917 * Attribute values cannot contain direct or indirect
5918 * entity references to external entities.
5919 */
5920 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5921 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005922 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005923 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005924 }
5925 /*
5926 * [ WFC: No < in Attribute Values ]
5927 * The replacement text of any entity referred to directly or
5928 * indirectly in an attribute value (other than "&lt;") must
5929 * not contain a <.
5930 */
5931 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5932 (ent != NULL) &&
5933 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5934 (ent->content != NULL) &&
5935 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005936 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
5937 "'<' in entity '%s' is not allowed in attributes values\n",
5938 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005939 }
5940
5941 /*
5942 * Internal check, no parameter entities here ...
5943 */
5944 else {
5945 switch (ent->etype) {
5946 case XML_INTERNAL_PARAMETER_ENTITY:
5947 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00005948 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5949 "Attempt to reference the parameter entity '%s'\n",
5950 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005951 break;
5952 default:
5953 break;
5954 }
5955 }
5956
5957 /*
5958 * [ WFC: No Recursion ]
5959 * A parsed entity must not contain a recursive reference
5960 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005961 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005962 */
5963
5964 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005965 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005966 }
5967 xmlFree(name);
5968 }
5969 }
5970 *str = ptr;
5971 return(ent);
5972}
5973
5974/**
5975 * xmlParsePEReference:
5976 * @ctxt: an XML parser context
5977 *
5978 * parse PEReference declarations
5979 * The entity content is handled directly by pushing it's content as
5980 * a new input stream.
5981 *
5982 * [69] PEReference ::= '%' Name ';'
5983 *
5984 * [ WFC: No Recursion ]
5985 * A parsed entity must not contain a recursive
5986 * reference to itself, either directly or indirectly.
5987 *
5988 * [ WFC: Entity Declared ]
5989 * In a document without any DTD, a document with only an internal DTD
5990 * subset which contains no parameter entity references, or a document
5991 * with "standalone='yes'", ... ... The declaration of a parameter
5992 * entity must precede any reference to it...
5993 *
5994 * [ VC: Entity Declared ]
5995 * In a document with an external subset or external parameter entities
5996 * with "standalone='no'", ... ... The declaration of a parameter entity
5997 * must precede any reference to it...
5998 *
5999 * [ WFC: In DTD ]
6000 * Parameter-entity references may only appear in the DTD.
6001 * NOTE: misleading but this is handled.
6002 */
6003void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006004xmlParsePEReference(xmlParserCtxtPtr ctxt)
6005{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006006 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006007 xmlEntityPtr entity = NULL;
6008 xmlParserInputPtr input;
6009
6010 if (RAW == '%') {
6011 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006012 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006013 if (name == NULL) {
6014 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6015 "xmlParsePEReference: no name\n");
6016 } else {
6017 if (RAW == ';') {
6018 NEXT;
6019 if ((ctxt->sax != NULL) &&
6020 (ctxt->sax->getParameterEntity != NULL))
6021 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6022 name);
6023 if (entity == NULL) {
6024 /*
6025 * [ WFC: Entity Declared ]
6026 * In a document without any DTD, a document with only an
6027 * internal DTD subset which contains no parameter entity
6028 * references, or a document with "standalone='yes'", ...
6029 * ... The declaration of a parameter entity must precede
6030 * any reference to it...
6031 */
6032 if ((ctxt->standalone == 1) ||
6033 ((ctxt->hasExternalSubset == 0) &&
6034 (ctxt->hasPErefs == 0))) {
6035 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6036 "PEReference: %%%s; not found\n",
6037 name);
6038 } else {
6039 /*
6040 * [ VC: Entity Declared ]
6041 * In a document with an external subset or external
6042 * parameter entities with "standalone='no'", ...
6043 * ... The declaration of a parameter entity must
6044 * precede any reference to it...
6045 */
6046 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6047 "PEReference: %%%s; not found\n",
6048 name, NULL);
6049 ctxt->valid = 0;
6050 }
6051 } else {
6052 /*
6053 * Internal checking in case the entity quest barfed
6054 */
6055 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6056 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6057 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6058 "Internal: %%%s; is not a parameter entity\n",
6059 name, NULL);
6060 } else if (ctxt->input->free != deallocblankswrapper) {
6061 input =
6062 xmlNewBlanksWrapperInputStream(ctxt, entity);
6063 xmlPushInput(ctxt, input);
6064 } else {
6065 /*
6066 * TODO !!!
6067 * handle the extra spaces added before and after
6068 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6069 */
6070 input = xmlNewEntityInputStream(ctxt, entity);
6071 xmlPushInput(ctxt, input);
6072 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006073 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006074 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006075 xmlParseTextDecl(ctxt);
6076 if (ctxt->errNo ==
6077 XML_ERR_UNSUPPORTED_ENCODING) {
6078 /*
6079 * The XML REC instructs us to stop parsing
6080 * right here
6081 */
6082 ctxt->instate = XML_PARSER_EOF;
6083 return;
6084 }
6085 }
6086 }
6087 }
6088 ctxt->hasPErefs = 1;
6089 } else {
6090 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6091 }
6092 }
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094}
6095
6096/**
6097 * xmlParseStringPEReference:
6098 * @ctxt: an XML parser context
6099 * @str: a pointer to an index in the string
6100 *
6101 * parse PEReference declarations
6102 *
6103 * [69] PEReference ::= '%' Name ';'
6104 *
6105 * [ WFC: No Recursion ]
6106 * A parsed entity must not contain a recursive
6107 * reference to itself, either directly or indirectly.
6108 *
6109 * [ WFC: Entity Declared ]
6110 * In a document without any DTD, a document with only an internal DTD
6111 * subset which contains no parameter entity references, or a document
6112 * with "standalone='yes'", ... ... The declaration of a parameter
6113 * entity must precede any reference to it...
6114 *
6115 * [ VC: Entity Declared ]
6116 * In a document with an external subset or external parameter entities
6117 * with "standalone='no'", ... ... The declaration of a parameter entity
6118 * must precede any reference to it...
6119 *
6120 * [ WFC: In DTD ]
6121 * Parameter-entity references may only appear in the DTD.
6122 * NOTE: misleading but this is handled.
6123 *
6124 * Returns the string of the entity content.
6125 * str is updated to the current value of the index
6126 */
6127xmlEntityPtr
6128xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6129 const xmlChar *ptr;
6130 xmlChar cur;
6131 xmlChar *name;
6132 xmlEntityPtr entity = NULL;
6133
6134 if ((str == NULL) || (*str == NULL)) return(NULL);
6135 ptr = *str;
6136 cur = *ptr;
6137 if (cur == '%') {
6138 ptr++;
6139 cur = *ptr;
6140 name = xmlParseStringName(ctxt, &ptr);
6141 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006142 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6143 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006144 } else {
6145 cur = *ptr;
6146 if (cur == ';') {
6147 ptr++;
6148 cur = *ptr;
6149 if ((ctxt->sax != NULL) &&
6150 (ctxt->sax->getParameterEntity != NULL))
6151 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6152 name);
6153 if (entity == NULL) {
6154 /*
6155 * [ WFC: Entity Declared ]
6156 * In a document without any DTD, a document with only an
6157 * internal DTD subset which contains no parameter entity
6158 * references, or a document with "standalone='yes'", ...
6159 * ... The declaration of a parameter entity must precede
6160 * any reference to it...
6161 */
6162 if ((ctxt->standalone == 1) ||
6163 ((ctxt->hasExternalSubset == 0) &&
6164 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006165 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006166 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006167 } else {
6168 /*
6169 * [ VC: Entity Declared ]
6170 * In a document with an external subset or external
6171 * parameter entities with "standalone='no'", ...
6172 * ... The declaration of a parameter entity must
6173 * precede any reference to it...
6174 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006175 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6176 "PEReference: %%%s; not found\n",
6177 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006178 ctxt->valid = 0;
6179 }
6180 } else {
6181 /*
6182 * Internal checking in case the entity quest barfed
6183 */
6184 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6185 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006186 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6187 "%%%s; is not a parameter entity\n",
6188 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006189 }
6190 }
6191 ctxt->hasPErefs = 1;
6192 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006193 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006194 }
6195 xmlFree(name);
6196 }
6197 }
6198 *str = ptr;
6199 return(entity);
6200}
6201
6202/**
6203 * xmlParseDocTypeDecl:
6204 * @ctxt: an XML parser context
6205 *
6206 * parse a DOCTYPE declaration
6207 *
6208 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6209 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6210 *
6211 * [ VC: Root Element Type ]
6212 * The Name in the document type declaration must match the element
6213 * type of the root element.
6214 */
6215
6216void
6217xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006218 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006219 xmlChar *ExternalID = NULL;
6220 xmlChar *URI = NULL;
6221
6222 /*
6223 * We know that '<!DOCTYPE' has been detected.
6224 */
6225 SKIP(9);
6226
6227 SKIP_BLANKS;
6228
6229 /*
6230 * Parse the DOCTYPE name.
6231 */
6232 name = xmlParseName(ctxt);
6233 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006234 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6235 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006236 }
6237 ctxt->intSubName = name;
6238
6239 SKIP_BLANKS;
6240
6241 /*
6242 * Check for SystemID and ExternalID
6243 */
6244 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6245
6246 if ((URI != NULL) || (ExternalID != NULL)) {
6247 ctxt->hasExternalSubset = 1;
6248 }
6249 ctxt->extSubURI = URI;
6250 ctxt->extSubSystem = ExternalID;
6251
6252 SKIP_BLANKS;
6253
6254 /*
6255 * Create and update the internal subset.
6256 */
6257 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6258 (!ctxt->disableSAX))
6259 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6260
6261 /*
6262 * Is there any internal subset declarations ?
6263 * they are handled separately in xmlParseInternalSubset()
6264 */
6265 if (RAW == '[')
6266 return;
6267
6268 /*
6269 * We should be at the end of the DOCTYPE declaration.
6270 */
6271 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006272 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006273 }
6274 NEXT;
6275}
6276
6277/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006278 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006279 * @ctxt: an XML parser context
6280 *
6281 * parse the internal subset declaration
6282 *
6283 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6284 */
6285
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006286static void
Owen Taylor3473f882001-02-23 17:55:21 +00006287xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6288 /*
6289 * Is there any DTD definition ?
6290 */
6291 if (RAW == '[') {
6292 ctxt->instate = XML_PARSER_DTD;
6293 NEXT;
6294 /*
6295 * Parse the succession of Markup declarations and
6296 * PEReferences.
6297 * Subsequence (markupdecl | PEReference | S)*
6298 */
6299 while (RAW != ']') {
6300 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006301 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006302
6303 SKIP_BLANKS;
6304 xmlParseMarkupDecl(ctxt);
6305 xmlParsePEReference(ctxt);
6306
6307 /*
6308 * Pop-up of finished entities.
6309 */
6310 while ((RAW == 0) && (ctxt->inputNr > 1))
6311 xmlPopInput(ctxt);
6312
6313 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006314 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006315 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006316 break;
6317 }
6318 }
6319 if (RAW == ']') {
6320 NEXT;
6321 SKIP_BLANKS;
6322 }
6323 }
6324
6325 /*
6326 * We should be at the end of the DOCTYPE declaration.
6327 */
6328 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006329 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006330 }
6331 NEXT;
6332}
6333
Daniel Veillard81273902003-09-30 00:43:48 +00006334#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006335/**
6336 * xmlParseAttribute:
6337 * @ctxt: an XML parser context
6338 * @value: a xmlChar ** used to store the value of the attribute
6339 *
6340 * parse an attribute
6341 *
6342 * [41] Attribute ::= Name Eq AttValue
6343 *
6344 * [ WFC: No External Entity References ]
6345 * Attribute values cannot contain direct or indirect entity references
6346 * to external entities.
6347 *
6348 * [ WFC: No < in Attribute Values ]
6349 * The replacement text of any entity referred to directly or indirectly in
6350 * an attribute value (other than "&lt;") must not contain a <.
6351 *
6352 * [ VC: Attribute Value Type ]
6353 * The attribute must have been declared; the value must be of the type
6354 * declared for it.
6355 *
6356 * [25] Eq ::= S? '=' S?
6357 *
6358 * With namespace:
6359 *
6360 * [NS 11] Attribute ::= QName Eq AttValue
6361 *
6362 * Also the case QName == xmlns:??? is handled independently as a namespace
6363 * definition.
6364 *
6365 * Returns the attribute name, and the value in *value.
6366 */
6367
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006368const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006369xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006370 const xmlChar *name;
6371 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006372
6373 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006374 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006375 name = xmlParseName(ctxt);
6376 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006377 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006378 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006379 return(NULL);
6380 }
6381
6382 /*
6383 * read the value
6384 */
6385 SKIP_BLANKS;
6386 if (RAW == '=') {
6387 NEXT;
6388 SKIP_BLANKS;
6389 val = xmlParseAttValue(ctxt);
6390 ctxt->instate = XML_PARSER_CONTENT;
6391 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006392 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006393 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006394 return(NULL);
6395 }
6396
6397 /*
6398 * Check that xml:lang conforms to the specification
6399 * No more registered as an error, just generate a warning now
6400 * since this was deprecated in XML second edition
6401 */
6402 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6403 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006404 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6405 "Malformed value for xml:lang : %s\n",
6406 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006407 }
6408 }
6409
6410 /*
6411 * Check that xml:space conforms to the specification
6412 */
6413 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6414 if (xmlStrEqual(val, BAD_CAST "default"))
6415 *(ctxt->space) = 0;
6416 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6417 *(ctxt->space) = 1;
6418 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006419 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006420"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006421 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 }
6423 }
6424
6425 *value = val;
6426 return(name);
6427}
6428
6429/**
6430 * xmlParseStartTag:
6431 * @ctxt: an XML parser context
6432 *
6433 * parse a start of tag either for rule element or
6434 * EmptyElement. In both case we don't parse the tag closing chars.
6435 *
6436 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6437 *
6438 * [ WFC: Unique Att Spec ]
6439 * No attribute name may appear more than once in the same start-tag or
6440 * empty-element tag.
6441 *
6442 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6443 *
6444 * [ WFC: Unique Att Spec ]
6445 * No attribute name may appear more than once in the same start-tag or
6446 * empty-element tag.
6447 *
6448 * With namespace:
6449 *
6450 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6451 *
6452 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6453 *
6454 * Returns the element name parsed
6455 */
6456
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006457const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006458xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006459 const xmlChar *name;
6460 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006461 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006462 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006463 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006464 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006465 int i;
6466
6467 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006468 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006469
6470 name = xmlParseName(ctxt);
6471 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006472 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006473 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006474 return(NULL);
6475 }
6476
6477 /*
6478 * Now parse the attributes, it ends up with the ending
6479 *
6480 * (S Attribute)* S?
6481 */
6482 SKIP_BLANKS;
6483 GROW;
6484
Daniel Veillard21a0f912001-02-25 19:54:14 +00006485 while ((RAW != '>') &&
6486 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006487 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006488 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006489 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006490
6491 attname = xmlParseAttribute(ctxt, &attvalue);
6492 if ((attname != NULL) && (attvalue != NULL)) {
6493 /*
6494 * [ WFC: Unique Att Spec ]
6495 * No attribute name may appear more than once in the same
6496 * start-tag or empty-element tag.
6497 */
6498 for (i = 0; i < nbatts;i += 2) {
6499 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006500 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006501 xmlFree(attvalue);
6502 goto failed;
6503 }
6504 }
Owen Taylor3473f882001-02-23 17:55:21 +00006505 /*
6506 * Add the pair to atts
6507 */
6508 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006509 maxatts = 22; /* allow for 10 attrs by default */
6510 atts = (const xmlChar **)
6511 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006512 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006513 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006514 if (attvalue != NULL)
6515 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006516 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006517 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006518 ctxt->atts = atts;
6519 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006520 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006521 const xmlChar **n;
6522
Owen Taylor3473f882001-02-23 17:55:21 +00006523 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006524 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006525 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006526 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006527 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006528 if (attvalue != NULL)
6529 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006530 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006531 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006532 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006533 ctxt->atts = atts;
6534 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006535 }
6536 atts[nbatts++] = attname;
6537 atts[nbatts++] = attvalue;
6538 atts[nbatts] = NULL;
6539 atts[nbatts + 1] = NULL;
6540 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006541 if (attvalue != NULL)
6542 xmlFree(attvalue);
6543 }
6544
6545failed:
6546
Daniel Veillard3772de32002-12-17 10:31:45 +00006547 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006548 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6549 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006550 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006551 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6552 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006553 }
6554 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006555 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6556 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006557 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6558 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006559 break;
6560 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006561 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006562 GROW;
6563 }
6564
6565 /*
6566 * SAX: Start of Element !
6567 */
6568 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006569 (!ctxt->disableSAX)) {
6570 if (nbatts > 0)
6571 ctxt->sax->startElement(ctxt->userData, name, atts);
6572 else
6573 ctxt->sax->startElement(ctxt->userData, name, NULL);
6574 }
Owen Taylor3473f882001-02-23 17:55:21 +00006575
6576 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006577 /* Free only the content strings */
6578 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006579 if (atts[i] != NULL)
6580 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006581 }
6582 return(name);
6583}
6584
6585/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006586 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006587 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006588 * @line: line of the start tag
6589 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006590 *
6591 * parse an end of tag
6592 *
6593 * [42] ETag ::= '</' Name S? '>'
6594 *
6595 * With namespace
6596 *
6597 * [NS 9] ETag ::= '</' QName S? '>'
6598 */
6599
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006600static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006601xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006602 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006603
6604 GROW;
6605 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006606 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006607 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006608 return;
6609 }
6610 SKIP(2);
6611
Daniel Veillard46de64e2002-05-29 08:21:33 +00006612 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006613
6614 /*
6615 * We should definitely be at the ending "S? '>'" part
6616 */
6617 GROW;
6618 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006619 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006620 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006621 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006622 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006623
6624 /*
6625 * [ WFC: Element Type Match ]
6626 * The Name in an element's end-tag must match the element type in the
6627 * start-tag.
6628 *
6629 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006630 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006631 if (name == NULL) name = BAD_CAST "unparseable";
6632 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006633 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006634 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006635 }
6636
6637 /*
6638 * SAX: End of Tag
6639 */
6640 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6641 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006642 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006643
Daniel Veillarde57ec792003-09-10 10:50:59 +00006644 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006645 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006646 return;
6647}
6648
6649/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006650 * xmlParseEndTag:
6651 * @ctxt: an XML parser context
6652 *
6653 * parse an end of tag
6654 *
6655 * [42] ETag ::= '</' Name S? '>'
6656 *
6657 * With namespace
6658 *
6659 * [NS 9] ETag ::= '</' QName S? '>'
6660 */
6661
6662void
6663xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006664 xmlParseEndTag1(ctxt, 0);
6665}
Daniel Veillard81273902003-09-30 00:43:48 +00006666#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006667
6668/************************************************************************
6669 * *
6670 * SAX 2 specific operations *
6671 * *
6672 ************************************************************************/
6673
6674static const xmlChar *
6675xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6676 int len = 0, l;
6677 int c;
6678 int count = 0;
6679
6680 /*
6681 * Handler for more complex cases
6682 */
6683 GROW;
6684 c = CUR_CHAR(l);
6685 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006686 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006687 return(NULL);
6688 }
6689
6690 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006691 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006692 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006693 (IS_COMBINING(c)) ||
6694 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006695 if (count++ > 100) {
6696 count = 0;
6697 GROW;
6698 }
6699 len += l;
6700 NEXTL(l);
6701 c = CUR_CHAR(l);
6702 }
6703 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6704}
6705
6706/*
6707 * xmlGetNamespace:
6708 * @ctxt: an XML parser context
6709 * @prefix: the prefix to lookup
6710 *
6711 * Lookup the namespace name for the @prefix (which ca be NULL)
6712 * The prefix must come from the @ctxt->dict dictionnary
6713 *
6714 * Returns the namespace name or NULL if not bound
6715 */
6716static const xmlChar *
6717xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6718 int i;
6719
Daniel Veillarde57ec792003-09-10 10:50:59 +00006720 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006721 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006722 if (ctxt->nsTab[i] == prefix) {
6723 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6724 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006725 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006726 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006727 return(NULL);
6728}
6729
6730/**
6731 * xmlParseNCName:
6732 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006733 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006734 *
6735 * parse an XML name.
6736 *
6737 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6738 * CombiningChar | Extender
6739 *
6740 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6741 *
6742 * Returns the Name parsed or NULL
6743 */
6744
6745static const xmlChar *
6746xmlParseNCName(xmlParserCtxtPtr ctxt) {
6747 const xmlChar *in;
6748 const xmlChar *ret;
6749 int count = 0;
6750
6751 /*
6752 * Accelerator for simple ASCII names
6753 */
6754 in = ctxt->input->cur;
6755 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6756 ((*in >= 0x41) && (*in <= 0x5A)) ||
6757 (*in == '_')) {
6758 in++;
6759 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6760 ((*in >= 0x41) && (*in <= 0x5A)) ||
6761 ((*in >= 0x30) && (*in <= 0x39)) ||
6762 (*in == '_') || (*in == '-') ||
6763 (*in == '.'))
6764 in++;
6765 if ((*in > 0) && (*in < 0x80)) {
6766 count = in - ctxt->input->cur;
6767 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6768 ctxt->input->cur = in;
6769 ctxt->nbChars += count;
6770 ctxt->input->col += count;
6771 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006772 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006773 }
6774 return(ret);
6775 }
6776 }
6777 return(xmlParseNCNameComplex(ctxt));
6778}
6779
6780/**
6781 * xmlParseQName:
6782 * @ctxt: an XML parser context
6783 * @prefix: pointer to store the prefix part
6784 *
6785 * parse an XML Namespace QName
6786 *
6787 * [6] QName ::= (Prefix ':')? LocalPart
6788 * [7] Prefix ::= NCName
6789 * [8] LocalPart ::= NCName
6790 *
6791 * Returns the Name parsed or NULL
6792 */
6793
6794static const xmlChar *
6795xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6796 const xmlChar *l, *p;
6797
6798 GROW;
6799
6800 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006801 if (l == NULL) {
6802 if (CUR == ':') {
6803 l = xmlParseName(ctxt);
6804 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006805 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6806 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006807 *prefix = NULL;
6808 return(l);
6809 }
6810 }
6811 return(NULL);
6812 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006813 if (CUR == ':') {
6814 NEXT;
6815 p = l;
6816 l = xmlParseNCName(ctxt);
6817 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006818 xmlChar *tmp;
6819
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006820 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6821 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006822 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6823 p = xmlDictLookup(ctxt->dict, tmp, -1);
6824 if (tmp != NULL) xmlFree(tmp);
6825 *prefix = NULL;
6826 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006827 }
6828 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006829 xmlChar *tmp;
6830
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006831 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6832 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006833 NEXT;
6834 tmp = (xmlChar *) xmlParseName(ctxt);
6835 if (tmp != NULL) {
6836 tmp = xmlBuildQName(tmp, l, NULL, 0);
6837 l = xmlDictLookup(ctxt->dict, tmp, -1);
6838 if (tmp != NULL) xmlFree(tmp);
6839 *prefix = p;
6840 return(l);
6841 }
6842 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6843 l = xmlDictLookup(ctxt->dict, tmp, -1);
6844 if (tmp != NULL) xmlFree(tmp);
6845 *prefix = p;
6846 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006847 }
6848 *prefix = p;
6849 } else
6850 *prefix = NULL;
6851 return(l);
6852}
6853
6854/**
6855 * xmlParseQNameAndCompare:
6856 * @ctxt: an XML parser context
6857 * @name: the localname
6858 * @prefix: the prefix, if any.
6859 *
6860 * parse an XML name and compares for match
6861 * (specialized for endtag parsing)
6862 *
6863 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6864 * and the name for mismatch
6865 */
6866
6867static const xmlChar *
6868xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6869 xmlChar const *prefix) {
6870 const xmlChar *cmp = name;
6871 const xmlChar *in;
6872 const xmlChar *ret;
6873 const xmlChar *prefix2;
6874
6875 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6876
6877 GROW;
6878 in = ctxt->input->cur;
6879
6880 cmp = prefix;
6881 while (*in != 0 && *in == *cmp) {
6882 ++in;
6883 ++cmp;
6884 }
6885 if ((*cmp == 0) && (*in == ':')) {
6886 in++;
6887 cmp = name;
6888 while (*in != 0 && *in == *cmp) {
6889 ++in;
6890 ++cmp;
6891 }
William M. Brack76e95df2003-10-18 16:20:14 +00006892 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006893 /* success */
6894 ctxt->input->cur = in;
6895 return((const xmlChar*) 1);
6896 }
6897 }
6898 /*
6899 * all strings coms from the dictionary, equality can be done directly
6900 */
6901 ret = xmlParseQName (ctxt, &prefix2);
6902 if ((ret == name) && (prefix == prefix2))
6903 return((const xmlChar*) 1);
6904 return ret;
6905}
6906
6907/**
6908 * xmlParseAttValueInternal:
6909 * @ctxt: an XML parser context
6910 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006911 * @alloc: whether the attribute was reallocated as a new string
6912 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006913 *
6914 * parse a value for an attribute.
6915 * NOTE: if no normalization is needed, the routine will return pointers
6916 * directly from the data buffer.
6917 *
6918 * 3.3.3 Attribute-Value Normalization:
6919 * Before the value of an attribute is passed to the application or
6920 * checked for validity, the XML processor must normalize it as follows:
6921 * - a character reference is processed by appending the referenced
6922 * character to the attribute value
6923 * - an entity reference is processed by recursively processing the
6924 * replacement text of the entity
6925 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6926 * appending #x20 to the normalized value, except that only a single
6927 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6928 * parsed entity or the literal entity value of an internal parsed entity
6929 * - other characters are processed by appending them to the normalized value
6930 * If the declared value is not CDATA, then the XML processor must further
6931 * process the normalized attribute value by discarding any leading and
6932 * trailing space (#x20) characters, and by replacing sequences of space
6933 * (#x20) characters by a single space (#x20) character.
6934 * All attributes for which no declaration has been read should be treated
6935 * by a non-validating parser as if declared CDATA.
6936 *
6937 * Returns the AttValue parsed or NULL. The value has to be freed by the
6938 * caller if it was copied, this can be detected by val[*len] == 0.
6939 */
6940
6941static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006942xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
6943 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006944{
Daniel Veillard0fb18932003-09-07 09:14:37 +00006945 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006946 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006947 xmlChar *ret = NULL;
6948
6949 GROW;
6950 in = (xmlChar *) CUR_PTR;
6951 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006952 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006953 return (NULL);
6954 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006955 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00006956
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006957 /*
6958 * try to handle in this routine the most common case where no
6959 * allocation of a new string is required and where content is
6960 * pure ASCII.
6961 */
6962 limit = *in++;
6963 end = ctxt->input->end;
6964 start = in;
6965 if (in >= end) {
6966 const xmlChar *oldbase = ctxt->input->base;
6967 GROW;
6968 if (oldbase != ctxt->input->base) {
6969 long delta = ctxt->input->base - oldbase;
6970 start = start + delta;
6971 in = in + delta;
6972 }
6973 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006974 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006975 if (normalize) {
6976 /*
6977 * Skip any leading spaces
6978 */
6979 while ((in < end) && (*in != limit) &&
6980 ((*in == 0x20) || (*in == 0x9) ||
6981 (*in == 0xA) || (*in == 0xD))) {
6982 in++;
6983 start = in;
6984 if (in >= end) {
6985 const xmlChar *oldbase = ctxt->input->base;
6986 GROW;
6987 if (oldbase != ctxt->input->base) {
6988 long delta = ctxt->input->base - oldbase;
6989 start = start + delta;
6990 in = in + delta;
6991 }
6992 end = ctxt->input->end;
6993 }
6994 }
6995 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
6996 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
6997 if ((*in++ == 0x20) && (*in == 0x20)) break;
6998 if (in >= end) {
6999 const xmlChar *oldbase = ctxt->input->base;
7000 GROW;
7001 if (oldbase != ctxt->input->base) {
7002 long delta = ctxt->input->base - oldbase;
7003 start = start + delta;
7004 in = in + delta;
7005 }
7006 end = ctxt->input->end;
7007 }
7008 }
7009 last = in;
7010 /*
7011 * skip the trailing blanks
7012 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007013 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007014 while ((in < end) && (*in != limit) &&
7015 ((*in == 0x20) || (*in == 0x9) ||
7016 (*in == 0xA) || (*in == 0xD))) {
7017 in++;
7018 if (in >= end) {
7019 const xmlChar *oldbase = ctxt->input->base;
7020 GROW;
7021 if (oldbase != ctxt->input->base) {
7022 long delta = ctxt->input->base - oldbase;
7023 start = start + delta;
7024 in = in + delta;
7025 last = last + delta;
7026 }
7027 end = ctxt->input->end;
7028 }
7029 }
7030 if (*in != limit) goto need_complex;
7031 } else {
7032 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7033 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7034 in++;
7035 if (in >= end) {
7036 const xmlChar *oldbase = ctxt->input->base;
7037 GROW;
7038 if (oldbase != ctxt->input->base) {
7039 long delta = ctxt->input->base - oldbase;
7040 start = start + delta;
7041 in = in + delta;
7042 }
7043 end = ctxt->input->end;
7044 }
7045 }
7046 last = in;
7047 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007048 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007049 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007050 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007051 *len = last - start;
7052 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007053 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007054 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007055 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007056 }
7057 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007058 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007059 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007060need_complex:
7061 if (alloc) *alloc = 1;
7062 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007063}
7064
7065/**
7066 * xmlParseAttribute2:
7067 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007068 * @pref: the element prefix
7069 * @elem: the element name
7070 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007071 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007072 * @len: an int * to save the length of the attribute
7073 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007074 *
7075 * parse an attribute in the new SAX2 framework.
7076 *
7077 * Returns the attribute name, and the value in *value, .
7078 */
7079
7080static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007081xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7082 const xmlChar *pref, const xmlChar *elem,
7083 const xmlChar **prefix, xmlChar **value,
7084 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007085 const xmlChar *name;
7086 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007087 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007088
7089 *value = NULL;
7090 GROW;
7091 name = xmlParseQName(ctxt, prefix);
7092 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007093 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7094 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007095 return(NULL);
7096 }
7097
7098 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007099 * get the type if needed
7100 */
7101 if (ctxt->attsSpecial != NULL) {
7102 int type;
7103
7104 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7105 pref, elem, *prefix, name);
7106 if (type != 0) normalize = 1;
7107 }
7108
7109 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007110 * read the value
7111 */
7112 SKIP_BLANKS;
7113 if (RAW == '=') {
7114 NEXT;
7115 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007116 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007117 ctxt->instate = XML_PARSER_CONTENT;
7118 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007119 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007120 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007121 return(NULL);
7122 }
7123
7124 /*
7125 * Check that xml:lang conforms to the specification
7126 * No more registered as an error, just generate a warning now
7127 * since this was deprecated in XML second edition
7128 */
7129 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7130 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007131 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7132 "Malformed value for xml:lang : %s\n",
7133 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007134 }
7135 }
7136
7137 /*
7138 * Check that xml:space conforms to the specification
7139 */
7140 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7141 if (xmlStrEqual(val, BAD_CAST "default"))
7142 *(ctxt->space) = 0;
7143 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7144 *(ctxt->space) = 1;
7145 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007146 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007147"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7148 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007149 }
7150 }
7151
7152 *value = val;
7153 return(name);
7154}
7155
7156/**
7157 * xmlParseStartTag2:
7158 * @ctxt: an XML parser context
7159 *
7160 * parse a start of tag either for rule element or
7161 * EmptyElement. In both case we don't parse the tag closing chars.
7162 * This routine is called when running SAX2 parsing
7163 *
7164 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7165 *
7166 * [ WFC: Unique Att Spec ]
7167 * No attribute name may appear more than once in the same start-tag or
7168 * empty-element tag.
7169 *
7170 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7171 *
7172 * [ WFC: Unique Att Spec ]
7173 * No attribute name may appear more than once in the same start-tag or
7174 * empty-element tag.
7175 *
7176 * With namespace:
7177 *
7178 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7179 *
7180 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7181 *
7182 * Returns the element name parsed
7183 */
7184
7185static const xmlChar *
7186xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007187 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007188 const xmlChar *localname;
7189 const xmlChar *prefix;
7190 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007191 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007192 const xmlChar *nsname;
7193 xmlChar *attvalue;
7194 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007195 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007196 int nratts, nbatts, nbdef;
7197 int i, j, nbNs, attval;
7198 const xmlChar *base;
7199 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007200
7201 if (RAW != '<') return(NULL);
7202 NEXT1;
7203
7204 /*
7205 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7206 * point since the attribute values may be stored as pointers to
7207 * the buffer and calling SHRINK would destroy them !
7208 * The Shrinking is only possible once the full set of attribute
7209 * callbacks have been done.
7210 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007211reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007212 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007213 base = ctxt->input->base;
7214 cur = ctxt->input->cur - ctxt->input->base;
7215 nbatts = 0;
7216 nratts = 0;
7217 nbdef = 0;
7218 nbNs = 0;
7219 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007220
7221 localname = xmlParseQName(ctxt, &prefix);
7222 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007223 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7224 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007225 return(NULL);
7226 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007227 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007228
7229 /*
7230 * Now parse the attributes, it ends up with the ending
7231 *
7232 * (S Attribute)* S?
7233 */
7234 SKIP_BLANKS;
7235 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007236 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007237
7238 while ((RAW != '>') &&
7239 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007240 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007241 const xmlChar *q = CUR_PTR;
7242 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007243 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007244
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007245 attname = xmlParseAttribute2(ctxt, prefix, localname,
7246 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007247 if ((attname != NULL) && (attvalue != NULL)) {
7248 if (len < 0) len = xmlStrlen(attvalue);
7249 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007250 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7251 xmlURIPtr uri;
7252
7253 if (*URL != 0) {
7254 uri = xmlParseURI((const char *) URL);
7255 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007256 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7257 "xmlns: %s not a valid URI\n",
7258 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007259 } else {
7260 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007261 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7262 "xmlns: URI %s is not absolute\n",
7263 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007264 }
7265 xmlFreeURI(uri);
7266 }
7267 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007268 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007269 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007270 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007271 for (j = 1;j <= nbNs;j++)
7272 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7273 break;
7274 if (j <= nbNs)
7275 xmlErrAttributeDup(ctxt, NULL, attname);
7276 else
7277 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007278 if (alloc != 0) xmlFree(attvalue);
7279 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007280 continue;
7281 }
7282 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007283 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7284 xmlURIPtr uri;
7285
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007286 if (attname == ctxt->str_xml) {
7287 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007288 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7289 "xml namespace prefix mapped to wrong URI\n",
7290 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007291 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007292 /*
7293 * Do not keep a namespace definition node
7294 */
7295 if (alloc != 0) xmlFree(attvalue);
7296 SKIP_BLANKS;
7297 continue;
7298 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007299 uri = xmlParseURI((const char *) URL);
7300 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007301 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7302 "xmlns:%s: '%s' is not a valid URI\n",
7303 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007304 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007305 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007306 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7307 "xmlns:%s: URI %s is not absolute\n",
7308 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007309 }
7310 xmlFreeURI(uri);
7311 }
7312
Daniel Veillard0fb18932003-09-07 09:14:37 +00007313 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007314 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007315 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007316 for (j = 1;j <= nbNs;j++)
7317 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7318 break;
7319 if (j <= nbNs)
7320 xmlErrAttributeDup(ctxt, aprefix, attname);
7321 else
7322 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007323 if (alloc != 0) xmlFree(attvalue);
7324 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007325 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007326 continue;
7327 }
7328
7329 /*
7330 * Add the pair to atts
7331 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007332 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7333 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 if (attvalue[len] == 0)
7335 xmlFree(attvalue);
7336 goto failed;
7337 }
7338 maxatts = ctxt->maxatts;
7339 atts = ctxt->atts;
7340 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007341 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007342 atts[nbatts++] = attname;
7343 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007344 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007345 atts[nbatts++] = attvalue;
7346 attvalue += len;
7347 atts[nbatts++] = attvalue;
7348 /*
7349 * tag if some deallocation is needed
7350 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007351 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007352 } else {
7353 if ((attvalue != NULL) && (attvalue[len] == 0))
7354 xmlFree(attvalue);
7355 }
7356
7357failed:
7358
7359 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007360 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007361 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7362 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007363 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7365 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007366 }
7367 SKIP_BLANKS;
7368 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7369 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007370 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007371 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007372 break;
7373 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007374 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007375 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007376 }
7377
Daniel Veillard0fb18932003-09-07 09:14:37 +00007378 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007379 * The attributes defaulting
7380 */
7381 if (ctxt->attsDefault != NULL) {
7382 xmlDefAttrsPtr defaults;
7383
7384 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7385 if (defaults != NULL) {
7386 for (i = 0;i < defaults->nbAttrs;i++) {
7387 attname = defaults->values[4 * i];
7388 aprefix = defaults->values[4 * i + 1];
7389
7390 /*
7391 * special work for namespaces defaulted defs
7392 */
7393 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7394 /*
7395 * check that it's not a defined namespace
7396 */
7397 for (j = 1;j <= nbNs;j++)
7398 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7399 break;
7400 if (j <= nbNs) continue;
7401
7402 nsname = xmlGetNamespace(ctxt, NULL);
7403 if (nsname != defaults->values[4 * i + 2]) {
7404 if (nsPush(ctxt, NULL,
7405 defaults->values[4 * i + 2]) > 0)
7406 nbNs++;
7407 }
7408 } else if (aprefix == ctxt->str_xmlns) {
7409 /*
7410 * check that it's not a defined namespace
7411 */
7412 for (j = 1;j <= nbNs;j++)
7413 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7414 break;
7415 if (j <= nbNs) continue;
7416
7417 nsname = xmlGetNamespace(ctxt, attname);
7418 if (nsname != defaults->values[2]) {
7419 if (nsPush(ctxt, attname,
7420 defaults->values[4 * i + 2]) > 0)
7421 nbNs++;
7422 }
7423 } else {
7424 /*
7425 * check that it's not a defined attribute
7426 */
7427 for (j = 0;j < nbatts;j+=5) {
7428 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7429 break;
7430 }
7431 if (j < nbatts) continue;
7432
7433 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7434 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007435 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007436 }
7437 maxatts = ctxt->maxatts;
7438 atts = ctxt->atts;
7439 }
7440 atts[nbatts++] = attname;
7441 atts[nbatts++] = aprefix;
7442 if (aprefix == NULL)
7443 atts[nbatts++] = NULL;
7444 else
7445 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7446 atts[nbatts++] = defaults->values[4 * i + 2];
7447 atts[nbatts++] = defaults->values[4 * i + 3];
7448 nbdef++;
7449 }
7450 }
7451 }
7452 }
7453
Daniel Veillarde70c8772003-11-25 07:21:18 +00007454 /*
7455 * The attributes checkings
7456 */
7457 for (i = 0; i < nbatts;i += 5) {
7458 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7459 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7460 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7461 "Namespace prefix %s for %s on %s is not defined\n",
7462 atts[i + 1], atts[i], localname);
7463 }
7464 atts[i + 2] = nsname;
7465 /*
7466 * [ WFC: Unique Att Spec ]
7467 * No attribute name may appear more than once in the same
7468 * start-tag or empty-element tag.
7469 * As extended by the Namespace in XML REC.
7470 */
7471 for (j = 0; j < i;j += 5) {
7472 if (atts[i] == atts[j]) {
7473 if (atts[i+1] == atts[j+1]) {
7474 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7475 break;
7476 }
7477 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7478 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7479 "Namespaced Attribute %s in '%s' redefined\n",
7480 atts[i], nsname, NULL);
7481 break;
7482 }
7483 }
7484 }
7485 }
7486
Daniel Veillarde57ec792003-09-10 10:50:59 +00007487 nsname = xmlGetNamespace(ctxt, prefix);
7488 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007489 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7490 "Namespace prefix %s on %s is not defined\n",
7491 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007492 }
7493 *pref = prefix;
7494 *URI = nsname;
7495
7496 /*
7497 * SAX: Start of Element !
7498 */
7499 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7500 (!ctxt->disableSAX)) {
7501 if (nbNs > 0)
7502 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7503 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7504 nbatts / 5, nbdef, atts);
7505 else
7506 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7507 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7508 }
7509
7510 /*
7511 * Free up attribute allocated strings if needed
7512 */
7513 if (attval != 0) {
7514 for (i = 3,j = 0; j < nratts;i += 5,j++)
7515 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7516 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007517 }
7518
7519 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007520
7521base_changed:
7522 /*
7523 * the attribute strings are valid iif the base didn't changed
7524 */
7525 if (attval != 0) {
7526 for (i = 3,j = 0; j < nratts;i += 5,j++)
7527 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7528 xmlFree((xmlChar *) atts[i]);
7529 }
7530 ctxt->input->cur = ctxt->input->base + cur;
7531 if (ctxt->wellFormed == 1) {
7532 goto reparse;
7533 }
7534 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007535}
7536
7537/**
7538 * xmlParseEndTag2:
7539 * @ctxt: an XML parser context
7540 * @line: line of the start tag
7541 * @nsNr: number of namespaces on the start tag
7542 *
7543 * parse an end of tag
7544 *
7545 * [42] ETag ::= '</' Name S? '>'
7546 *
7547 * With namespace
7548 *
7549 * [NS 9] ETag ::= '</' QName S? '>'
7550 */
7551
7552static void
7553xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007554 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007555 const xmlChar *name;
7556
7557 GROW;
7558 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007559 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007560 return;
7561 }
7562 SKIP(2);
7563
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007564 if ((tlen > 0) && (memcmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
7565 if (ctxt->input->cur[tlen] == '>') {
7566 ctxt->input->cur += tlen + 1;
7567 goto done;
7568 }
7569 ctxt->input->cur += tlen;
7570 name = (xmlChar*)1;
7571 } else {
7572 if (prefix == NULL)
7573 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7574 else
7575 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7576 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007577
7578 /*
7579 * We should definitely be at the ending "S? '>'" part
7580 */
7581 GROW;
7582 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007583 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007584 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007585 } else
7586 NEXT1;
7587
7588 /*
7589 * [ WFC: Element Type Match ]
7590 * The Name in an element's end-tag must match the element type in the
7591 * start-tag.
7592 *
7593 */
7594 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007595 if (name == NULL) name = BAD_CAST "unparseable";
7596 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007597 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007598 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007599 }
7600
7601 /*
7602 * SAX: End of Tag
7603 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007604done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007605 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7606 (!ctxt->disableSAX))
7607 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7608
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 spacePop(ctxt);
7610 if (nsNr != 0)
7611 nsPop(ctxt, nsNr);
7612 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007613}
7614
7615/**
Owen Taylor3473f882001-02-23 17:55:21 +00007616 * xmlParseCDSect:
7617 * @ctxt: an XML parser context
7618 *
7619 * Parse escaped pure raw content.
7620 *
7621 * [18] CDSect ::= CDStart CData CDEnd
7622 *
7623 * [19] CDStart ::= '<![CDATA['
7624 *
7625 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7626 *
7627 * [21] CDEnd ::= ']]>'
7628 */
7629void
7630xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7631 xmlChar *buf = NULL;
7632 int len = 0;
7633 int size = XML_PARSER_BUFFER_SIZE;
7634 int r, rl;
7635 int s, sl;
7636 int cur, l;
7637 int count = 0;
7638
Daniel Veillard8f597c32003-10-06 08:19:27 +00007639 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007640 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007641 SKIP(9);
7642 } else
7643 return;
7644
7645 ctxt->instate = XML_PARSER_CDATA_SECTION;
7646 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007647 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007648 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007649 ctxt->instate = XML_PARSER_CONTENT;
7650 return;
7651 }
7652 NEXTL(rl);
7653 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007654 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007655 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007656 ctxt->instate = XML_PARSER_CONTENT;
7657 return;
7658 }
7659 NEXTL(sl);
7660 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007661 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007662 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007663 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007664 return;
7665 }
William M. Brack871611b2003-10-18 04:53:14 +00007666 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007667 ((r != ']') || (s != ']') || (cur != '>'))) {
7668 if (len + 5 >= size) {
7669 size *= 2;
7670 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7671 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007672 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007673 return;
7674 }
7675 }
7676 COPY_BUF(rl,buf,len,r);
7677 r = s;
7678 rl = sl;
7679 s = cur;
7680 sl = l;
7681 count++;
7682 if (count > 50) {
7683 GROW;
7684 count = 0;
7685 }
7686 NEXTL(l);
7687 cur = CUR_CHAR(l);
7688 }
7689 buf[len] = 0;
7690 ctxt->instate = XML_PARSER_CONTENT;
7691 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007692 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007693 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007694 xmlFree(buf);
7695 return;
7696 }
7697 NEXTL(l);
7698
7699 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007700 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007701 */
7702 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7703 if (ctxt->sax->cdataBlock != NULL)
7704 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007705 else if (ctxt->sax->characters != NULL)
7706 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007707 }
7708 xmlFree(buf);
7709}
7710
7711/**
7712 * xmlParseContent:
7713 * @ctxt: an XML parser context
7714 *
7715 * Parse a content:
7716 *
7717 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7718 */
7719
7720void
7721xmlParseContent(xmlParserCtxtPtr ctxt) {
7722 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007723 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007724 ((RAW != '<') || (NXT(1) != '/'))) {
7725 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007726 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007727 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007728
7729 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007730 * First case : a Processing Instruction.
7731 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007732 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007733 xmlParsePI(ctxt);
7734 }
7735
7736 /*
7737 * Second case : a CDSection
7738 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007739 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007740 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007741 xmlParseCDSect(ctxt);
7742 }
7743
7744 /*
7745 * Third case : a comment
7746 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007747 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007748 (NXT(2) == '-') && (NXT(3) == '-')) {
7749 xmlParseComment(ctxt);
7750 ctxt->instate = XML_PARSER_CONTENT;
7751 }
7752
7753 /*
7754 * Fourth case : a sub-element.
7755 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007756 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007757 xmlParseElement(ctxt);
7758 }
7759
7760 /*
7761 * Fifth case : a reference. If if has not been resolved,
7762 * parsing returns it's Name, create the node
7763 */
7764
Daniel Veillard21a0f912001-02-25 19:54:14 +00007765 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007766 xmlParseReference(ctxt);
7767 }
7768
7769 /*
7770 * Last case, text. Note that References are handled directly.
7771 */
7772 else {
7773 xmlParseCharData(ctxt, 0);
7774 }
7775
7776 GROW;
7777 /*
7778 * Pop-up of finished entities.
7779 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007780 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007781 xmlPopInput(ctxt);
7782 SHRINK;
7783
Daniel Veillardfdc91562002-07-01 21:52:03 +00007784 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007785 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7786 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007787 ctxt->instate = XML_PARSER_EOF;
7788 break;
7789 }
7790 }
7791}
7792
7793/**
7794 * xmlParseElement:
7795 * @ctxt: an XML parser context
7796 *
7797 * parse an XML element, this is highly recursive
7798 *
7799 * [39] element ::= EmptyElemTag | STag content ETag
7800 *
7801 * [ WFC: Element Type Match ]
7802 * The Name in an element's end-tag must match the element type in the
7803 * start-tag.
7804 *
Owen Taylor3473f882001-02-23 17:55:21 +00007805 */
7806
7807void
7808xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007809 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007810 const xmlChar *prefix;
7811 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007812 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007813 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007814 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007816
7817 /* Capture start position */
7818 if (ctxt->record_info) {
7819 node_info.begin_pos = ctxt->input->consumed +
7820 (CUR_PTR - ctxt->input->base);
7821 node_info.begin_line = ctxt->input->line;
7822 }
7823
7824 if (ctxt->spaceNr == 0)
7825 spacePush(ctxt, -1);
7826 else
7827 spacePush(ctxt, *ctxt->space);
7828
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007829 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007830#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007831 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007832#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007833 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007834#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007835 else
7836 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007837#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007838 if (name == NULL) {
7839 spacePop(ctxt);
7840 return;
7841 }
7842 namePush(ctxt, name);
7843 ret = ctxt->node;
7844
Daniel Veillard4432df22003-09-28 18:58:27 +00007845#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007846 /*
7847 * [ VC: Root Element Type ]
7848 * The Name in the document type declaration must match the element
7849 * type of the root element.
7850 */
7851 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7852 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7853 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007854#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007855
7856 /*
7857 * Check for an Empty Element.
7858 */
7859 if ((RAW == '/') && (NXT(1) == '>')) {
7860 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007861 if (ctxt->sax2) {
7862 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7863 (!ctxt->disableSAX))
7864 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007865#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007866 } else {
7867 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7868 (!ctxt->disableSAX))
7869 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007870#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007871 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872 namePop(ctxt);
7873 spacePop(ctxt);
7874 if (nsNr != ctxt->nsNr)
7875 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007876 if ( ret != NULL && ctxt->record_info ) {
7877 node_info.end_pos = ctxt->input->consumed +
7878 (CUR_PTR - ctxt->input->base);
7879 node_info.end_line = ctxt->input->line;
7880 node_info.node = ret;
7881 xmlParserAddNodeInfo(ctxt, &node_info);
7882 }
7883 return;
7884 }
7885 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007886 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007887 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007888 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7889 "Couldn't find end of Start Tag %s line %d\n",
7890 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007891
7892 /*
7893 * end of parsing of this node.
7894 */
7895 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007896 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007897 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007898 if (nsNr != ctxt->nsNr)
7899 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007900
7901 /*
7902 * Capture end position and add node
7903 */
7904 if ( ret != NULL && ctxt->record_info ) {
7905 node_info.end_pos = ctxt->input->consumed +
7906 (CUR_PTR - ctxt->input->base);
7907 node_info.end_line = ctxt->input->line;
7908 node_info.node = ret;
7909 xmlParserAddNodeInfo(ctxt, &node_info);
7910 }
7911 return;
7912 }
7913
7914 /*
7915 * Parse the content of the element:
7916 */
7917 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007918 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007919 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007920 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007921 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007922
7923 /*
7924 * end of parsing of this node.
7925 */
7926 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007927 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007928 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 if (nsNr != ctxt->nsNr)
7930 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007931 return;
7932 }
7933
7934 /*
7935 * parse the end of tag: '</' should be here.
7936 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007937 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007938 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007939 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007940 }
7941#ifdef LIBXML_SAX1_ENABLED
7942 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00007943 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00007944#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007945
7946 /*
7947 * Capture end position and add node
7948 */
7949 if ( ret != NULL && ctxt->record_info ) {
7950 node_info.end_pos = ctxt->input->consumed +
7951 (CUR_PTR - ctxt->input->base);
7952 node_info.end_line = ctxt->input->line;
7953 node_info.node = ret;
7954 xmlParserAddNodeInfo(ctxt, &node_info);
7955 }
7956}
7957
7958/**
7959 * xmlParseVersionNum:
7960 * @ctxt: an XML parser context
7961 *
7962 * parse the XML version value.
7963 *
7964 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7965 *
7966 * Returns the string giving the XML version number, or NULL
7967 */
7968xmlChar *
7969xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7970 xmlChar *buf = NULL;
7971 int len = 0;
7972 int size = 10;
7973 xmlChar cur;
7974
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007975 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007976 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007977 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007978 return(NULL);
7979 }
7980 cur = CUR;
7981 while (((cur >= 'a') && (cur <= 'z')) ||
7982 ((cur >= 'A') && (cur <= 'Z')) ||
7983 ((cur >= '0') && (cur <= '9')) ||
7984 (cur == '_') || (cur == '.') ||
7985 (cur == ':') || (cur == '-')) {
7986 if (len + 1 >= size) {
7987 size *= 2;
7988 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7989 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007990 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007991 return(NULL);
7992 }
7993 }
7994 buf[len++] = cur;
7995 NEXT;
7996 cur=CUR;
7997 }
7998 buf[len] = 0;
7999 return(buf);
8000}
8001
8002/**
8003 * xmlParseVersionInfo:
8004 * @ctxt: an XML parser context
8005 *
8006 * parse the XML version.
8007 *
8008 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8009 *
8010 * [25] Eq ::= S? '=' S?
8011 *
8012 * Returns the version string, e.g. "1.0"
8013 */
8014
8015xmlChar *
8016xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8017 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008018
Daniel Veillarda07050d2003-10-19 14:46:32 +00008019 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008020 SKIP(7);
8021 SKIP_BLANKS;
8022 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008023 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008024 return(NULL);
8025 }
8026 NEXT;
8027 SKIP_BLANKS;
8028 if (RAW == '"') {
8029 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008030 version = xmlParseVersionNum(ctxt);
8031 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008032 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008033 } else
8034 NEXT;
8035 } else if (RAW == '\''){
8036 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008037 version = xmlParseVersionNum(ctxt);
8038 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008039 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008040 } else
8041 NEXT;
8042 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008043 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008044 }
8045 }
8046 return(version);
8047}
8048
8049/**
8050 * xmlParseEncName:
8051 * @ctxt: an XML parser context
8052 *
8053 * parse the XML encoding name
8054 *
8055 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8056 *
8057 * Returns the encoding name value or NULL
8058 */
8059xmlChar *
8060xmlParseEncName(xmlParserCtxtPtr ctxt) {
8061 xmlChar *buf = NULL;
8062 int len = 0;
8063 int size = 10;
8064 xmlChar cur;
8065
8066 cur = CUR;
8067 if (((cur >= 'a') && (cur <= 'z')) ||
8068 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008069 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008070 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008071 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008072 return(NULL);
8073 }
8074
8075 buf[len++] = cur;
8076 NEXT;
8077 cur = CUR;
8078 while (((cur >= 'a') && (cur <= 'z')) ||
8079 ((cur >= 'A') && (cur <= 'Z')) ||
8080 ((cur >= '0') && (cur <= '9')) ||
8081 (cur == '.') || (cur == '_') ||
8082 (cur == '-')) {
8083 if (len + 1 >= size) {
8084 size *= 2;
8085 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8086 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008087 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008088 return(NULL);
8089 }
8090 }
8091 buf[len++] = cur;
8092 NEXT;
8093 cur = CUR;
8094 if (cur == 0) {
8095 SHRINK;
8096 GROW;
8097 cur = CUR;
8098 }
8099 }
8100 buf[len] = 0;
8101 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008102 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008103 }
8104 return(buf);
8105}
8106
8107/**
8108 * xmlParseEncodingDecl:
8109 * @ctxt: an XML parser context
8110 *
8111 * parse the XML encoding declaration
8112 *
8113 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8114 *
8115 * this setups the conversion filters.
8116 *
8117 * Returns the encoding value or NULL
8118 */
8119
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008120const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008121xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8122 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008123
8124 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008125 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008126 SKIP(8);
8127 SKIP_BLANKS;
8128 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008129 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008130 return(NULL);
8131 }
8132 NEXT;
8133 SKIP_BLANKS;
8134 if (RAW == '"') {
8135 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008136 encoding = xmlParseEncName(ctxt);
8137 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008138 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008139 } else
8140 NEXT;
8141 } else if (RAW == '\''){
8142 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008143 encoding = xmlParseEncName(ctxt);
8144 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008145 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008146 } else
8147 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008148 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008149 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008150 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008151 /*
8152 * UTF-16 encoding stwich has already taken place at this stage,
8153 * more over the little-endian/big-endian selection is already done
8154 */
8155 if ((encoding != NULL) &&
8156 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8157 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008158 if (ctxt->encoding != NULL)
8159 xmlFree((xmlChar *) ctxt->encoding);
8160 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008161 }
8162 /*
8163 * UTF-8 encoding is handled natively
8164 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008165 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008166 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8167 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008168 if (ctxt->encoding != NULL)
8169 xmlFree((xmlChar *) ctxt->encoding);
8170 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008171 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008172 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008173 xmlCharEncodingHandlerPtr handler;
8174
8175 if (ctxt->input->encoding != NULL)
8176 xmlFree((xmlChar *) ctxt->input->encoding);
8177 ctxt->input->encoding = encoding;
8178
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008179 handler = xmlFindCharEncodingHandler((const char *) encoding);
8180 if (handler != NULL) {
8181 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008182 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008183 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008184 "Unsupported encoding %s\n", encoding);
8185 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008186 }
8187 }
8188 }
8189 return(encoding);
8190}
8191
8192/**
8193 * xmlParseSDDecl:
8194 * @ctxt: an XML parser context
8195 *
8196 * parse the XML standalone declaration
8197 *
8198 * [32] SDDecl ::= S 'standalone' Eq
8199 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8200 *
8201 * [ VC: Standalone Document Declaration ]
8202 * TODO The standalone document declaration must have the value "no"
8203 * if any external markup declarations contain declarations of:
8204 * - attributes with default values, if elements to which these
8205 * attributes apply appear in the document without specifications
8206 * of values for these attributes, or
8207 * - entities (other than amp, lt, gt, apos, quot), if references
8208 * to those entities appear in the document, or
8209 * - attributes with values subject to normalization, where the
8210 * attribute appears in the document with a value which will change
8211 * as a result of normalization, or
8212 * - element types with element content, if white space occurs directly
8213 * within any instance of those types.
8214 *
 * Returns 1 if standalone="yes", 0 if standalone="no", and -1 if the
 * declaration is absent or its value could not be parsed.
8216 */
8217
8218int
8219xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8220 int standalone = -1;
8221
8222 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008223 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008224 SKIP(10);
8225 SKIP_BLANKS;
8226 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008227 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008228 return(standalone);
8229 }
8230 NEXT;
8231 SKIP_BLANKS;
8232 if (RAW == '\''){
8233 NEXT;
8234 if ((RAW == 'n') && (NXT(1) == 'o')) {
8235 standalone = 0;
8236 SKIP(2);
8237 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8238 (NXT(2) == 's')) {
8239 standalone = 1;
8240 SKIP(3);
8241 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008242 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008243 }
8244 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008245 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008246 } else
8247 NEXT;
8248 } else if (RAW == '"'){
8249 NEXT;
8250 if ((RAW == 'n') && (NXT(1) == 'o')) {
8251 standalone = 0;
8252 SKIP(2);
8253 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8254 (NXT(2) == 's')) {
8255 standalone = 1;
8256 SKIP(3);
8257 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008258 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008259 }
8260 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008261 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008262 } else
8263 NEXT;
8264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008265 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008266 }
8267 }
8268 return(standalone);
8269}
8270
8271/**
8272 * xmlParseXMLDecl:
8273 * @ctxt: an XML parser context
8274 *
8275 * parse an XML declaration header
8276 *
8277 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8278 */
8279
8280void
8281xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8282 xmlChar *version;
8283
8284 /*
8285 * We know that '<?xml' is here.
8286 */
8287 SKIP(5);
8288
William M. Brack76e95df2003-10-18 16:20:14 +00008289 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008290 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8291 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008292 }
8293 SKIP_BLANKS;
8294
8295 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008296 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008297 */
8298 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008299 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008300 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008301 } else {
8302 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8303 /*
8304 * TODO: Blueberry should be detected here
8305 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008306 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8307 "Unsupported version '%s'\n",
8308 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008309 }
8310 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008311 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008312 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008313 }
Owen Taylor3473f882001-02-23 17:55:21 +00008314
8315 /*
8316 * We may have the encoding declaration
8317 */
William M. Brack76e95df2003-10-18 16:20:14 +00008318 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008319 if ((RAW == '?') && (NXT(1) == '>')) {
8320 SKIP(2);
8321 return;
8322 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008324 }
8325 xmlParseEncodingDecl(ctxt);
8326 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8327 /*
8328 * The XML REC instructs us to stop parsing right here
8329 */
8330 return;
8331 }
8332
8333 /*
8334 * We may have the standalone status.
8335 */
William M. Brack76e95df2003-10-18 16:20:14 +00008336 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008337 if ((RAW == '?') && (NXT(1) == '>')) {
8338 SKIP(2);
8339 return;
8340 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008342 }
8343 SKIP_BLANKS;
8344 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8345
8346 SKIP_BLANKS;
8347 if ((RAW == '?') && (NXT(1) == '>')) {
8348 SKIP(2);
8349 } else if (RAW == '>') {
8350 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008351 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008352 NEXT;
8353 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008354 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008355 MOVETO_ENDTAG(CUR_PTR);
8356 NEXT;
8357 }
8358}
8359
8360/**
8361 * xmlParseMisc:
8362 * @ctxt: an XML parser context
8363 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008364 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008365 *
8366 * [27] Misc ::= Comment | PI | S
8367 */
8368
8369void
8370xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008371 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008372 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008373 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008374 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008375 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008376 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008377 NEXT;
8378 } else
8379 xmlParseComment(ctxt);
8380 }
8381}
8382
8383/**
8384 * xmlParseDocument:
8385 * @ctxt: an XML parser context
8386 *
8387 * parse an XML document (and build a tree if using the standard SAX
8388 * interface).
8389 *
8390 * [1] document ::= prolog element Misc*
8391 *
8392 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8393 *
8394 * Returns 0, -1 in case of error. the parser context is augmented
8395 * as a result of the parsing.
8396 */
8397
8398int
8399xmlParseDocument(xmlParserCtxtPtr ctxt) {
8400 xmlChar start[4];
8401 xmlCharEncoding enc;
8402
8403 xmlInitParser();
8404
8405 GROW;
8406
8407 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008408 * SAX: detecting the level.
8409 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008410 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008411
8412 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008413 * SAX: beginning of the document processing.
8414 */
8415 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8416 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8417
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008418 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8419 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008420 /*
8421 * Get the 4 first bytes and decode the charset
8422 * if enc != XML_CHAR_ENCODING_NONE
8423 * plug some encoding conversion routines.
8424 */
8425 start[0] = RAW;
8426 start[1] = NXT(1);
8427 start[2] = NXT(2);
8428 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008429 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008430 if (enc != XML_CHAR_ENCODING_NONE) {
8431 xmlSwitchEncoding(ctxt, enc);
8432 }
Owen Taylor3473f882001-02-23 17:55:21 +00008433 }
8434
8435
8436 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008437 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008438 }
8439
8440 /*
8441 * Check for the XMLDecl in the Prolog.
8442 */
8443 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008444 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008445
8446 /*
8447 * Note that we will switch encoding on the fly.
8448 */
8449 xmlParseXMLDecl(ctxt);
8450 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8451 /*
8452 * The XML REC instructs us to stop parsing right here
8453 */
8454 return(-1);
8455 }
8456 ctxt->standalone = ctxt->input->standalone;
8457 SKIP_BLANKS;
8458 } else {
8459 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8460 }
8461 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8462 ctxt->sax->startDocument(ctxt->userData);
8463
8464 /*
8465 * The Misc part of the Prolog
8466 */
8467 GROW;
8468 xmlParseMisc(ctxt);
8469
8470 /*
8471 * Then possibly doc type declaration(s) and more Misc
8472 * (doctypedecl Misc*)?
8473 */
8474 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008475 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008476
8477 ctxt->inSubset = 1;
8478 xmlParseDocTypeDecl(ctxt);
8479 if (RAW == '[') {
8480 ctxt->instate = XML_PARSER_DTD;
8481 xmlParseInternalSubset(ctxt);
8482 }
8483
8484 /*
8485 * Create and update the external subset.
8486 */
8487 ctxt->inSubset = 2;
8488 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8489 (!ctxt->disableSAX))
8490 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8491 ctxt->extSubSystem, ctxt->extSubURI);
8492 ctxt->inSubset = 0;
8493
8494
8495 ctxt->instate = XML_PARSER_PROLOG;
8496 xmlParseMisc(ctxt);
8497 }
8498
8499 /*
8500 * Time to start parsing the tree itself
8501 */
8502 GROW;
8503 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008504 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8505 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008506 } else {
8507 ctxt->instate = XML_PARSER_CONTENT;
8508 xmlParseElement(ctxt);
8509 ctxt->instate = XML_PARSER_EPILOG;
8510
8511
8512 /*
8513 * The Misc part at the end
8514 */
8515 xmlParseMisc(ctxt);
8516
Daniel Veillard561b7f82002-03-20 21:55:57 +00008517 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008518 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008519 }
8520 ctxt->instate = XML_PARSER_EOF;
8521 }
8522
8523 /*
8524 * SAX: end of the document processing.
8525 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008526 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008527 ctxt->sax->endDocument(ctxt->userData);
8528
Daniel Veillard5997aca2002-03-18 18:36:20 +00008529 /*
8530 * Remove locally kept entity definitions if the tree was not built
8531 */
8532 if ((ctxt->myDoc != NULL) &&
8533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8534 xmlFreeDoc(ctxt->myDoc);
8535 ctxt->myDoc = NULL;
8536 }
8537
Daniel Veillardc7612992002-02-17 22:47:37 +00008538 if (! ctxt->wellFormed) {
8539 ctxt->valid = 0;
8540 return(-1);
8541 }
Owen Taylor3473f882001-02-23 17:55:21 +00008542 return(0);
8543}
8544
8545/**
8546 * xmlParseExtParsedEnt:
8547 * @ctxt: an XML parser context
8548 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008549 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008550 * An external general parsed entity is well-formed if it matches the
8551 * production labeled extParsedEnt.
8552 *
8553 * [78] extParsedEnt ::= TextDecl? content
8554 *
8555 * Returns 0, -1 in case of error. the parser context is augmented
8556 * as a result of the parsing.
8557 */
8558
8559int
8560xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8561 xmlChar start[4];
8562 xmlCharEncoding enc;
8563
8564 xmlDefaultSAXHandlerInit();
8565
Daniel Veillard309f81d2003-09-23 09:02:53 +00008566 xmlDetectSAX2(ctxt);
8567
Owen Taylor3473f882001-02-23 17:55:21 +00008568 GROW;
8569
8570 /*
8571 * SAX: beginning of the document processing.
8572 */
8573 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8574 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8575
8576 /*
8577 * Get the 4 first bytes and decode the charset
8578 * if enc != XML_CHAR_ENCODING_NONE
8579 * plug some encoding conversion routines.
8580 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008581 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8582 start[0] = RAW;
8583 start[1] = NXT(1);
8584 start[2] = NXT(2);
8585 start[3] = NXT(3);
8586 enc = xmlDetectCharEncoding(start, 4);
8587 if (enc != XML_CHAR_ENCODING_NONE) {
8588 xmlSwitchEncoding(ctxt, enc);
8589 }
Owen Taylor3473f882001-02-23 17:55:21 +00008590 }
8591
8592
8593 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008594 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008595 }
8596
8597 /*
8598 * Check for the XMLDecl in the Prolog.
8599 */
8600 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008601 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008602
8603 /*
8604 * Note that we will switch encoding on the fly.
8605 */
8606 xmlParseXMLDecl(ctxt);
8607 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8608 /*
8609 * The XML REC instructs us to stop parsing right here
8610 */
8611 return(-1);
8612 }
8613 SKIP_BLANKS;
8614 } else {
8615 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8616 }
8617 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8618 ctxt->sax->startDocument(ctxt->userData);
8619
8620 /*
8621 * Doing validity checking on chunk doesn't make sense
8622 */
8623 ctxt->instate = XML_PARSER_CONTENT;
8624 ctxt->validate = 0;
8625 ctxt->loadsubset = 0;
8626 ctxt->depth = 0;
8627
8628 xmlParseContent(ctxt);
8629
8630 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008631 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008632 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008633 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008634 }
8635
8636 /*
8637 * SAX: end of the document processing.
8638 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008639 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008640 ctxt->sax->endDocument(ctxt->userData);
8641
8642 if (! ctxt->wellFormed) return(-1);
8643 return(0);
8644}
8645
Daniel Veillard73b013f2003-09-30 12:36:01 +00008646#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008647/************************************************************************
8648 * *
8649 * Progressive parsing interfaces *
8650 * *
8651 ************************************************************************/
8652
8653/**
8654 * xmlParseLookupSequence:
8655 * @ctxt: an XML parser context
8656 * @first: the first char to lookup
8657 * @next: the next char to lookup or zero
8658 * @third: the next char to lookup or zero
8659 *
8660 * Try to find if a sequence (first, next, third) or just (first next) or
8661 * (first) is available in the input stream.
8662 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8663 * to avoid rescanning sequences of bytes, it DOES change the state of the
8664 * parser, do not use liberally.
8665 *
8666 * Returns the index to the current parsing point if the full sequence
8667 * is available, -1 otherwise.
8668 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008669static int
Owen Taylor3473f882001-02-23 17:55:21 +00008670xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8671 xmlChar next, xmlChar third) {
8672 int base, len;
8673 xmlParserInputPtr in;
8674 const xmlChar *buf;
8675
8676 in = ctxt->input;
8677 if (in == NULL) return(-1);
8678 base = in->cur - in->base;
8679 if (base < 0) return(-1);
8680 if (ctxt->checkIndex > base)
8681 base = ctxt->checkIndex;
8682 if (in->buf == NULL) {
8683 buf = in->base;
8684 len = in->length;
8685 } else {
8686 buf = in->buf->buffer->content;
8687 len = in->buf->buffer->use;
8688 }
8689 /* take into account the sequence length */
8690 if (third) len -= 2;
8691 else if (next) len --;
8692 for (;base < len;base++) {
8693 if (buf[base] == first) {
8694 if (third != 0) {
8695 if ((buf[base + 1] != next) ||
8696 (buf[base + 2] != third)) continue;
8697 } else if (next != 0) {
8698 if (buf[base + 1] != next) continue;
8699 }
8700 ctxt->checkIndex = 0;
8701#ifdef DEBUG_PUSH
8702 if (next == 0)
8703 xmlGenericError(xmlGenericErrorContext,
8704 "PP: lookup '%c' found at %d\n",
8705 first, base);
8706 else if (third == 0)
8707 xmlGenericError(xmlGenericErrorContext,
8708 "PP: lookup '%c%c' found at %d\n",
8709 first, next, base);
8710 else
8711 xmlGenericError(xmlGenericErrorContext,
8712 "PP: lookup '%c%c%c' found at %d\n",
8713 first, next, third, base);
8714#endif
8715 return(base - (in->cur - in->base));
8716 }
8717 }
8718 ctxt->checkIndex = base;
8719#ifdef DEBUG_PUSH
8720 if (next == 0)
8721 xmlGenericError(xmlGenericErrorContext,
8722 "PP: lookup '%c' failed\n", first);
8723 else if (third == 0)
8724 xmlGenericError(xmlGenericErrorContext,
8725 "PP: lookup '%c%c' failed\n", first, next);
8726 else
8727 xmlGenericError(xmlGenericErrorContext,
8728 "PP: lookup '%c%c%c' failed\n", first, next, third);
8729#endif
8730 return(-1);
8731}
8732
8733/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008734 * xmlParseGetLasts:
8735 * @ctxt: an XML parser context
8736 * @lastlt: pointer to store the last '<' from the input
8737 * @lastgt: pointer to store the last '>' from the input
8738 *
8739 * Lookup the last < and > in the current chunk
8740 */
8741static void
8742xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8743 const xmlChar **lastgt) {
8744 const xmlChar *tmp;
8745
8746 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8747 xmlGenericError(xmlGenericErrorContext,
8748 "Internal error: xmlParseGetLasts\n");
8749 return;
8750 }
8751 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8752 tmp = ctxt->input->end;
8753 tmp--;
8754 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8755 (*tmp != '>')) tmp--;
8756 if (tmp < ctxt->input->base) {
8757 *lastlt = NULL;
8758 *lastgt = NULL;
8759 } else if (*tmp == '<') {
8760 *lastlt = tmp;
8761 tmp--;
8762 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8763 if (tmp < ctxt->input->base)
8764 *lastgt = NULL;
8765 else
8766 *lastgt = tmp;
8767 } else {
8768 *lastgt = tmp;
8769 tmp--;
8770 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8771 if (tmp < ctxt->input->base)
8772 *lastlt = NULL;
8773 else
8774 *lastlt = tmp;
8775 }
8776
8777 } else {
8778 *lastlt = NULL;
8779 *lastgt = NULL;
8780 }
8781}
8782/**
Owen Taylor3473f882001-02-23 17:55:21 +00008783 * xmlParseTryOrFinish:
8784 * @ctxt: an XML parser context
8785 * @terminate: last chunk indicator
8786 *
8787 * Try to progress on parsing
8788 *
8789 * Returns zero if no parsing was possible
8790 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008791static int
Owen Taylor3473f882001-02-23 17:55:21 +00008792xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8793 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008794 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008795 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008796 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008797
8798#ifdef DEBUG_PUSH
8799 switch (ctxt->instate) {
8800 case XML_PARSER_EOF:
8801 xmlGenericError(xmlGenericErrorContext,
8802 "PP: try EOF\n"); break;
8803 case XML_PARSER_START:
8804 xmlGenericError(xmlGenericErrorContext,
8805 "PP: try START\n"); break;
8806 case XML_PARSER_MISC:
8807 xmlGenericError(xmlGenericErrorContext,
8808 "PP: try MISC\n");break;
8809 case XML_PARSER_COMMENT:
8810 xmlGenericError(xmlGenericErrorContext,
8811 "PP: try COMMENT\n");break;
8812 case XML_PARSER_PROLOG:
8813 xmlGenericError(xmlGenericErrorContext,
8814 "PP: try PROLOG\n");break;
8815 case XML_PARSER_START_TAG:
8816 xmlGenericError(xmlGenericErrorContext,
8817 "PP: try START_TAG\n");break;
8818 case XML_PARSER_CONTENT:
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: try CONTENT\n");break;
8821 case XML_PARSER_CDATA_SECTION:
8822 xmlGenericError(xmlGenericErrorContext,
8823 "PP: try CDATA_SECTION\n");break;
8824 case XML_PARSER_END_TAG:
8825 xmlGenericError(xmlGenericErrorContext,
8826 "PP: try END_TAG\n");break;
8827 case XML_PARSER_ENTITY_DECL:
8828 xmlGenericError(xmlGenericErrorContext,
8829 "PP: try ENTITY_DECL\n");break;
8830 case XML_PARSER_ENTITY_VALUE:
8831 xmlGenericError(xmlGenericErrorContext,
8832 "PP: try ENTITY_VALUE\n");break;
8833 case XML_PARSER_ATTRIBUTE_VALUE:
8834 xmlGenericError(xmlGenericErrorContext,
8835 "PP: try ATTRIBUTE_VALUE\n");break;
8836 case XML_PARSER_DTD:
8837 xmlGenericError(xmlGenericErrorContext,
8838 "PP: try DTD\n");break;
8839 case XML_PARSER_EPILOG:
8840 xmlGenericError(xmlGenericErrorContext,
8841 "PP: try EPILOG\n");break;
8842 case XML_PARSER_PI:
8843 xmlGenericError(xmlGenericErrorContext,
8844 "PP: try PI\n");break;
8845 case XML_PARSER_IGNORE:
8846 xmlGenericError(xmlGenericErrorContext,
8847 "PP: try IGNORE\n");break;
8848 }
8849#endif
8850
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008851 if ((ctxt->input != NULL) &&
8852 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008853 xmlSHRINK(ctxt);
8854 ctxt->checkIndex = 0;
8855 }
8856 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008857
Daniel Veillarda880b122003-04-21 21:36:41 +00008858 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008859 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8860 return(0);
8861
8862
Owen Taylor3473f882001-02-23 17:55:21 +00008863 /*
8864 * Pop-up of finished entities.
8865 */
8866 while ((RAW == 0) && (ctxt->inputNr > 1))
8867 xmlPopInput(ctxt);
8868
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008869 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008870 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008871 avail = ctxt->input->length -
8872 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008873 else {
8874 /*
8875 * If we are operating on converted input, try to flush
8876 * remainng chars to avoid them stalling in the non-converted
8877 * buffer.
8878 */
8879 if ((ctxt->input->buf->raw != NULL) &&
8880 (ctxt->input->buf->raw->use > 0)) {
8881 int base = ctxt->input->base -
8882 ctxt->input->buf->buffer->content;
8883 int current = ctxt->input->cur - ctxt->input->base;
8884
8885 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8886 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8887 ctxt->input->cur = ctxt->input->base + current;
8888 ctxt->input->end =
8889 &ctxt->input->buf->buffer->content[
8890 ctxt->input->buf->buffer->use];
8891 }
8892 avail = ctxt->input->buf->buffer->use -
8893 (ctxt->input->cur - ctxt->input->base);
8894 }
Owen Taylor3473f882001-02-23 17:55:21 +00008895 if (avail < 1)
8896 goto done;
8897 switch (ctxt->instate) {
8898 case XML_PARSER_EOF:
8899 /*
8900 * Document parsing is done !
8901 */
8902 goto done;
8903 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008904 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8905 xmlChar start[4];
8906 xmlCharEncoding enc;
8907
8908 /*
8909 * Very first chars read from the document flow.
8910 */
8911 if (avail < 4)
8912 goto done;
8913
8914 /*
8915 * Get the 4 first bytes and decode the charset
8916 * if enc != XML_CHAR_ENCODING_NONE
8917 * plug some encoding conversion routines.
8918 */
8919 start[0] = RAW;
8920 start[1] = NXT(1);
8921 start[2] = NXT(2);
8922 start[3] = NXT(3);
8923 enc = xmlDetectCharEncoding(start, 4);
8924 if (enc != XML_CHAR_ENCODING_NONE) {
8925 xmlSwitchEncoding(ctxt, enc);
8926 }
8927 break;
8928 }
Owen Taylor3473f882001-02-23 17:55:21 +00008929
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00008930 if (avail < 2)
8931 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00008932 cur = ctxt->input->cur[0];
8933 next = ctxt->input->cur[1];
8934 if (cur == 0) {
8935 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8936 ctxt->sax->setDocumentLocator(ctxt->userData,
8937 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008938 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008939 ctxt->instate = XML_PARSER_EOF;
8940#ifdef DEBUG_PUSH
8941 xmlGenericError(xmlGenericErrorContext,
8942 "PP: entering EOF\n");
8943#endif
8944 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8945 ctxt->sax->endDocument(ctxt->userData);
8946 goto done;
8947 }
8948 if ((cur == '<') && (next == '?')) {
8949 /* PI or XML decl */
8950 if (avail < 5) return(ret);
8951 if ((!terminate) &&
8952 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8953 return(ret);
8954 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8955 ctxt->sax->setDocumentLocator(ctxt->userData,
8956 &xmlDefaultSAXLocator);
8957 if ((ctxt->input->cur[2] == 'x') &&
8958 (ctxt->input->cur[3] == 'm') &&
8959 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00008960 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008961 ret += 5;
8962#ifdef DEBUG_PUSH
8963 xmlGenericError(xmlGenericErrorContext,
8964 "PP: Parsing XML Decl\n");
8965#endif
8966 xmlParseXMLDecl(ctxt);
8967 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8968 /*
8969 * The XML REC instructs us to stop parsing right
8970 * here
8971 */
8972 ctxt->instate = XML_PARSER_EOF;
8973 return(0);
8974 }
8975 ctxt->standalone = ctxt->input->standalone;
8976 if ((ctxt->encoding == NULL) &&
8977 (ctxt->input->encoding != NULL))
8978 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8979 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8980 (!ctxt->disableSAX))
8981 ctxt->sax->startDocument(ctxt->userData);
8982 ctxt->instate = XML_PARSER_MISC;
8983#ifdef DEBUG_PUSH
8984 xmlGenericError(xmlGenericErrorContext,
8985 "PP: entering MISC\n");
8986#endif
8987 } else {
8988 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8989 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8990 (!ctxt->disableSAX))
8991 ctxt->sax->startDocument(ctxt->userData);
8992 ctxt->instate = XML_PARSER_MISC;
8993#ifdef DEBUG_PUSH
8994 xmlGenericError(xmlGenericErrorContext,
8995 "PP: entering MISC\n");
8996#endif
8997 }
8998 } else {
8999 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9000 ctxt->sax->setDocumentLocator(ctxt->userData,
9001 &xmlDefaultSAXLocator);
9002 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9003 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9004 (!ctxt->disableSAX))
9005 ctxt->sax->startDocument(ctxt->userData);
9006 ctxt->instate = XML_PARSER_MISC;
9007#ifdef DEBUG_PUSH
9008 xmlGenericError(xmlGenericErrorContext,
9009 "PP: entering MISC\n");
9010#endif
9011 }
9012 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009013 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009014 const xmlChar *name;
9015 const xmlChar *prefix;
9016 const xmlChar *URI;
9017 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009018
9019 if ((avail < 2) && (ctxt->inputNr == 1))
9020 goto done;
9021 cur = ctxt->input->cur[0];
9022 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009023 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009024 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009025 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9026 ctxt->sax->endDocument(ctxt->userData);
9027 goto done;
9028 }
9029 if (!terminate) {
9030 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009031 /* > can be found unescaped in attribute values */
9032 if ((lastlt == NULL) || (ctxt->input->cur >= lastlt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009033 goto done;
9034 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9035 goto done;
9036 }
9037 }
9038 if (ctxt->spaceNr == 0)
9039 spacePush(ctxt, -1);
9040 else
9041 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009042#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009043 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009044#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009045 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009046#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009047 else
9048 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009049#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009050 if (name == NULL) {
9051 spacePop(ctxt);
9052 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009053 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9054 ctxt->sax->endDocument(ctxt->userData);
9055 goto done;
9056 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009057#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009058 /*
9059 * [ VC: Root Element Type ]
9060 * The Name in the document type declaration must match
9061 * the element type of the root element.
9062 */
9063 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9064 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9065 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009066#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009067
9068 /*
9069 * Check for an Empty Element.
9070 */
9071 if ((RAW == '/') && (NXT(1) == '>')) {
9072 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009073
9074 if (ctxt->sax2) {
9075 if ((ctxt->sax != NULL) &&
9076 (ctxt->sax->endElementNs != NULL) &&
9077 (!ctxt->disableSAX))
9078 ctxt->sax->endElementNs(ctxt->userData, name,
9079 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009080#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009081 } else {
9082 if ((ctxt->sax != NULL) &&
9083 (ctxt->sax->endElement != NULL) &&
9084 (!ctxt->disableSAX))
9085 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009086#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009087 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009088 spacePop(ctxt);
9089 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009090 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009091 } else {
9092 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009093 }
9094 break;
9095 }
9096 if (RAW == '>') {
9097 NEXT;
9098 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009099 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009100 "Couldn't find end of Start Tag %s\n",
9101 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009102 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009103 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009104 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009105 if (ctxt->sax2)
9106 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009107#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009108 else
9109 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009110#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009111
Daniel Veillarda880b122003-04-21 21:36:41 +00009112 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009113 break;
9114 }
9115 case XML_PARSER_CONTENT: {
9116 const xmlChar *test;
9117 unsigned int cons;
9118 if ((avail < 2) && (ctxt->inputNr == 1))
9119 goto done;
9120 cur = ctxt->input->cur[0];
9121 next = ctxt->input->cur[1];
9122
9123 test = CUR_PTR;
9124 cons = ctxt->input->consumed;
9125 if ((cur == '<') && (next == '/')) {
9126 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009127 break;
9128 } else if ((cur == '<') && (next == '?')) {
9129 if ((!terminate) &&
9130 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9131 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009132 xmlParsePI(ctxt);
9133 } else if ((cur == '<') && (next != '!')) {
9134 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009135 break;
9136 } else if ((cur == '<') && (next == '!') &&
9137 (ctxt->input->cur[2] == '-') &&
9138 (ctxt->input->cur[3] == '-')) {
9139 if ((!terminate) &&
9140 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9141 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009142 xmlParseComment(ctxt);
9143 ctxt->instate = XML_PARSER_CONTENT;
9144 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9145 (ctxt->input->cur[2] == '[') &&
9146 (ctxt->input->cur[3] == 'C') &&
9147 (ctxt->input->cur[4] == 'D') &&
9148 (ctxt->input->cur[5] == 'A') &&
9149 (ctxt->input->cur[6] == 'T') &&
9150 (ctxt->input->cur[7] == 'A') &&
9151 (ctxt->input->cur[8] == '[')) {
9152 SKIP(9);
9153 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009154 break;
9155 } else if ((cur == '<') && (next == '!') &&
9156 (avail < 9)) {
9157 goto done;
9158 } else if (cur == '&') {
9159 if ((!terminate) &&
9160 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9161 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009162 xmlParseReference(ctxt);
9163 } else {
9164 /* TODO Avoid the extra copy, handle directly !!! */
9165 /*
9166 * Goal of the following test is:
9167 * - minimize calls to the SAX 'character' callback
9168 * when they are mergeable
9169 * - handle an problem for isBlank when we only parse
9170 * a sequence of blank chars and the next one is
9171 * not available to check against '<' presence.
9172 * - tries to homogenize the differences in SAX
9173 * callbacks between the push and pull versions
9174 * of the parser.
9175 */
9176 if ((ctxt->inputNr == 1) &&
9177 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9178 if (!terminate) {
9179 if (ctxt->progressive) {
9180 if ((lastlt == NULL) ||
9181 (ctxt->input->cur > lastlt))
9182 goto done;
9183 } else if (xmlParseLookupSequence(ctxt,
9184 '<', 0, 0) < 0) {
9185 goto done;
9186 }
9187 }
9188 }
9189 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009190 xmlParseCharData(ctxt, 0);
9191 }
9192 /*
9193 * Pop-up of finished entities.
9194 */
9195 while ((RAW == 0) && (ctxt->inputNr > 1))
9196 xmlPopInput(ctxt);
9197 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009198 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9199 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009200 ctxt->instate = XML_PARSER_EOF;
9201 break;
9202 }
9203 break;
9204 }
9205 case XML_PARSER_END_TAG:
9206 if (avail < 2)
9207 goto done;
9208 if (!terminate) {
9209 if (ctxt->progressive) {
9210 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9211 goto done;
9212 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9213 goto done;
9214 }
9215 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009216 if (ctxt->sax2) {
9217 xmlParseEndTag2(ctxt,
9218 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9219 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009220 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009221 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009222 }
9223#ifdef LIBXML_SAX1_ENABLED
9224 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009225 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009226#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009227 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009228 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 } else {
9230 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009231 }
9232 break;
9233 case XML_PARSER_CDATA_SECTION: {
9234 /*
9235 * The Push mode need to have the SAX callback for
9236 * cdataBlock merge back contiguous callbacks.
9237 */
9238 int base;
9239
9240 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9241 if (base < 0) {
9242 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9243 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9244 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009245 ctxt->sax->cdataBlock(ctxt->userData,
9246 ctxt->input->cur,
9247 XML_PARSER_BIG_BUFFER_SIZE);
9248 else if (ctxt->sax->characters != NULL)
9249 ctxt->sax->characters(ctxt->userData,
9250 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009251 XML_PARSER_BIG_BUFFER_SIZE);
9252 }
9253 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9254 ctxt->checkIndex = 0;
9255 }
9256 goto done;
9257 } else {
9258 if ((ctxt->sax != NULL) && (base > 0) &&
9259 (!ctxt->disableSAX)) {
9260 if (ctxt->sax->cdataBlock != NULL)
9261 ctxt->sax->cdataBlock(ctxt->userData,
9262 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009263 else if (ctxt->sax->characters != NULL)
9264 ctxt->sax->characters(ctxt->userData,
9265 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009266 }
9267 SKIP(base + 3);
9268 ctxt->checkIndex = 0;
9269 ctxt->instate = XML_PARSER_CONTENT;
9270#ifdef DEBUG_PUSH
9271 xmlGenericError(xmlGenericErrorContext,
9272 "PP: entering CONTENT\n");
9273#endif
9274 }
9275 break;
9276 }
Owen Taylor3473f882001-02-23 17:55:21 +00009277 case XML_PARSER_MISC:
9278 SKIP_BLANKS;
9279 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009280 avail = ctxt->input->length -
9281 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009282 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009283 avail = ctxt->input->buf->buffer->use -
9284 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009285 if (avail < 2)
9286 goto done;
9287 cur = ctxt->input->cur[0];
9288 next = ctxt->input->cur[1];
9289 if ((cur == '<') && (next == '?')) {
9290 if ((!terminate) &&
9291 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9292 goto done;
9293#ifdef DEBUG_PUSH
9294 xmlGenericError(xmlGenericErrorContext,
9295 "PP: Parsing PI\n");
9296#endif
9297 xmlParsePI(ctxt);
9298 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009299 (ctxt->input->cur[2] == '-') &&
9300 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009301 if ((!terminate) &&
9302 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9303 goto done;
9304#ifdef DEBUG_PUSH
9305 xmlGenericError(xmlGenericErrorContext,
9306 "PP: Parsing Comment\n");
9307#endif
9308 xmlParseComment(ctxt);
9309 ctxt->instate = XML_PARSER_MISC;
9310 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009311 (ctxt->input->cur[2] == 'D') &&
9312 (ctxt->input->cur[3] == 'O') &&
9313 (ctxt->input->cur[4] == 'C') &&
9314 (ctxt->input->cur[5] == 'T') &&
9315 (ctxt->input->cur[6] == 'Y') &&
9316 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009317 (ctxt->input->cur[8] == 'E')) {
9318 if ((!terminate) &&
9319 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9320 goto done;
9321#ifdef DEBUG_PUSH
9322 xmlGenericError(xmlGenericErrorContext,
9323 "PP: Parsing internal subset\n");
9324#endif
9325 ctxt->inSubset = 1;
9326 xmlParseDocTypeDecl(ctxt);
9327 if (RAW == '[') {
9328 ctxt->instate = XML_PARSER_DTD;
9329#ifdef DEBUG_PUSH
9330 xmlGenericError(xmlGenericErrorContext,
9331 "PP: entering DTD\n");
9332#endif
9333 } else {
9334 /*
9335 * Create and update the external subset.
9336 */
9337 ctxt->inSubset = 2;
9338 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9339 (ctxt->sax->externalSubset != NULL))
9340 ctxt->sax->externalSubset(ctxt->userData,
9341 ctxt->intSubName, ctxt->extSubSystem,
9342 ctxt->extSubURI);
9343 ctxt->inSubset = 0;
9344 ctxt->instate = XML_PARSER_PROLOG;
9345#ifdef DEBUG_PUSH
9346 xmlGenericError(xmlGenericErrorContext,
9347 "PP: entering PROLOG\n");
9348#endif
9349 }
9350 } else if ((cur == '<') && (next == '!') &&
9351 (avail < 9)) {
9352 goto done;
9353 } else {
9354 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009355 ctxt->progressive = 1;
9356 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009357#ifdef DEBUG_PUSH
9358 xmlGenericError(xmlGenericErrorContext,
9359 "PP: entering START_TAG\n");
9360#endif
9361 }
9362 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009363 case XML_PARSER_PROLOG:
9364 SKIP_BLANKS;
9365 if (ctxt->input->buf == NULL)
9366 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9367 else
9368 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9369 if (avail < 2)
9370 goto done;
9371 cur = ctxt->input->cur[0];
9372 next = ctxt->input->cur[1];
9373 if ((cur == '<') && (next == '?')) {
9374 if ((!terminate) &&
9375 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9376 goto done;
9377#ifdef DEBUG_PUSH
9378 xmlGenericError(xmlGenericErrorContext,
9379 "PP: Parsing PI\n");
9380#endif
9381 xmlParsePI(ctxt);
9382 } else if ((cur == '<') && (next == '!') &&
9383 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9384 if ((!terminate) &&
9385 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9386 goto done;
9387#ifdef DEBUG_PUSH
9388 xmlGenericError(xmlGenericErrorContext,
9389 "PP: Parsing Comment\n");
9390#endif
9391 xmlParseComment(ctxt);
9392 ctxt->instate = XML_PARSER_PROLOG;
9393 } else if ((cur == '<') && (next == '!') &&
9394 (avail < 4)) {
9395 goto done;
9396 } else {
9397 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009398 ctxt->progressive = 1;
9399 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009400#ifdef DEBUG_PUSH
9401 xmlGenericError(xmlGenericErrorContext,
9402 "PP: entering START_TAG\n");
9403#endif
9404 }
9405 break;
9406 case XML_PARSER_EPILOG:
9407 SKIP_BLANKS;
9408 if (ctxt->input->buf == NULL)
9409 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9410 else
9411 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9412 if (avail < 2)
9413 goto done;
9414 cur = ctxt->input->cur[0];
9415 next = ctxt->input->cur[1];
9416 if ((cur == '<') && (next == '?')) {
9417 if ((!terminate) &&
9418 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9419 goto done;
9420#ifdef DEBUG_PUSH
9421 xmlGenericError(xmlGenericErrorContext,
9422 "PP: Parsing PI\n");
9423#endif
9424 xmlParsePI(ctxt);
9425 ctxt->instate = XML_PARSER_EPILOG;
9426 } else if ((cur == '<') && (next == '!') &&
9427 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9428 if ((!terminate) &&
9429 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9430 goto done;
9431#ifdef DEBUG_PUSH
9432 xmlGenericError(xmlGenericErrorContext,
9433 "PP: Parsing Comment\n");
9434#endif
9435 xmlParseComment(ctxt);
9436 ctxt->instate = XML_PARSER_EPILOG;
9437 } else if ((cur == '<') && (next == '!') &&
9438 (avail < 4)) {
9439 goto done;
9440 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009441 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009442 ctxt->instate = XML_PARSER_EOF;
9443#ifdef DEBUG_PUSH
9444 xmlGenericError(xmlGenericErrorContext,
9445 "PP: entering EOF\n");
9446#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009447 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009448 ctxt->sax->endDocument(ctxt->userData);
9449 goto done;
9450 }
9451 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009452 case XML_PARSER_DTD: {
9453 /*
9454 * Sorry but progressive parsing of the internal subset
9455 * is not expected to be supported. We first check that
9456 * the full content of the internal subset is available and
9457 * the parsing is launched only at that point.
9458 * Internal subset ends up with "']' S? '>'" in an unescaped
9459 * section and not in a ']]>' sequence which are conditional
9460 * sections (whoever argued to keep that crap in XML deserve
9461 * a place in hell !).
9462 */
9463 int base, i;
9464 xmlChar *buf;
9465 xmlChar quote = 0;
9466
9467 base = ctxt->input->cur - ctxt->input->base;
9468 if (base < 0) return(0);
9469 if (ctxt->checkIndex > base)
9470 base = ctxt->checkIndex;
9471 buf = ctxt->input->buf->buffer->content;
9472 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9473 base++) {
9474 if (quote != 0) {
9475 if (buf[base] == quote)
9476 quote = 0;
9477 continue;
9478 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009479 if ((quote == 0) && (buf[base] == '<')) {
9480 int found = 0;
9481 /* special handling of comments */
9482 if (((unsigned int) base + 4 <
9483 ctxt->input->buf->buffer->use) &&
9484 (buf[base + 1] == '!') &&
9485 (buf[base + 2] == '-') &&
9486 (buf[base + 3] == '-')) {
9487 for (;(unsigned int) base + 3 <
9488 ctxt->input->buf->buffer->use; base++) {
9489 if ((buf[base] == '-') &&
9490 (buf[base + 1] == '-') &&
9491 (buf[base + 2] == '>')) {
9492 found = 1;
9493 base += 2;
9494 break;
9495 }
9496 }
9497 if (!found)
9498 break;
9499 continue;
9500 }
9501 }
Owen Taylor3473f882001-02-23 17:55:21 +00009502 if (buf[base] == '"') {
9503 quote = '"';
9504 continue;
9505 }
9506 if (buf[base] == '\'') {
9507 quote = '\'';
9508 continue;
9509 }
9510 if (buf[base] == ']') {
9511 if ((unsigned int) base +1 >=
9512 ctxt->input->buf->buffer->use)
9513 break;
9514 if (buf[base + 1] == ']') {
9515 /* conditional crap, skip both ']' ! */
9516 base++;
9517 continue;
9518 }
9519 for (i = 0;
9520 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9521 i++) {
9522 if (buf[base + i] == '>')
9523 goto found_end_int_subset;
9524 }
9525 break;
9526 }
9527 }
9528 /*
9529 * We didn't found the end of the Internal subset
9530 */
9531 if (quote == 0)
9532 ctxt->checkIndex = base;
9533#ifdef DEBUG_PUSH
9534 if (next == 0)
9535 xmlGenericError(xmlGenericErrorContext,
9536 "PP: lookup of int subset end filed\n");
9537#endif
9538 goto done;
9539
9540found_end_int_subset:
9541 xmlParseInternalSubset(ctxt);
9542 ctxt->inSubset = 2;
9543 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9544 (ctxt->sax->externalSubset != NULL))
9545 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9546 ctxt->extSubSystem, ctxt->extSubURI);
9547 ctxt->inSubset = 0;
9548 ctxt->instate = XML_PARSER_PROLOG;
9549 ctxt->checkIndex = 0;
9550#ifdef DEBUG_PUSH
9551 xmlGenericError(xmlGenericErrorContext,
9552 "PP: entering PROLOG\n");
9553#endif
9554 break;
9555 }
9556 case XML_PARSER_COMMENT:
9557 xmlGenericError(xmlGenericErrorContext,
9558 "PP: internal error, state == COMMENT\n");
9559 ctxt->instate = XML_PARSER_CONTENT;
9560#ifdef DEBUG_PUSH
9561 xmlGenericError(xmlGenericErrorContext,
9562 "PP: entering CONTENT\n");
9563#endif
9564 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009565 case XML_PARSER_IGNORE:
9566 xmlGenericError(xmlGenericErrorContext,
9567 "PP: internal error, state == IGNORE");
9568 ctxt->instate = XML_PARSER_DTD;
9569#ifdef DEBUG_PUSH
9570 xmlGenericError(xmlGenericErrorContext,
9571 "PP: entering DTD\n");
9572#endif
9573 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009574 case XML_PARSER_PI:
9575 xmlGenericError(xmlGenericErrorContext,
9576 "PP: internal error, state == PI\n");
9577 ctxt->instate = XML_PARSER_CONTENT;
9578#ifdef DEBUG_PUSH
9579 xmlGenericError(xmlGenericErrorContext,
9580 "PP: entering CONTENT\n");
9581#endif
9582 break;
9583 case XML_PARSER_ENTITY_DECL:
9584 xmlGenericError(xmlGenericErrorContext,
9585 "PP: internal error, state == ENTITY_DECL\n");
9586 ctxt->instate = XML_PARSER_DTD;
9587#ifdef DEBUG_PUSH
9588 xmlGenericError(xmlGenericErrorContext,
9589 "PP: entering DTD\n");
9590#endif
9591 break;
9592 case XML_PARSER_ENTITY_VALUE:
9593 xmlGenericError(xmlGenericErrorContext,
9594 "PP: internal error, state == ENTITY_VALUE\n");
9595 ctxt->instate = XML_PARSER_CONTENT;
9596#ifdef DEBUG_PUSH
9597 xmlGenericError(xmlGenericErrorContext,
9598 "PP: entering DTD\n");
9599#endif
9600 break;
9601 case XML_PARSER_ATTRIBUTE_VALUE:
9602 xmlGenericError(xmlGenericErrorContext,
9603 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9604 ctxt->instate = XML_PARSER_START_TAG;
9605#ifdef DEBUG_PUSH
9606 xmlGenericError(xmlGenericErrorContext,
9607 "PP: entering START_TAG\n");
9608#endif
9609 break;
9610 case XML_PARSER_SYSTEM_LITERAL:
9611 xmlGenericError(xmlGenericErrorContext,
9612 "PP: internal error, state == SYSTEM_LITERAL\n");
9613 ctxt->instate = XML_PARSER_START_TAG;
9614#ifdef DEBUG_PUSH
9615 xmlGenericError(xmlGenericErrorContext,
9616 "PP: entering START_TAG\n");
9617#endif
9618 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009619 case XML_PARSER_PUBLIC_LITERAL:
9620 xmlGenericError(xmlGenericErrorContext,
9621 "PP: internal error, state == PUBLIC_LITERAL\n");
9622 ctxt->instate = XML_PARSER_START_TAG;
9623#ifdef DEBUG_PUSH
9624 xmlGenericError(xmlGenericErrorContext,
9625 "PP: entering START_TAG\n");
9626#endif
9627 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009628 }
9629 }
9630done:
9631#ifdef DEBUG_PUSH
9632 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9633#endif
9634 return(ret);
9635}
9636
9637/**
Owen Taylor3473f882001-02-23 17:55:21 +00009638 * xmlParseChunk:
9639 * @ctxt: an XML parser context
9640 * @chunk: an char array
9641 * @size: the size in byte of the chunk
9642 * @terminate: last chunk indicator
9643 *
9644 * Parse a Chunk of memory
9645 *
9646 * Returns zero if no error, the xmlParserErrors otherwise.
9647 */
9648int
9649xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9650 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009651 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9652 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009653 if (ctxt->instate == XML_PARSER_START)
9654 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009655 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9656 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9657 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9658 int cur = ctxt->input->cur - ctxt->input->base;
9659
9660 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9661 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9662 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009663 ctxt->input->end =
9664 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009665#ifdef DEBUG_PUSH
9666 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9667#endif
9668
Owen Taylor3473f882001-02-23 17:55:21 +00009669 } else if (ctxt->instate != XML_PARSER_EOF) {
9670 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9671 xmlParserInputBufferPtr in = ctxt->input->buf;
9672 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9673 (in->raw != NULL)) {
9674 int nbchars;
9675
9676 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9677 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009678 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009679 xmlGenericError(xmlGenericErrorContext,
9680 "xmlParseChunk: encoder error\n");
9681 return(XML_ERR_INVALID_ENCODING);
9682 }
9683 }
9684 }
9685 }
9686 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009687 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9688 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009689 if (terminate) {
9690 /*
9691 * Check for termination
9692 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009693 int avail = 0;
9694 if (ctxt->input->buf == NULL)
9695 avail = ctxt->input->length -
9696 (ctxt->input->cur - ctxt->input->base);
9697 else
9698 avail = ctxt->input->buf->buffer->use -
9699 (ctxt->input->cur - ctxt->input->base);
9700
Owen Taylor3473f882001-02-23 17:55:21 +00009701 if ((ctxt->instate != XML_PARSER_EOF) &&
9702 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009703 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009704 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009705 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009706 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009707 }
Owen Taylor3473f882001-02-23 17:55:21 +00009708 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009709 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009710 ctxt->sax->endDocument(ctxt->userData);
9711 }
9712 ctxt->instate = XML_PARSER_EOF;
9713 }
9714 return((xmlParserErrors) ctxt->errNo);
9715}
9716
9717/************************************************************************
9718 * *
9719 * I/O front end functions to the parser *
9720 * *
9721 ************************************************************************/
9722
9723/**
9724 * xmlStopParser:
9725 * @ctxt: an XML parser context
9726 *
9727 * Blocks further parser processing
9728 */
9729void
9730xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009731 if (ctxt == NULL)
9732 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009733 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009734 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009735 if (ctxt->input != NULL)
9736 ctxt->input->cur = BAD_CAST"";
9737}
9738
9739/**
9740 * xmlCreatePushParserCtxt:
9741 * @sax: a SAX handler
9742 * @user_data: The user data returned on SAX callbacks
9743 * @chunk: a pointer to an array of chars
9744 * @size: number of chars in the array
9745 * @filename: an optional file name or URI
9746 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009747 * Create a parser context for using the XML parser in push mode.
9748 * If @buffer and @size are non-NULL, the data is used to detect
9749 * the encoding. The remaining characters will be parsed so they
9750 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009751 * To allow content encoding detection, @size should be >= 4
9752 * The value of @filename is used for fetching external entities
9753 * and error/warning reports.
9754 *
9755 * Returns the new parser context or NULL
9756 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009757
Owen Taylor3473f882001-02-23 17:55:21 +00009758xmlParserCtxtPtr
9759xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9760 const char *chunk, int size, const char *filename) {
9761 xmlParserCtxtPtr ctxt;
9762 xmlParserInputPtr inputStream;
9763 xmlParserInputBufferPtr buf;
9764 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9765
9766 /*
9767 * plug some encoding conversion routines
9768 */
9769 if ((chunk != NULL) && (size >= 4))
9770 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9771
9772 buf = xmlAllocParserInputBuffer(enc);
9773 if (buf == NULL) return(NULL);
9774
9775 ctxt = xmlNewParserCtxt();
9776 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009777 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009778 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009779 return(NULL);
9780 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009781 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9782 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009783 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009784 xmlFreeParserInputBuffer(buf);
9785 xmlFreeParserCtxt(ctxt);
9786 return(NULL);
9787 }
Owen Taylor3473f882001-02-23 17:55:21 +00009788 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009789#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009790 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009791#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009792 xmlFree(ctxt->sax);
9793 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9794 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009795 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009796 xmlFreeParserInputBuffer(buf);
9797 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009798 return(NULL);
9799 }
9800 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9801 if (user_data != NULL)
9802 ctxt->userData = user_data;
9803 }
9804 if (filename == NULL) {
9805 ctxt->directory = NULL;
9806 } else {
9807 ctxt->directory = xmlParserGetDirectory(filename);
9808 }
9809
9810 inputStream = xmlNewInputStream(ctxt);
9811 if (inputStream == NULL) {
9812 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009813 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009814 return(NULL);
9815 }
9816
9817 if (filename == NULL)
9818 inputStream->filename = NULL;
9819 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009820 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009821 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009822 inputStream->buf = buf;
9823 inputStream->base = inputStream->buf->buffer->content;
9824 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009825 inputStream->end =
9826 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009827
9828 inputPush(ctxt, inputStream);
9829
9830 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9831 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009832 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9833 int cur = ctxt->input->cur - ctxt->input->base;
9834
Owen Taylor3473f882001-02-23 17:55:21 +00009835 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009836
9837 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9838 ctxt->input->cur = ctxt->input->base + cur;
9839 ctxt->input->end =
9840 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009841#ifdef DEBUG_PUSH
9842 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9843#endif
9844 }
9845
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009846 if (enc != XML_CHAR_ENCODING_NONE) {
9847 xmlSwitchEncoding(ctxt, enc);
9848 }
9849
Owen Taylor3473f882001-02-23 17:55:21 +00009850 return(ctxt);
9851}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009852#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009853
9854/**
9855 * xmlCreateIOParserCtxt:
9856 * @sax: a SAX handler
9857 * @user_data: The user data returned on SAX callbacks
9858 * @ioread: an I/O read function
9859 * @ioclose: an I/O close function
9860 * @ioctx: an I/O handler
9861 * @enc: the charset encoding if known
9862 *
9863 * Create a parser context for using the XML parser with an existing
9864 * I/O stream
9865 *
9866 * Returns the new parser context or NULL
9867 */
9868xmlParserCtxtPtr
9869xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9870 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9871 void *ioctx, xmlCharEncoding enc) {
9872 xmlParserCtxtPtr ctxt;
9873 xmlParserInputPtr inputStream;
9874 xmlParserInputBufferPtr buf;
9875
9876 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9877 if (buf == NULL) return(NULL);
9878
9879 ctxt = xmlNewParserCtxt();
9880 if (ctxt == NULL) {
9881 xmlFree(buf);
9882 return(NULL);
9883 }
9884 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009885#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009886 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009887#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009888 xmlFree(ctxt->sax);
9889 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9890 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009891 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009892 xmlFree(ctxt);
9893 return(NULL);
9894 }
9895 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9896 if (user_data != NULL)
9897 ctxt->userData = user_data;
9898 }
9899
9900 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9901 if (inputStream == NULL) {
9902 xmlFreeParserCtxt(ctxt);
9903 return(NULL);
9904 }
9905 inputPush(ctxt, inputStream);
9906
9907 return(ctxt);
9908}
9909
Daniel Veillard4432df22003-09-28 18:58:27 +00009910#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009911/************************************************************************
9912 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009913 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009914 * *
9915 ************************************************************************/
9916
9917/**
9918 * xmlIOParseDTD:
9919 * @sax: the SAX handler block or NULL
9920 * @input: an Input Buffer
9921 * @enc: the charset encoding if known
9922 *
9923 * Load and parse a DTD
9924 *
9925 * Returns the resulting xmlDtdPtr or NULL in case of error.
9926 * @input will be freed at parsing end.
9927 */
9928
9929xmlDtdPtr
9930xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9931 xmlCharEncoding enc) {
9932 xmlDtdPtr ret = NULL;
9933 xmlParserCtxtPtr ctxt;
9934 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009935 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009936
9937 if (input == NULL)
9938 return(NULL);
9939
9940 ctxt = xmlNewParserCtxt();
9941 if (ctxt == NULL) {
9942 return(NULL);
9943 }
9944
9945 /*
9946 * Set-up the SAX context
9947 */
9948 if (sax != NULL) {
9949 if (ctxt->sax != NULL)
9950 xmlFree(ctxt->sax);
9951 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +00009952 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +00009953 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009954 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009955
9956 /*
9957 * generate a parser input from the I/O handler
9958 */
9959
Daniel Veillard43caefb2003-12-07 19:32:22 +00009960 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +00009961 if (pinput == NULL) {
9962 if (sax != NULL) ctxt->sax = NULL;
9963 xmlFreeParserCtxt(ctxt);
9964 return(NULL);
9965 }
9966
9967 /*
9968 * plug some encoding conversion routines here.
9969 */
9970 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +00009971 if (enc != XML_CHAR_ENCODING_NONE) {
9972 xmlSwitchEncoding(ctxt, enc);
9973 }
Owen Taylor3473f882001-02-23 17:55:21 +00009974
9975 pinput->filename = NULL;
9976 pinput->line = 1;
9977 pinput->col = 1;
9978 pinput->base = ctxt->input->cur;
9979 pinput->cur = ctxt->input->cur;
9980 pinput->free = NULL;
9981
9982 /*
9983 * let's parse that entity knowing it's an external subset.
9984 */
9985 ctxt->inSubset = 2;
9986 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9987 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9988 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009989
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009990 if ((enc == XML_CHAR_ENCODING_NONE) &&
9991 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00009992 /*
9993 * Get the 4 first bytes and decode the charset
9994 * if enc != XML_CHAR_ENCODING_NONE
9995 * plug some encoding conversion routines.
9996 */
9997 start[0] = RAW;
9998 start[1] = NXT(1);
9999 start[2] = NXT(2);
10000 start[3] = NXT(3);
10001 enc = xmlDetectCharEncoding(start, 4);
10002 if (enc != XML_CHAR_ENCODING_NONE) {
10003 xmlSwitchEncoding(ctxt, enc);
10004 }
10005 }
10006
Owen Taylor3473f882001-02-23 17:55:21 +000010007 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10008
10009 if (ctxt->myDoc != NULL) {
10010 if (ctxt->wellFormed) {
10011 ret = ctxt->myDoc->extSubset;
10012 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010013 if (ret != NULL) {
10014 xmlNodePtr tmp;
10015
10016 ret->doc = NULL;
10017 tmp = ret->children;
10018 while (tmp != NULL) {
10019 tmp->doc = NULL;
10020 tmp = tmp->next;
10021 }
10022 }
Owen Taylor3473f882001-02-23 17:55:21 +000010023 } else {
10024 ret = NULL;
10025 }
10026 xmlFreeDoc(ctxt->myDoc);
10027 ctxt->myDoc = NULL;
10028 }
10029 if (sax != NULL) ctxt->sax = NULL;
10030 xmlFreeParserCtxt(ctxt);
10031
10032 return(ret);
10033}
10034
10035/**
10036 * xmlSAXParseDTD:
10037 * @sax: the SAX handler block
10038 * @ExternalID: a NAME* containing the External ID of the DTD
10039 * @SystemID: a NAME* containing the URL to the DTD
10040 *
10041 * Load and parse an external subset.
10042 *
10043 * Returns the resulting xmlDtdPtr or NULL in case of error.
10044 */
10045
10046xmlDtdPtr
10047xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10048 const xmlChar *SystemID) {
10049 xmlDtdPtr ret = NULL;
10050 xmlParserCtxtPtr ctxt;
10051 xmlParserInputPtr input = NULL;
10052 xmlCharEncoding enc;
10053
10054 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10055
10056 ctxt = xmlNewParserCtxt();
10057 if (ctxt == NULL) {
10058 return(NULL);
10059 }
10060
10061 /*
10062 * Set-up the SAX context
10063 */
10064 if (sax != NULL) {
10065 if (ctxt->sax != NULL)
10066 xmlFree(ctxt->sax);
10067 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010068 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010069 }
10070
10071 /*
10072 * Ask the Entity resolver to load the damn thing
10073 */
10074
10075 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010076 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010077 if (input == NULL) {
10078 if (sax != NULL) ctxt->sax = NULL;
10079 xmlFreeParserCtxt(ctxt);
10080 return(NULL);
10081 }
10082
10083 /*
10084 * plug some encoding conversion routines here.
10085 */
10086 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010087 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10088 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10089 xmlSwitchEncoding(ctxt, enc);
10090 }
Owen Taylor3473f882001-02-23 17:55:21 +000010091
10092 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010093 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010094 input->line = 1;
10095 input->col = 1;
10096 input->base = ctxt->input->cur;
10097 input->cur = ctxt->input->cur;
10098 input->free = NULL;
10099
10100 /*
10101 * let's parse that entity knowing it's an external subset.
10102 */
10103 ctxt->inSubset = 2;
10104 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10105 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10106 ExternalID, SystemID);
10107 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10108
10109 if (ctxt->myDoc != NULL) {
10110 if (ctxt->wellFormed) {
10111 ret = ctxt->myDoc->extSubset;
10112 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010113 if (ret != NULL) {
10114 xmlNodePtr tmp;
10115
10116 ret->doc = NULL;
10117 tmp = ret->children;
10118 while (tmp != NULL) {
10119 tmp->doc = NULL;
10120 tmp = tmp->next;
10121 }
10122 }
Owen Taylor3473f882001-02-23 17:55:21 +000010123 } else {
10124 ret = NULL;
10125 }
10126 xmlFreeDoc(ctxt->myDoc);
10127 ctxt->myDoc = NULL;
10128 }
10129 if (sax != NULL) ctxt->sax = NULL;
10130 xmlFreeParserCtxt(ctxt);
10131
10132 return(ret);
10133}
10134
Daniel Veillard4432df22003-09-28 18:58:27 +000010135
Owen Taylor3473f882001-02-23 17:55:21 +000010136/**
10137 * xmlParseDTD:
10138 * @ExternalID: a NAME* containing the External ID of the DTD
10139 * @SystemID: a NAME* containing the URL to the DTD
10140 *
10141 * Load and parse an external subset.
10142 *
10143 * Returns the resulting xmlDtdPtr or NULL in case of error.
10144 */
10145
10146xmlDtdPtr
10147xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10148 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10149}
Daniel Veillard4432df22003-09-28 18:58:27 +000010150#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010151
10152/************************************************************************
10153 * *
10154 * Front ends when parsing an Entity *
10155 * *
10156 ************************************************************************/
10157
10158/**
Owen Taylor3473f882001-02-23 17:55:21 +000010159 * xmlParseCtxtExternalEntity:
10160 * @ctx: the existing parsing context
10161 * @URL: the URL for the entity to load
10162 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010163 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010164 *
10165 * Parse an external general entity within an existing parsing context
10166 * An external general parsed entity is well-formed if it matches the
10167 * production labeled extParsedEnt.
10168 *
10169 * [78] extParsedEnt ::= TextDecl? content
10170 *
10171 * Returns 0 if the entity is well formed, -1 in case of args problem and
10172 * the parser error code otherwise
10173 */
10174
10175int
10176xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010177 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010178 xmlParserCtxtPtr ctxt;
10179 xmlDocPtr newDoc;
10180 xmlSAXHandlerPtr oldsax = NULL;
10181 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010182 xmlChar start[4];
10183 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010184
10185 if (ctx->depth > 40) {
10186 return(XML_ERR_ENTITY_LOOP);
10187 }
10188
Daniel Veillardcda96922001-08-21 10:56:31 +000010189 if (lst != NULL)
10190 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010191 if ((URL == NULL) && (ID == NULL))
10192 return(-1);
10193 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10194 return(-1);
10195
10196
10197 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10198 if (ctxt == NULL) return(-1);
10199 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010200 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010201 oldsax = ctxt->sax;
10202 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010203 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010204 newDoc = xmlNewDoc(BAD_CAST "1.0");
10205 if (newDoc == NULL) {
10206 xmlFreeParserCtxt(ctxt);
10207 return(-1);
10208 }
10209 if (ctx->myDoc != NULL) {
10210 newDoc->intSubset = ctx->myDoc->intSubset;
10211 newDoc->extSubset = ctx->myDoc->extSubset;
10212 }
10213 if (ctx->myDoc->URL != NULL) {
10214 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10215 }
10216 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10217 if (newDoc->children == NULL) {
10218 ctxt->sax = oldsax;
10219 xmlFreeParserCtxt(ctxt);
10220 newDoc->intSubset = NULL;
10221 newDoc->extSubset = NULL;
10222 xmlFreeDoc(newDoc);
10223 return(-1);
10224 }
10225 nodePush(ctxt, newDoc->children);
10226 if (ctx->myDoc == NULL) {
10227 ctxt->myDoc = newDoc;
10228 } else {
10229 ctxt->myDoc = ctx->myDoc;
10230 newDoc->children->doc = ctx->myDoc;
10231 }
10232
Daniel Veillard87a764e2001-06-20 17:41:10 +000010233 /*
10234 * Get the 4 first bytes and decode the charset
10235 * if enc != XML_CHAR_ENCODING_NONE
10236 * plug some encoding conversion routines.
10237 */
10238 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010239 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10240 start[0] = RAW;
10241 start[1] = NXT(1);
10242 start[2] = NXT(2);
10243 start[3] = NXT(3);
10244 enc = xmlDetectCharEncoding(start, 4);
10245 if (enc != XML_CHAR_ENCODING_NONE) {
10246 xmlSwitchEncoding(ctxt, enc);
10247 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010248 }
10249
Owen Taylor3473f882001-02-23 17:55:21 +000010250 /*
10251 * Parse a possible text declaration first
10252 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010253 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010254 xmlParseTextDecl(ctxt);
10255 }
10256
10257 /*
10258 * Doing validity checking on chunk doesn't make sense
10259 */
10260 ctxt->instate = XML_PARSER_CONTENT;
10261 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010262 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010263 ctxt->loadsubset = ctx->loadsubset;
10264 ctxt->depth = ctx->depth + 1;
10265 ctxt->replaceEntities = ctx->replaceEntities;
10266 if (ctxt->validate) {
10267 ctxt->vctxt.error = ctx->vctxt.error;
10268 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010269 } else {
10270 ctxt->vctxt.error = NULL;
10271 ctxt->vctxt.warning = NULL;
10272 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010273 ctxt->vctxt.nodeTab = NULL;
10274 ctxt->vctxt.nodeNr = 0;
10275 ctxt->vctxt.nodeMax = 0;
10276 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010277 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10278 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010279 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10280 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10281 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010282 ctxt->dictNames = ctx->dictNames;
10283 ctxt->attsDefault = ctx->attsDefault;
10284 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010285
10286 xmlParseContent(ctxt);
10287
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010288 ctx->validate = ctxt->validate;
10289 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010290 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010291 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010292 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010293 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010294 }
10295 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010296 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010297 }
10298
10299 if (!ctxt->wellFormed) {
10300 if (ctxt->errNo == 0)
10301 ret = 1;
10302 else
10303 ret = ctxt->errNo;
10304 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010305 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010306 xmlNodePtr cur;
10307
10308 /*
10309 * Return the newly created nodeset after unlinking it from
10310 * they pseudo parent.
10311 */
10312 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010313 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010314 while (cur != NULL) {
10315 cur->parent = NULL;
10316 cur = cur->next;
10317 }
10318 newDoc->children->children = NULL;
10319 }
10320 ret = 0;
10321 }
10322 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010323 ctxt->dict = NULL;
10324 ctxt->attsDefault = NULL;
10325 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010326 xmlFreeParserCtxt(ctxt);
10327 newDoc->intSubset = NULL;
10328 newDoc->extSubset = NULL;
10329 xmlFreeDoc(newDoc);
10330
10331 return(ret);
10332}
10333
10334/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010335 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010336 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010337 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010338 * @sax: the SAX handler bloc (possibly NULL)
10339 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10340 * @depth: Used for loop detection, use 0
10341 * @URL: the URL for the entity to load
10342 * @ID: the System ID for the entity to load
10343 * @list: the return value for the set of parsed nodes
10344 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010345 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010346 *
10347 * Returns 0 if the entity is well formed, -1 in case of args problem and
10348 * the parser error code otherwise
10349 */
10350
Daniel Veillard7d515752003-09-26 19:12:37 +000010351static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010352xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10353 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010354 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010355 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010356 xmlParserCtxtPtr ctxt;
10357 xmlDocPtr newDoc;
10358 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010359 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010360 xmlChar start[4];
10361 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010362
10363 if (depth > 40) {
10364 return(XML_ERR_ENTITY_LOOP);
10365 }
10366
10367
10368
10369 if (list != NULL)
10370 *list = NULL;
10371 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010372 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010373 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010374 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010375
10376
10377 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010378 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010379 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010380 if (oldctxt != NULL) {
10381 ctxt->_private = oldctxt->_private;
10382 ctxt->loadsubset = oldctxt->loadsubset;
10383 ctxt->validate = oldctxt->validate;
10384 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010385 ctxt->record_info = oldctxt->record_info;
10386 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10387 ctxt->node_seq.length = oldctxt->node_seq.length;
10388 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010389 } else {
10390 /*
10391 * Doing validity checking on chunk without context
10392 * doesn't make sense
10393 */
10394 ctxt->_private = NULL;
10395 ctxt->validate = 0;
10396 ctxt->external = 2;
10397 ctxt->loadsubset = 0;
10398 }
Owen Taylor3473f882001-02-23 17:55:21 +000010399 if (sax != NULL) {
10400 oldsax = ctxt->sax;
10401 ctxt->sax = sax;
10402 if (user_data != NULL)
10403 ctxt->userData = user_data;
10404 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010405 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010406 newDoc = xmlNewDoc(BAD_CAST "1.0");
10407 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010408 ctxt->node_seq.maximum = 0;
10409 ctxt->node_seq.length = 0;
10410 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010411 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010412 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010413 }
10414 if (doc != NULL) {
10415 newDoc->intSubset = doc->intSubset;
10416 newDoc->extSubset = doc->extSubset;
10417 }
10418 if (doc->URL != NULL) {
10419 newDoc->URL = xmlStrdup(doc->URL);
10420 }
10421 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10422 if (newDoc->children == NULL) {
10423 if (sax != NULL)
10424 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010425 ctxt->node_seq.maximum = 0;
10426 ctxt->node_seq.length = 0;
10427 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010428 xmlFreeParserCtxt(ctxt);
10429 newDoc->intSubset = NULL;
10430 newDoc->extSubset = NULL;
10431 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010432 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010433 }
10434 nodePush(ctxt, newDoc->children);
10435 if (doc == NULL) {
10436 ctxt->myDoc = newDoc;
10437 } else {
10438 ctxt->myDoc = doc;
10439 newDoc->children->doc = doc;
10440 }
10441
Daniel Veillard87a764e2001-06-20 17:41:10 +000010442 /*
10443 * Get the 4 first bytes and decode the charset
10444 * if enc != XML_CHAR_ENCODING_NONE
10445 * plug some encoding conversion routines.
10446 */
10447 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010448 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10449 start[0] = RAW;
10450 start[1] = NXT(1);
10451 start[2] = NXT(2);
10452 start[3] = NXT(3);
10453 enc = xmlDetectCharEncoding(start, 4);
10454 if (enc != XML_CHAR_ENCODING_NONE) {
10455 xmlSwitchEncoding(ctxt, enc);
10456 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010457 }
10458
Owen Taylor3473f882001-02-23 17:55:21 +000010459 /*
10460 * Parse a possible text declaration first
10461 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010462 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010463 xmlParseTextDecl(ctxt);
10464 }
10465
Owen Taylor3473f882001-02-23 17:55:21 +000010466 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010467 ctxt->depth = depth;
10468
10469 xmlParseContent(ctxt);
10470
Daniel Veillard561b7f82002-03-20 21:55:57 +000010471 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010472 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010473 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010474 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
10476 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010477 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010478 }
10479
10480 if (!ctxt->wellFormed) {
10481 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010482 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010483 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010484 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010485 } else {
10486 if (list != NULL) {
10487 xmlNodePtr cur;
10488
10489 /*
10490 * Return the newly created nodeset after unlinking it from
10491 * they pseudo parent.
10492 */
10493 cur = newDoc->children->children;
10494 *list = cur;
10495 while (cur != NULL) {
10496 cur->parent = NULL;
10497 cur = cur->next;
10498 }
10499 newDoc->children->children = NULL;
10500 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010501 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010502 }
10503 if (sax != NULL)
10504 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010505 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10506 oldctxt->node_seq.length = ctxt->node_seq.length;
10507 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010508 ctxt->node_seq.maximum = 0;
10509 ctxt->node_seq.length = 0;
10510 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010511 xmlFreeParserCtxt(ctxt);
10512 newDoc->intSubset = NULL;
10513 newDoc->extSubset = NULL;
10514 xmlFreeDoc(newDoc);
10515
10516 return(ret);
10517}
10518
Daniel Veillard81273902003-09-30 00:43:48 +000010519#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010520/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010521 * xmlParseExternalEntity:
10522 * @doc: the document the chunk pertains to
10523 * @sax: the SAX handler bloc (possibly NULL)
10524 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10525 * @depth: Used for loop detection, use 0
10526 * @URL: the URL for the entity to load
10527 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010528 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010529 *
10530 * Parse an external general entity
10531 * An external general parsed entity is well-formed if it matches the
10532 * production labeled extParsedEnt.
10533 *
10534 * [78] extParsedEnt ::= TextDecl? content
10535 *
10536 * Returns 0 if the entity is well formed, -1 in case of args problem and
10537 * the parser error code otherwise
10538 */
10539
10540int
10541xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010542 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010543 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010544 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010545}
10546
10547/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010548 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010549 * @doc: the document the chunk pertains to
10550 * @sax: the SAX handler bloc (possibly NULL)
10551 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10552 * @depth: Used for loop detection, use 0
10553 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010554 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010555 *
10556 * Parse a well-balanced chunk of an XML document
10557 * called by the parser
10558 * The allowed sequence for the Well Balanced Chunk is the one defined by
10559 * the content production in the XML grammar:
10560 *
10561 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10562 *
10563 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10564 * the parser error code otherwise
10565 */
10566
10567int
10568xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010569 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010570 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10571 depth, string, lst, 0 );
10572}
Daniel Veillard81273902003-09-30 00:43:48 +000010573#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010574
10575/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010576 * xmlParseBalancedChunkMemoryInternal:
10577 * @oldctxt: the existing parsing context
10578 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10579 * @user_data: the user data field for the parser context
10580 * @lst: the return value for the set of parsed nodes
10581 *
10582 *
10583 * Parse a well-balanced chunk of an XML document
10584 * called by the parser
10585 * The allowed sequence for the Well Balanced Chunk is the one defined by
10586 * the content production in the XML grammar:
10587 *
10588 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10589 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010590 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10591 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010592 *
10593 * In case recover is set to 1, the nodelist will not be empty even if
10594 * the parsed chunk is not well balanced.
10595 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010596static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010597xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10598 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10599 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010600 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010601 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010602 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010603 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010604 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010605
10606 if (oldctxt->depth > 40) {
10607 return(XML_ERR_ENTITY_LOOP);
10608 }
10609
10610
10611 if (lst != NULL)
10612 *lst = NULL;
10613 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010614 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010615
10616 size = xmlStrlen(string);
10617
10618 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010619 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010620 if (user_data != NULL)
10621 ctxt->userData = user_data;
10622 else
10623 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010624 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10625 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010626 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10627 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10628 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010629
10630 oldsax = ctxt->sax;
10631 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010632 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010633 ctxt->replaceEntities = oldctxt->replaceEntities;
10634 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010635
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010636 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010637 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010638 newDoc = xmlNewDoc(BAD_CAST "1.0");
10639 if (newDoc == NULL) {
10640 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010641 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010642 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010643 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010644 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010645 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010646 } else {
10647 ctxt->myDoc = oldctxt->myDoc;
10648 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010649 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010650 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010651 BAD_CAST "pseudoroot", NULL);
10652 if (ctxt->myDoc->children == NULL) {
10653 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010654 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010655 xmlFreeParserCtxt(ctxt);
10656 if (newDoc != NULL)
10657 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010658 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010659 }
10660 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010661 ctxt->instate = XML_PARSER_CONTENT;
10662 ctxt->depth = oldctxt->depth + 1;
10663
Daniel Veillard328f48c2002-11-15 15:24:34 +000010664 ctxt->validate = 0;
10665 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010666 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10667 /*
10668 * ID/IDREF registration will be done in xmlValidateElement below
10669 */
10670 ctxt->loadsubset |= XML_SKIP_IDS;
10671 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010672 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010673 ctxt->attsDefault = oldctxt->attsDefault;
10674 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010675
Daniel Veillard68e9e742002-11-16 15:35:11 +000010676 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010677 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010678 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010679 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010680 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010681 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010682 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010683 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010684 }
10685
10686 if (!ctxt->wellFormed) {
10687 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010688 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010689 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010690 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010691 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010692 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010693 }
10694
William M. Brack7b9154b2003-09-27 19:23:50 +000010695 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010696 xmlNodePtr cur;
10697
10698 /*
10699 * Return the newly created nodeset after unlinking it from
10700 * they pseudo parent.
10701 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010702 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010703 *lst = cur;
10704 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010705#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010706 if (oldctxt->validate && oldctxt->wellFormed &&
10707 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10708 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10709 oldctxt->myDoc, cur);
10710 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010711#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010712 cur->parent = NULL;
10713 cur = cur->next;
10714 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010715 ctxt->myDoc->children->children = NULL;
10716 }
10717 if (ctxt->myDoc != NULL) {
10718 xmlFreeNode(ctxt->myDoc->children);
10719 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010720 }
10721
10722 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010723 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010724 ctxt->attsDefault = NULL;
10725 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010726 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010727 if (newDoc != NULL)
10728 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010729
10730 return(ret);
10731}
10732
Daniel Veillard81273902003-09-30 00:43:48 +000010733#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010734/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010735 * xmlParseBalancedChunkMemoryRecover:
10736 * @doc: the document the chunk pertains to
10737 * @sax: the SAX handler bloc (possibly NULL)
10738 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10739 * @depth: Used for loop detection, use 0
10740 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10741 * @lst: the return value for the set of parsed nodes
10742 * @recover: return nodes even if the data is broken (use 0)
10743 *
10744 *
10745 * Parse a well-balanced chunk of an XML document
10746 * called by the parser
10747 * The allowed sequence for the Well Balanced Chunk is the one defined by
10748 * the content production in the XML grammar:
10749 *
10750 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10751 *
10752 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10753 * the parser error code otherwise
10754 *
10755 * In case recover is set to 1, the nodelist will not be empty even if
10756 * the parsed chunk is not well balanced.
10757 */
10758int
10759xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10760 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10761 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010762 xmlParserCtxtPtr ctxt;
10763 xmlDocPtr newDoc;
10764 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010765 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010766 int size;
10767 int ret = 0;
10768
10769 if (depth > 40) {
10770 return(XML_ERR_ENTITY_LOOP);
10771 }
10772
10773
Daniel Veillardcda96922001-08-21 10:56:31 +000010774 if (lst != NULL)
10775 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010776 if (string == NULL)
10777 return(-1);
10778
10779 size = xmlStrlen(string);
10780
10781 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10782 if (ctxt == NULL) return(-1);
10783 ctxt->userData = ctxt;
10784 if (sax != NULL) {
10785 oldsax = ctxt->sax;
10786 ctxt->sax = sax;
10787 if (user_data != NULL)
10788 ctxt->userData = user_data;
10789 }
10790 newDoc = xmlNewDoc(BAD_CAST "1.0");
10791 if (newDoc == NULL) {
10792 xmlFreeParserCtxt(ctxt);
10793 return(-1);
10794 }
10795 if (doc != NULL) {
10796 newDoc->intSubset = doc->intSubset;
10797 newDoc->extSubset = doc->extSubset;
10798 }
10799 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10800 if (newDoc->children == NULL) {
10801 if (sax != NULL)
10802 ctxt->sax = oldsax;
10803 xmlFreeParserCtxt(ctxt);
10804 newDoc->intSubset = NULL;
10805 newDoc->extSubset = NULL;
10806 xmlFreeDoc(newDoc);
10807 return(-1);
10808 }
10809 nodePush(ctxt, newDoc->children);
10810 if (doc == NULL) {
10811 ctxt->myDoc = newDoc;
10812 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010813 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010814 newDoc->children->doc = doc;
10815 }
10816 ctxt->instate = XML_PARSER_CONTENT;
10817 ctxt->depth = depth;
10818
10819 /*
10820 * Doing validity checking on chunk doesn't make sense
10821 */
10822 ctxt->validate = 0;
10823 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010824 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010825
Daniel Veillardb39bc392002-10-26 19:29:51 +000010826 if ( doc != NULL ){
10827 content = doc->children;
10828 doc->children = NULL;
10829 xmlParseContent(ctxt);
10830 doc->children = content;
10831 }
10832 else {
10833 xmlParseContent(ctxt);
10834 }
Owen Taylor3473f882001-02-23 17:55:21 +000010835 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010836 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010837 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010838 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010839 }
10840 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010841 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010842 }
10843
10844 if (!ctxt->wellFormed) {
10845 if (ctxt->errNo == 0)
10846 ret = 1;
10847 else
10848 ret = ctxt->errNo;
10849 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010850 ret = 0;
10851 }
10852
10853 if (lst != NULL && (ret == 0 || recover == 1)) {
10854 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010855
10856 /*
10857 * Return the newly created nodeset after unlinking it from
10858 * they pseudo parent.
10859 */
10860 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010861 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010862 while (cur != NULL) {
10863 cur->parent = NULL;
10864 cur = cur->next;
10865 }
10866 newDoc->children->children = NULL;
10867 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010868
Owen Taylor3473f882001-02-23 17:55:21 +000010869 if (sax != NULL)
10870 ctxt->sax = oldsax;
10871 xmlFreeParserCtxt(ctxt);
10872 newDoc->intSubset = NULL;
10873 newDoc->extSubset = NULL;
10874 xmlFreeDoc(newDoc);
10875
10876 return(ret);
10877}
10878
10879/**
10880 * xmlSAXParseEntity:
10881 * @sax: the SAX handler block
10882 * @filename: the filename
10883 *
10884 * parse an XML external entity out of context and build a tree.
10885 * It use the given SAX function block to handle the parsing callback.
10886 * If sax is NULL, fallback to the default DOM tree building routines.
10887 *
10888 * [78] extParsedEnt ::= TextDecl? content
10889 *
10890 * This correspond to a "Well Balanced" chunk
10891 *
10892 * Returns the resulting document tree
10893 */
10894
10895xmlDocPtr
10896xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10897 xmlDocPtr ret;
10898 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010899
10900 ctxt = xmlCreateFileParserCtxt(filename);
10901 if (ctxt == NULL) {
10902 return(NULL);
10903 }
10904 if (sax != NULL) {
10905 if (ctxt->sax != NULL)
10906 xmlFree(ctxt->sax);
10907 ctxt->sax = sax;
10908 ctxt->userData = NULL;
10909 }
10910
Owen Taylor3473f882001-02-23 17:55:21 +000010911 xmlParseExtParsedEnt(ctxt);
10912
10913 if (ctxt->wellFormed)
10914 ret = ctxt->myDoc;
10915 else {
10916 ret = NULL;
10917 xmlFreeDoc(ctxt->myDoc);
10918 ctxt->myDoc = NULL;
10919 }
10920 if (sax != NULL)
10921 ctxt->sax = NULL;
10922 xmlFreeParserCtxt(ctxt);
10923
10924 return(ret);
10925}
10926
10927/**
10928 * xmlParseEntity:
10929 * @filename: the filename
10930 *
10931 * parse an XML external entity out of context and build a tree.
10932 *
10933 * [78] extParsedEnt ::= TextDecl? content
10934 *
10935 * This correspond to a "Well Balanced" chunk
10936 *
10937 * Returns the resulting document tree
10938 */
10939
10940xmlDocPtr
10941xmlParseEntity(const char *filename) {
10942 return(xmlSAXParseEntity(NULL, filename));
10943}
Daniel Veillard81273902003-09-30 00:43:48 +000010944#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010945
10946/**
10947 * xmlCreateEntityParserCtxt:
10948 * @URL: the entity URL
10949 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010950 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010951 *
10952 * Create a parser context for an external entity
10953 * Automatic support for ZLIB/Compress compressed document is provided
10954 * by default if found at compile-time.
10955 *
10956 * Returns the new parser context or NULL
10957 */
10958xmlParserCtxtPtr
10959xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10960 const xmlChar *base) {
10961 xmlParserCtxtPtr ctxt;
10962 xmlParserInputPtr inputStream;
10963 char *directory = NULL;
10964 xmlChar *uri;
10965
10966 ctxt = xmlNewParserCtxt();
10967 if (ctxt == NULL) {
10968 return(NULL);
10969 }
10970
10971 uri = xmlBuildURI(URL, base);
10972
10973 if (uri == NULL) {
10974 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10975 if (inputStream == NULL) {
10976 xmlFreeParserCtxt(ctxt);
10977 return(NULL);
10978 }
10979
10980 inputPush(ctxt, inputStream);
10981
10982 if ((ctxt->directory == NULL) && (directory == NULL))
10983 directory = xmlParserGetDirectory((char *)URL);
10984 if ((ctxt->directory == NULL) && (directory != NULL))
10985 ctxt->directory = directory;
10986 } else {
10987 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10988 if (inputStream == NULL) {
10989 xmlFree(uri);
10990 xmlFreeParserCtxt(ctxt);
10991 return(NULL);
10992 }
10993
10994 inputPush(ctxt, inputStream);
10995
10996 if ((ctxt->directory == NULL) && (directory == NULL))
10997 directory = xmlParserGetDirectory((char *)uri);
10998 if ((ctxt->directory == NULL) && (directory != NULL))
10999 ctxt->directory = directory;
11000 xmlFree(uri);
11001 }
Owen Taylor3473f882001-02-23 17:55:21 +000011002 return(ctxt);
11003}
11004
11005/************************************************************************
11006 * *
11007 * Front ends when parsing from a file *
11008 * *
11009 ************************************************************************/
11010
11011/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011012 * xmlCreateURLParserCtxt:
11013 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011014 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011015 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011016 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011017 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011018 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011019 *
11020 * Returns the new parser context or NULL
11021 */
11022xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011023xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011024{
11025 xmlParserCtxtPtr ctxt;
11026 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011027 char *directory = NULL;
11028
Owen Taylor3473f882001-02-23 17:55:21 +000011029 ctxt = xmlNewParserCtxt();
11030 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011031 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011032 return(NULL);
11033 }
11034
Daniel Veillard61b93382003-11-03 14:28:31 +000011035 if (options != 0)
11036 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011037
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011038 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011039 if (inputStream == NULL) {
11040 xmlFreeParserCtxt(ctxt);
11041 return(NULL);
11042 }
11043
Owen Taylor3473f882001-02-23 17:55:21 +000011044 inputPush(ctxt, inputStream);
11045 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011046 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011047 if ((ctxt->directory == NULL) && (directory != NULL))
11048 ctxt->directory = directory;
11049
11050 return(ctxt);
11051}
11052
Daniel Veillard61b93382003-11-03 14:28:31 +000011053/**
11054 * xmlCreateFileParserCtxt:
11055 * @filename: the filename
11056 *
11057 * Create a parser context for a file content.
11058 * Automatic support for ZLIB/Compress compressed document is provided
11059 * by default if found at compile-time.
11060 *
11061 * Returns the new parser context or NULL
11062 */
11063xmlParserCtxtPtr
11064xmlCreateFileParserCtxt(const char *filename)
11065{
11066 return(xmlCreateURLParserCtxt(filename, 0));
11067}
11068
Daniel Veillard81273902003-09-30 00:43:48 +000011069#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011070/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011071 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011072 * @sax: the SAX handler block
11073 * @filename: the filename
11074 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11075 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011076 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011077 *
11078 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11079 * compressed document is provided by default if found at compile-time.
11080 * It use the given SAX function block to handle the parsing callback.
11081 * If sax is NULL, fallback to the default DOM tree building routines.
11082 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011083 * User data (void *) is stored within the parser context in the
11084 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011085 *
Owen Taylor3473f882001-02-23 17:55:21 +000011086 * Returns the resulting document tree
11087 */
11088
11089xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011090xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11091 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011092 xmlDocPtr ret;
11093 xmlParserCtxtPtr ctxt;
11094 char *directory = NULL;
11095
Daniel Veillard635ef722001-10-29 11:48:19 +000011096 xmlInitParser();
11097
Owen Taylor3473f882001-02-23 17:55:21 +000011098 ctxt = xmlCreateFileParserCtxt(filename);
11099 if (ctxt == NULL) {
11100 return(NULL);
11101 }
11102 if (sax != NULL) {
11103 if (ctxt->sax != NULL)
11104 xmlFree(ctxt->sax);
11105 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011106 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011107 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011108 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011109 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011110 }
Owen Taylor3473f882001-02-23 17:55:21 +000011111
11112 if ((ctxt->directory == NULL) && (directory == NULL))
11113 directory = xmlParserGetDirectory(filename);
11114 if ((ctxt->directory == NULL) && (directory != NULL))
11115 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11116
Daniel Veillarddad3f682002-11-17 16:47:27 +000011117 ctxt->recovery = recovery;
11118
Owen Taylor3473f882001-02-23 17:55:21 +000011119 xmlParseDocument(ctxt);
11120
William M. Brackc07329e2003-09-08 01:57:30 +000011121 if ((ctxt->wellFormed) || recovery) {
11122 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011123 if (ret != NULL) {
11124 if (ctxt->input->buf->compressed > 0)
11125 ret->compression = 9;
11126 else
11127 ret->compression = ctxt->input->buf->compressed;
11128 }
William M. Brackc07329e2003-09-08 01:57:30 +000011129 }
Owen Taylor3473f882001-02-23 17:55:21 +000011130 else {
11131 ret = NULL;
11132 xmlFreeDoc(ctxt->myDoc);
11133 ctxt->myDoc = NULL;
11134 }
11135 if (sax != NULL)
11136 ctxt->sax = NULL;
11137 xmlFreeParserCtxt(ctxt);
11138
11139 return(ret);
11140}
11141
11142/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011143 * xmlSAXParseFile:
11144 * @sax: the SAX handler block
11145 * @filename: the filename
11146 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11147 * documents
11148 *
11149 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11150 * compressed document is provided by default if found at compile-time.
11151 * It use the given SAX function block to handle the parsing callback.
11152 * If sax is NULL, fallback to the default DOM tree building routines.
11153 *
11154 * Returns the resulting document tree
11155 */
11156
11157xmlDocPtr
11158xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11159 int recovery) {
11160 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11161}
11162
11163/**
Owen Taylor3473f882001-02-23 17:55:21 +000011164 * xmlRecoverDoc:
11165 * @cur: a pointer to an array of xmlChar
11166 *
11167 * parse an XML in-memory document and build a tree.
11168 * In the case the document is not Well Formed, a tree is built anyway
11169 *
11170 * Returns the resulting document tree
11171 */
11172
11173xmlDocPtr
11174xmlRecoverDoc(xmlChar *cur) {
11175 return(xmlSAXParseDoc(NULL, cur, 1));
11176}
11177
11178/**
11179 * xmlParseFile:
11180 * @filename: the filename
11181 *
11182 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11183 * compressed document is provided by default if found at compile-time.
11184 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011185 * Returns the resulting document tree if the file was wellformed,
11186 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011187 */
11188
11189xmlDocPtr
11190xmlParseFile(const char *filename) {
11191 return(xmlSAXParseFile(NULL, filename, 0));
11192}
11193
11194/**
11195 * xmlRecoverFile:
11196 * @filename: the filename
11197 *
11198 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11199 * compressed document is provided by default if found at compile-time.
11200 * In the case the document is not Well Formed, a tree is built anyway
11201 *
11202 * Returns the resulting document tree
11203 */
11204
11205xmlDocPtr
11206xmlRecoverFile(const char *filename) {
11207 return(xmlSAXParseFile(NULL, filename, 1));
11208}
11209
11210
11211/**
11212 * xmlSetupParserForBuffer:
11213 * @ctxt: an XML parser context
11214 * @buffer: a xmlChar * buffer
11215 * @filename: a file name
11216 *
11217 * Setup the parser context to parse a new buffer; Clears any prior
11218 * contents from the parser context. The buffer parameter must not be
11219 * NULL, but the filename parameter can be
11220 */
11221void
11222xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11223 const char* filename)
11224{
11225 xmlParserInputPtr input;
11226
11227 input = xmlNewInputStream(ctxt);
11228 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011229 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011230 xmlFree(ctxt);
11231 return;
11232 }
11233
11234 xmlClearParserCtxt(ctxt);
11235 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011236 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011237 input->base = buffer;
11238 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011239 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011240 inputPush(ctxt, input);
11241}
11242
11243/**
11244 * xmlSAXUserParseFile:
11245 * @sax: a SAX handler
11246 * @user_data: The user data returned on SAX callbacks
11247 * @filename: a file name
11248 *
11249 * parse an XML file and call the given SAX handler routines.
11250 * Automatic support for ZLIB/Compress compressed document is provided
11251 *
11252 * Returns 0 in case of success or a error number otherwise
11253 */
11254int
11255xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11256 const char *filename) {
11257 int ret = 0;
11258 xmlParserCtxtPtr ctxt;
11259
11260 ctxt = xmlCreateFileParserCtxt(filename);
11261 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011262#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011263 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011264#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011265 xmlFree(ctxt->sax);
11266 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011267 xmlDetectSAX2(ctxt);
11268
Owen Taylor3473f882001-02-23 17:55:21 +000011269 if (user_data != NULL)
11270 ctxt->userData = user_data;
11271
11272 xmlParseDocument(ctxt);
11273
11274 if (ctxt->wellFormed)
11275 ret = 0;
11276 else {
11277 if (ctxt->errNo != 0)
11278 ret = ctxt->errNo;
11279 else
11280 ret = -1;
11281 }
11282 if (sax != NULL)
11283 ctxt->sax = NULL;
11284 xmlFreeParserCtxt(ctxt);
11285
11286 return ret;
11287}
Daniel Veillard81273902003-09-30 00:43:48 +000011288#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011289
11290/************************************************************************
11291 * *
11292 * Front ends when parsing from memory *
11293 * *
11294 ************************************************************************/
11295
11296/**
11297 * xmlCreateMemoryParserCtxt:
11298 * @buffer: a pointer to a char array
11299 * @size: the size of the array
11300 *
11301 * Create a parser context for an XML in-memory document.
11302 *
11303 * Returns the new parser context or NULL
11304 */
11305xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011306xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011307 xmlParserCtxtPtr ctxt;
11308 xmlParserInputPtr input;
11309 xmlParserInputBufferPtr buf;
11310
11311 if (buffer == NULL)
11312 return(NULL);
11313 if (size <= 0)
11314 return(NULL);
11315
11316 ctxt = xmlNewParserCtxt();
11317 if (ctxt == NULL)
11318 return(NULL);
11319
Daniel Veillard53350552003-09-18 13:35:51 +000011320 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011321 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011322 if (buf == NULL) {
11323 xmlFreeParserCtxt(ctxt);
11324 return(NULL);
11325 }
Owen Taylor3473f882001-02-23 17:55:21 +000011326
11327 input = xmlNewInputStream(ctxt);
11328 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011329 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011330 xmlFreeParserCtxt(ctxt);
11331 return(NULL);
11332 }
11333
11334 input->filename = NULL;
11335 input->buf = buf;
11336 input->base = input->buf->buffer->content;
11337 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011338 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011339
11340 inputPush(ctxt, input);
11341 return(ctxt);
11342}
11343
Daniel Veillard81273902003-09-30 00:43:48 +000011344#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011345/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011346 * xmlSAXParseMemoryWithData:
11347 * @sax: the SAX handler block
11348 * @buffer: an pointer to a char array
11349 * @size: the size of the array
11350 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11351 * documents
11352 * @data: the userdata
11353 *
11354 * parse an XML in-memory block and use the given SAX function block
11355 * to handle the parsing callback. If sax is NULL, fallback to the default
11356 * DOM tree building routines.
11357 *
11358 * User data (void *) is stored within the parser context in the
11359 * context's _private member, so it is available nearly everywhere in libxml
11360 *
11361 * Returns the resulting document tree
11362 */
11363
11364xmlDocPtr
11365xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11366 int size, int recovery, void *data) {
11367 xmlDocPtr ret;
11368 xmlParserCtxtPtr ctxt;
11369
11370 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11371 if (ctxt == NULL) return(NULL);
11372 if (sax != NULL) {
11373 if (ctxt->sax != NULL)
11374 xmlFree(ctxt->sax);
11375 ctxt->sax = sax;
11376 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011377 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011378 if (data!=NULL) {
11379 ctxt->_private=data;
11380 }
11381
Daniel Veillardadba5f12003-04-04 16:09:01 +000011382 ctxt->recovery = recovery;
11383
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011384 xmlParseDocument(ctxt);
11385
11386 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11387 else {
11388 ret = NULL;
11389 xmlFreeDoc(ctxt->myDoc);
11390 ctxt->myDoc = NULL;
11391 }
11392 if (sax != NULL)
11393 ctxt->sax = NULL;
11394 xmlFreeParserCtxt(ctxt);
11395
11396 return(ret);
11397}
11398
11399/**
Owen Taylor3473f882001-02-23 17:55:21 +000011400 * xmlSAXParseMemory:
11401 * @sax: the SAX handler block
11402 * @buffer: an pointer to a char array
11403 * @size: the size of the array
11404 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11405 * documents
11406 *
11407 * parse an XML in-memory block and use the given SAX function block
11408 * to handle the parsing callback. If sax is NULL, fallback to the default
11409 * DOM tree building routines.
11410 *
11411 * Returns the resulting document tree
11412 */
11413xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011414xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11415 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011416 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011417}
11418
11419/**
11420 * xmlParseMemory:
11421 * @buffer: an pointer to a char array
11422 * @size: the size of the array
11423 *
11424 * parse an XML in-memory block and build a tree.
11425 *
11426 * Returns the resulting document tree
11427 */
11428
Daniel Veillard50822cb2001-07-26 20:05:51 +000011429xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011430 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11431}
11432
11433/**
11434 * xmlRecoverMemory:
11435 * @buffer: an pointer to a char array
11436 * @size: the size of the array
11437 *
11438 * parse an XML in-memory block and build a tree.
11439 * In the case the document is not Well Formed, a tree is built anyway
11440 *
11441 * Returns the resulting document tree
11442 */
11443
Daniel Veillard50822cb2001-07-26 20:05:51 +000011444xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011445 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11446}
11447
11448/**
11449 * xmlSAXUserParseMemory:
11450 * @sax: a SAX handler
11451 * @user_data: The user data returned on SAX callbacks
11452 * @buffer: an in-memory XML document input
11453 * @size: the length of the XML document in bytes
11454 *
11455 * A better SAX parsing routine.
11456 * parse an XML in-memory buffer and call the given SAX handler routines.
11457 *
11458 * Returns 0 in case of success or a error number otherwise
11459 */
11460int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011461 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011462 int ret = 0;
11463 xmlParserCtxtPtr ctxt;
11464 xmlSAXHandlerPtr oldsax = NULL;
11465
Daniel Veillard9e923512002-08-14 08:48:52 +000011466 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011467 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11468 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011469 oldsax = ctxt->sax;
11470 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011471 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011472 if (user_data != NULL)
11473 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011474
11475 xmlParseDocument(ctxt);
11476
11477 if (ctxt->wellFormed)
11478 ret = 0;
11479 else {
11480 if (ctxt->errNo != 0)
11481 ret = ctxt->errNo;
11482 else
11483 ret = -1;
11484 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011485 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011486 xmlFreeParserCtxt(ctxt);
11487
11488 return ret;
11489}
Daniel Veillard81273902003-09-30 00:43:48 +000011490#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011491
11492/**
11493 * xmlCreateDocParserCtxt:
11494 * @cur: a pointer to an array of xmlChar
11495 *
11496 * Creates a parser context for an XML in-memory document.
11497 *
11498 * Returns the new parser context or NULL
11499 */
11500xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011501xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011502 int len;
11503
11504 if (cur == NULL)
11505 return(NULL);
11506 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011507 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011508}
11509
Daniel Veillard81273902003-09-30 00:43:48 +000011510#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011511/**
11512 * xmlSAXParseDoc:
11513 * @sax: the SAX handler block
11514 * @cur: a pointer to an array of xmlChar
11515 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11516 * documents
11517 *
11518 * parse an XML in-memory document and build a tree.
11519 * It use the given SAX function block to handle the parsing callback.
11520 * If sax is NULL, fallback to the default DOM tree building routines.
11521 *
11522 * Returns the resulting document tree
11523 */
11524
11525xmlDocPtr
11526xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11527 xmlDocPtr ret;
11528 xmlParserCtxtPtr ctxt;
11529
11530 if (cur == NULL) return(NULL);
11531
11532
11533 ctxt = xmlCreateDocParserCtxt(cur);
11534 if (ctxt == NULL) return(NULL);
11535 if (sax != NULL) {
11536 ctxt->sax = sax;
11537 ctxt->userData = NULL;
11538 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011539 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011540
11541 xmlParseDocument(ctxt);
11542 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11543 else {
11544 ret = NULL;
11545 xmlFreeDoc(ctxt->myDoc);
11546 ctxt->myDoc = NULL;
11547 }
11548 if (sax != NULL)
11549 ctxt->sax = NULL;
11550 xmlFreeParserCtxt(ctxt);
11551
11552 return(ret);
11553}
11554
11555/**
11556 * xmlParseDoc:
11557 * @cur: a pointer to an array of xmlChar
11558 *
11559 * parse an XML in-memory document and build a tree.
11560 *
11561 * Returns the resulting document tree
11562 */
11563
11564xmlDocPtr
11565xmlParseDoc(xmlChar *cur) {
11566 return(xmlSAXParseDoc(NULL, cur, 0));
11567}
Daniel Veillard81273902003-09-30 00:43:48 +000011568#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011569
Daniel Veillard81273902003-09-30 00:43:48 +000011570#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011571/************************************************************************
11572 * *
11573 * Specific function to keep track of entities references *
11574 * and used by the XSLT debugger *
11575 * *
11576 ************************************************************************/
11577
11578static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11579
11580/**
11581 * xmlAddEntityReference:
11582 * @ent : A valid entity
11583 * @firstNode : A valid first node for children of entity
11584 * @lastNode : A valid last node of children entity
11585 *
11586 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11587 */
11588static void
11589xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11590 xmlNodePtr lastNode)
11591{
11592 if (xmlEntityRefFunc != NULL) {
11593 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11594 }
11595}
11596
11597
11598/**
11599 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011600 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011601 *
11602 * Set the function to call call back when a xml reference has been made
11603 */
11604void
11605xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11606{
11607 xmlEntityRefFunc = func;
11608}
Daniel Veillard81273902003-09-30 00:43:48 +000011609#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011610
11611/************************************************************************
11612 * *
11613 * Miscellaneous *
11614 * *
11615 ************************************************************************/
11616
11617#ifdef LIBXML_XPATH_ENABLED
11618#include <libxml/xpath.h>
11619#endif
11620
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011621extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011622static int xmlParserInitialized = 0;
11623
11624/**
11625 * xmlInitParser:
11626 *
11627 * Initialization function for the XML parser.
11628 * This is not reentrant. Call once before processing in case of
11629 * use in multithreaded programs.
11630 */
11631
11632void
11633xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011634 if (xmlParserInitialized != 0)
11635 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011636
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011637 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11638 (xmlGenericError == NULL))
11639 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011640 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011641 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011642 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011643 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011644 xmlDefaultSAXHandlerInit();
11645 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011646#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011647 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011648#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011649#ifdef LIBXML_HTML_ENABLED
11650 htmlInitAutoClose();
11651 htmlDefaultSAXHandlerInit();
11652#endif
11653#ifdef LIBXML_XPATH_ENABLED
11654 xmlXPathInit();
11655#endif
11656 xmlParserInitialized = 1;
11657}
11658
11659/**
11660 * xmlCleanupParser:
11661 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011662 * Cleanup function for the XML library. It tries to reclaim all
11663 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011664 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011665 * function should not prevent reusing the library but one should
11666 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011667 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011668 */
11669
11670void
11671xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011672 if (!xmlParserInitialized)
11673 return;
11674
Owen Taylor3473f882001-02-23 17:55:21 +000011675 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011676#ifdef LIBXML_CATALOG_ENABLED
11677 xmlCatalogCleanup();
11678#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011679 xmlCleanupInputCallbacks();
11680#ifdef LIBXML_OUTPUT_ENABLED
11681 xmlCleanupOutputCallbacks();
11682#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011683 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011684 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011685 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011686 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011687 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011688}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011689
11690/************************************************************************
11691 * *
11692 * New set (2.6.0) of simpler and more flexible APIs *
11693 * *
11694 ************************************************************************/
11695
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.  A variable named dict must be visible where the macro
 * is expanded; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement: "DICT_FREE(x);" can then be used safely as the body of an
 * unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
11707
11708/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011709 * xmlCtxtReset:
11710 * @ctxt: an XML parser context
11711 *
11712 * Reset a parser context
11713 */
11714void
11715xmlCtxtReset(xmlParserCtxtPtr ctxt)
11716{
11717 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011718 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011719
11720 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11721 xmlFreeInputStream(input);
11722 }
11723 ctxt->inputNr = 0;
11724 ctxt->input = NULL;
11725
11726 ctxt->spaceNr = 0;
11727 ctxt->spaceTab[0] = -1;
11728 ctxt->space = &ctxt->spaceTab[0];
11729
11730
11731 ctxt->nodeNr = 0;
11732 ctxt->node = NULL;
11733
11734 ctxt->nameNr = 0;
11735 ctxt->name = NULL;
11736
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011737 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011738 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011739 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011740 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011741 DICT_FREE(ctxt->directory);
11742 ctxt->directory = NULL;
11743 DICT_FREE(ctxt->extSubURI);
11744 ctxt->extSubURI = NULL;
11745 DICT_FREE(ctxt->extSubSystem);
11746 ctxt->extSubSystem = NULL;
11747 if (ctxt->myDoc != NULL)
11748 xmlFreeDoc(ctxt->myDoc);
11749 ctxt->myDoc = NULL;
11750
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011751 ctxt->standalone = -1;
11752 ctxt->hasExternalSubset = 0;
11753 ctxt->hasPErefs = 0;
11754 ctxt->html = 0;
11755 ctxt->external = 0;
11756 ctxt->instate = XML_PARSER_START;
11757 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011758
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011759 ctxt->wellFormed = 1;
11760 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000011761 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011762 ctxt->valid = 1;
11763 ctxt->vctxt.userData = ctxt;
11764 ctxt->vctxt.error = xmlParserValidityError;
11765 ctxt->vctxt.warning = xmlParserValidityWarning;
11766 ctxt->record_info = 0;
11767 ctxt->nbChars = 0;
11768 ctxt->checkIndex = 0;
11769 ctxt->inSubset = 0;
11770 ctxt->errNo = XML_ERR_OK;
11771 ctxt->depth = 0;
11772 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11773 ctxt->catalogs = NULL;
11774 xmlInitNodeInfoSeq(&ctxt->node_seq);
11775
11776 if (ctxt->attsDefault != NULL) {
11777 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11778 ctxt->attsDefault = NULL;
11779 }
11780 if (ctxt->attsSpecial != NULL) {
11781 xmlHashFree(ctxt->attsSpecial, NULL);
11782 ctxt->attsSpecial = NULL;
11783 }
11784
Daniel Veillard4432df22003-09-28 18:58:27 +000011785#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011786 if (ctxt->catalogs != NULL)
11787 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011788#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011789 if (ctxt->lastError.code != XML_ERR_OK)
11790 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011791}
11792
11793/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011794 * xmlCtxtResetPush:
11795 * @ctxt: an XML parser context
11796 * @chunk: a pointer to an array of chars
11797 * @size: number of chars in the array
11798 * @filename: an optional file name or URI
11799 * @encoding: the document encoding, or NULL
11800 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011801 * Reset a push parser context
11802 *
11803 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011804 */
11805int
11806xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11807 int size, const char *filename, const char *encoding)
11808{
11809 xmlParserInputPtr inputStream;
11810 xmlParserInputBufferPtr buf;
11811 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11812
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011813 if (ctxt == NULL)
11814 return(1);
11815
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011816 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11817 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11818
11819 buf = xmlAllocParserInputBuffer(enc);
11820 if (buf == NULL)
11821 return(1);
11822
11823 if (ctxt == NULL) {
11824 xmlFreeParserInputBuffer(buf);
11825 return(1);
11826 }
11827
11828 xmlCtxtReset(ctxt);
11829
11830 if (ctxt->pushTab == NULL) {
11831 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11832 sizeof(xmlChar *));
11833 if (ctxt->pushTab == NULL) {
11834 xmlErrMemory(ctxt, NULL);
11835 xmlFreeParserInputBuffer(buf);
11836 return(1);
11837 }
11838 }
11839
11840 if (filename == NULL) {
11841 ctxt->directory = NULL;
11842 } else {
11843 ctxt->directory = xmlParserGetDirectory(filename);
11844 }
11845
11846 inputStream = xmlNewInputStream(ctxt);
11847 if (inputStream == NULL) {
11848 xmlFreeParserInputBuffer(buf);
11849 return(1);
11850 }
11851
11852 if (filename == NULL)
11853 inputStream->filename = NULL;
11854 else
11855 inputStream->filename = (char *)
11856 xmlCanonicPath((const xmlChar *) filename);
11857 inputStream->buf = buf;
11858 inputStream->base = inputStream->buf->buffer->content;
11859 inputStream->cur = inputStream->buf->buffer->content;
11860 inputStream->end =
11861 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11862
11863 inputPush(ctxt, inputStream);
11864
11865 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11866 (ctxt->input->buf != NULL)) {
11867 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11868 int cur = ctxt->input->cur - ctxt->input->base;
11869
11870 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11871
11872 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11873 ctxt->input->cur = ctxt->input->base + cur;
11874 ctxt->input->end =
11875 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11876 use];
11877#ifdef DEBUG_PUSH
11878 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11879#endif
11880 }
11881
11882 if (encoding != NULL) {
11883 xmlCharEncodingHandlerPtr hdlr;
11884
11885 hdlr = xmlFindCharEncodingHandler(encoding);
11886 if (hdlr != NULL) {
11887 xmlSwitchToEncoding(ctxt, hdlr);
11888 } else {
11889 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11890 "Unsupported encoding %s\n", BAD_CAST encoding);
11891 }
11892 } else if (enc != XML_CHAR_ENCODING_NONE) {
11893 xmlSwitchEncoding(ctxt, enc);
11894 }
11895
11896 return(0);
11897}
11898
11899/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011900 * xmlCtxtUseOptions:
11901 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011902 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011903 *
11904 * Applies the options to the parser context
11905 *
11906 * Returns 0 in case of success, the set of unknown or unimplemented options
11907 * in case of error.
11908 */
11909int
11910xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11911{
11912 if (options & XML_PARSE_RECOVER) {
11913 ctxt->recovery = 1;
11914 options -= XML_PARSE_RECOVER;
11915 } else
11916 ctxt->recovery = 0;
11917 if (options & XML_PARSE_DTDLOAD) {
11918 ctxt->loadsubset = XML_DETECT_IDS;
11919 options -= XML_PARSE_DTDLOAD;
11920 } else
11921 ctxt->loadsubset = 0;
11922 if (options & XML_PARSE_DTDATTR) {
11923 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
11924 options -= XML_PARSE_DTDATTR;
11925 }
11926 if (options & XML_PARSE_NOENT) {
11927 ctxt->replaceEntities = 1;
11928 /* ctxt->loadsubset |= XML_DETECT_IDS; */
11929 options -= XML_PARSE_NOENT;
11930 } else
11931 ctxt->replaceEntities = 0;
11932 if (options & XML_PARSE_NOWARNING) {
11933 ctxt->sax->warning = NULL;
11934 options -= XML_PARSE_NOWARNING;
11935 }
11936 if (options & XML_PARSE_NOERROR) {
11937 ctxt->sax->error = NULL;
11938 ctxt->sax->fatalError = NULL;
11939 options -= XML_PARSE_NOERROR;
11940 }
11941 if (options & XML_PARSE_PEDANTIC) {
11942 ctxt->pedantic = 1;
11943 options -= XML_PARSE_PEDANTIC;
11944 } else
11945 ctxt->pedantic = 0;
11946 if (options & XML_PARSE_NOBLANKS) {
11947 ctxt->keepBlanks = 0;
11948 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
11949 options -= XML_PARSE_NOBLANKS;
11950 } else
11951 ctxt->keepBlanks = 1;
11952 if (options & XML_PARSE_DTDVALID) {
11953 ctxt->validate = 1;
11954 if (options & XML_PARSE_NOWARNING)
11955 ctxt->vctxt.warning = NULL;
11956 if (options & XML_PARSE_NOERROR)
11957 ctxt->vctxt.error = NULL;
11958 options -= XML_PARSE_DTDVALID;
11959 } else
11960 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000011961#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011962 if (options & XML_PARSE_SAX1) {
11963 ctxt->sax->startElement = xmlSAX2StartElement;
11964 ctxt->sax->endElement = xmlSAX2EndElement;
11965 ctxt->sax->startElementNs = NULL;
11966 ctxt->sax->endElementNs = NULL;
11967 ctxt->sax->initialized = 1;
11968 options -= XML_PARSE_SAX1;
11969 }
Daniel Veillard81273902003-09-30 00:43:48 +000011970#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011971 if (options & XML_PARSE_NODICT) {
11972 ctxt->dictNames = 0;
11973 options -= XML_PARSE_NODICT;
11974 } else {
11975 ctxt->dictNames = 1;
11976 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000011977 if (options & XML_PARSE_NOCDATA) {
11978 ctxt->sax->cdataBlock = NULL;
11979 options -= XML_PARSE_NOCDATA;
11980 }
11981 if (options & XML_PARSE_NSCLEAN) {
11982 ctxt->options |= XML_PARSE_NSCLEAN;
11983 options -= XML_PARSE_NSCLEAN;
11984 }
Daniel Veillard61b93382003-11-03 14:28:31 +000011985 if (options & XML_PARSE_NONET) {
11986 ctxt->options |= XML_PARSE_NONET;
11987 options -= XML_PARSE_NONET;
11988 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000011989 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011990 return (options);
11991}
11992
11993/**
11994 * xmlDoRead:
11995 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000011996 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011997 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011998 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011999 * @reuse: keep the context for reuse
12000 *
12001 * Common front-end for the xmlRead functions
12002 *
12003 * Returns the resulting document tree or NULL
12004 */
12005static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012006xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12007 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012008{
12009 xmlDocPtr ret;
12010
12011 xmlCtxtUseOptions(ctxt, options);
12012 if (encoding != NULL) {
12013 xmlCharEncodingHandlerPtr hdlr;
12014
12015 hdlr = xmlFindCharEncodingHandler(encoding);
12016 if (hdlr != NULL)
12017 xmlSwitchToEncoding(ctxt, hdlr);
12018 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012019 if ((URL != NULL) && (ctxt->input != NULL) &&
12020 (ctxt->input->filename == NULL))
12021 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012022 xmlParseDocument(ctxt);
12023 if ((ctxt->wellFormed) || ctxt->recovery)
12024 ret = ctxt->myDoc;
12025 else {
12026 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012027 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012028 xmlFreeDoc(ctxt->myDoc);
12029 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012030 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012031 ctxt->myDoc = NULL;
12032 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012033 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012034 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012035
12036 return (ret);
12037}
12038
12039/**
12040 * xmlReadDoc:
12041 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012042 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012043 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012044 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012045 *
12046 * parse an XML in-memory document and build a tree.
12047 *
12048 * Returns the resulting document tree
12049 */
12050xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012051xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012052{
12053 xmlParserCtxtPtr ctxt;
12054
12055 if (cur == NULL)
12056 return (NULL);
12057
12058 ctxt = xmlCreateDocParserCtxt(cur);
12059 if (ctxt == NULL)
12060 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012061 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012062}
12063
12064/**
12065 * xmlReadFile:
12066 * @filename: a file or URL
12067 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012068 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012069 *
12070 * parse an XML file from the filesystem or the network.
12071 *
12072 * Returns the resulting document tree
12073 */
12074xmlDocPtr
12075xmlReadFile(const char *filename, const char *encoding, int options)
12076{
12077 xmlParserCtxtPtr ctxt;
12078
Daniel Veillard61b93382003-11-03 14:28:31 +000012079 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012080 if (ctxt == NULL)
12081 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012082 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012083}
12084
12085/**
12086 * xmlReadMemory:
12087 * @buffer: a pointer to a char array
12088 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012089 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012090 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012091 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012092 *
12093 * parse an XML in-memory document and build a tree.
12094 *
12095 * Returns the resulting document tree
12096 */
12097xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012098xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012099{
12100 xmlParserCtxtPtr ctxt;
12101
12102 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12103 if (ctxt == NULL)
12104 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012105 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012106}
12107
12108/**
12109 * xmlReadFd:
12110 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012111 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012112 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012113 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012114 *
12115 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012116 * NOTE that the file descriptor will not be closed when the
12117 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012118 *
12119 * Returns the resulting document tree
12120 */
12121xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012122xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012123{
12124 xmlParserCtxtPtr ctxt;
12125 xmlParserInputBufferPtr input;
12126 xmlParserInputPtr stream;
12127
12128 if (fd < 0)
12129 return (NULL);
12130
12131 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12132 if (input == NULL)
12133 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012134 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012135 ctxt = xmlNewParserCtxt();
12136 if (ctxt == NULL) {
12137 xmlFreeParserInputBuffer(input);
12138 return (NULL);
12139 }
12140 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12141 if (stream == NULL) {
12142 xmlFreeParserInputBuffer(input);
12143 xmlFreeParserCtxt(ctxt);
12144 return (NULL);
12145 }
12146 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012147 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012148}
12149
12150/**
12151 * xmlReadIO:
12152 * @ioread: an I/O read function
12153 * @ioclose: an I/O close function
12154 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012155 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012156 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012157 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012158 *
12159 * parse an XML document from I/O functions and source and build a tree.
12160 *
12161 * Returns the resulting document tree
12162 */
12163xmlDocPtr
12164xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012165 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012166{
12167 xmlParserCtxtPtr ctxt;
12168 xmlParserInputBufferPtr input;
12169 xmlParserInputPtr stream;
12170
12171 if (ioread == NULL)
12172 return (NULL);
12173
12174 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12175 XML_CHAR_ENCODING_NONE);
12176 if (input == NULL)
12177 return (NULL);
12178 ctxt = xmlNewParserCtxt();
12179 if (ctxt == NULL) {
12180 xmlFreeParserInputBuffer(input);
12181 return (NULL);
12182 }
12183 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12184 if (stream == NULL) {
12185 xmlFreeParserInputBuffer(input);
12186 xmlFreeParserCtxt(ctxt);
12187 return (NULL);
12188 }
12189 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012190 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012191}
12192
12193/**
12194 * xmlCtxtReadDoc:
12195 * @ctxt: an XML parser context
12196 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012197 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012198 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012199 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012200 *
12201 * parse an XML in-memory document and build a tree.
12202 * This reuses the existing @ctxt parser context
12203 *
12204 * Returns the resulting document tree
12205 */
12206xmlDocPtr
12207xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012208 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012209{
12210 xmlParserInputPtr stream;
12211
12212 if (cur == NULL)
12213 return (NULL);
12214 if (ctxt == NULL)
12215 return (NULL);
12216
12217 xmlCtxtReset(ctxt);
12218
12219 stream = xmlNewStringInputStream(ctxt, cur);
12220 if (stream == NULL) {
12221 return (NULL);
12222 }
12223 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012224 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012225}
12226
12227/**
12228 * xmlCtxtReadFile:
12229 * @ctxt: an XML parser context
12230 * @filename: a file or URL
12231 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012232 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012233 *
12234 * parse an XML file from the filesystem or the network.
12235 * This reuses the existing @ctxt parser context
12236 *
12237 * Returns the resulting document tree
12238 */
12239xmlDocPtr
12240xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12241 const char *encoding, int options)
12242{
12243 xmlParserInputPtr stream;
12244
12245 if (filename == NULL)
12246 return (NULL);
12247 if (ctxt == NULL)
12248 return (NULL);
12249
12250 xmlCtxtReset(ctxt);
12251
12252 stream = xmlNewInputFromFile(ctxt, filename);
12253 if (stream == NULL) {
12254 return (NULL);
12255 }
12256 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012257 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012258}
12259
12260/**
12261 * xmlCtxtReadMemory:
12262 * @ctxt: an XML parser context
12263 * @buffer: a pointer to a char array
12264 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012265 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012266 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012267 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012268 *
12269 * parse an XML in-memory document and build a tree.
12270 * This reuses the existing @ctxt parser context
12271 *
12272 * Returns the resulting document tree
12273 */
12274xmlDocPtr
12275xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012276 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012277{
12278 xmlParserInputBufferPtr input;
12279 xmlParserInputPtr stream;
12280
12281 if (ctxt == NULL)
12282 return (NULL);
12283 if (buffer == NULL)
12284 return (NULL);
12285
12286 xmlCtxtReset(ctxt);
12287
12288 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12289 if (input == NULL) {
12290 return(NULL);
12291 }
12292
12293 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12294 if (stream == NULL) {
12295 xmlFreeParserInputBuffer(input);
12296 return(NULL);
12297 }
12298
12299 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012300 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012301}
12302
12303/**
12304 * xmlCtxtReadFd:
12305 * @ctxt: an XML parser context
12306 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012307 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012308 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012309 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012310 *
12311 * parse an XML from a file descriptor and build a tree.
12312 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012313 * NOTE that the file descriptor will not be closed when the
12314 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012315 *
12316 * Returns the resulting document tree
12317 */
12318xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012319xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12320 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012321{
12322 xmlParserInputBufferPtr input;
12323 xmlParserInputPtr stream;
12324
12325 if (fd < 0)
12326 return (NULL);
12327 if (ctxt == NULL)
12328 return (NULL);
12329
12330 xmlCtxtReset(ctxt);
12331
12332
12333 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12334 if (input == NULL)
12335 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012336 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012337 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12338 if (stream == NULL) {
12339 xmlFreeParserInputBuffer(input);
12340 return (NULL);
12341 }
12342 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012343 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012344}
12345
12346/**
12347 * xmlCtxtReadIO:
12348 * @ctxt: an XML parser context
12349 * @ioread: an I/O read function
12350 * @ioclose: an I/O close function
12351 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012352 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012353 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012354 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012355 *
12356 * parse an XML document from I/O functions and source and build a tree.
12357 * This reuses the existing @ctxt parser context
12358 *
12359 * Returns the resulting document tree
12360 */
12361xmlDocPtr
12362xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12363 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012364 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012365 const char *encoding, int options)
12366{
12367 xmlParserInputBufferPtr input;
12368 xmlParserInputPtr stream;
12369
12370 if (ioread == NULL)
12371 return (NULL);
12372 if (ctxt == NULL)
12373 return (NULL);
12374
12375 xmlCtxtReset(ctxt);
12376
12377 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12378 XML_CHAR_ENCODING_NONE);
12379 if (input == NULL)
12380 return (NULL);
12381 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12382 if (stream == NULL) {
12383 xmlFreeParserInputBuffer(input);
12384 return (NULL);
12385 }
12386 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012387 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012388}