blob: a5fd0114b09f9c81545875d284a79a1ad1ec2204 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their
 * corresponding production in the XML specification. A few productions
 * defining the different ranges of characters are actually implemented
 * either in parserInternals.h or parserInternals.c.
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. The default value is 1024; the variable has
 * external linkage and is not const, so it can be changed at run time.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/* NOTE(review): presumably enables the SAX2 code paths; the uses of this
 * macro are outside the part of the file reviewed here - confirm. */
#define SAX2 1

/* Buffer size constants. NOTE(review): presumably the initial sizes of the
 * parser's temporary buffers; their uses are not visible here - confirm. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string, wrapped in BAD_CAST. NOTE(review): presumably used to tag
 * documents built in SAX compatibility mode - confirm against the users. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry so that it can be walked
 * without a separate element count.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
/*
 * Forward declarations of routines that are used before their definition
 * later in this file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
	      const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000608 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
609 (ctxt->str_xml_ns == NULL)) {
610 xmlErrMemory(ctxt, NULL);
611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000612}
613
/*
 * Storage for the defaulted attributes of one element.
 *
 * Each attribute takes four consecutive slots in values[]: local name,
 * prefix, value, and a pointer just past the end of the value. The array
 * is declared with 4 slots, but the structure is allocated with extra
 * trailing room and grown with realloc, so values[] is indexed up to
 * 4 * maxAttrs entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    const xmlChar *values[4]; /* array of localname/prefix/values */
};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000621
622/**
623 * xmlAddDefAttrs:
624 * @ctxt: an XML parser context
625 * @fullname: the element fullname
626 * @fullattr: the attribute fullname
627 * @value: the attribute value
628 *
629 * Add a defaulted attribute for an element
630 */
631static void
632xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
633 const xmlChar *fullname,
634 const xmlChar *fullattr,
635 const xmlChar *value) {
636 xmlDefAttrsPtr defaults;
637 int len;
638 const xmlChar *name;
639 const xmlChar *prefix;
640
641 if (ctxt->attsDefault == NULL) {
642 ctxt->attsDefault = xmlHashCreate(10);
643 if (ctxt->attsDefault == NULL)
644 goto mem_error;
645 }
646
647 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000648 * split the element name into prefix:localname , the string found
649 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000650 */
651 name = xmlSplitQName3(fullname, &len);
652 if (name == NULL) {
653 name = xmlDictLookup(ctxt->dict, fullname, -1);
654 prefix = NULL;
655 } else {
656 name = xmlDictLookup(ctxt->dict, name, -1);
657 prefix = xmlDictLookup(ctxt->dict, fullname, len);
658 }
659
660 /*
661 * make sure there is some storage
662 */
663 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
664 if (defaults == NULL) {
665 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000666 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000667 if (defaults == NULL)
668 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000670 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
672 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000673 xmlDefAttrsPtr temp;
674
675 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000676 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000677 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 defaults->maxAttrs *= 2;
681 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
682 }
683
684 /*
685 * plit the element name into prefix:localname , the string found
686 * are within the DTD and hen not associated to namespace names.
687 */
688 name = xmlSplitQName3(fullattr, &len);
689 if (name == NULL) {
690 name = xmlDictLookup(ctxt->dict, fullattr, -1);
691 prefix = NULL;
692 } else {
693 name = xmlDictLookup(ctxt->dict, name, -1);
694 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
695 }
696
697 defaults->values[4 * defaults->nbAttrs] = name;
698 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
699 /* intern the string and precompute the end */
700 len = xmlStrlen(value);
701 value = xmlDictLookup(ctxt->dict, value, len);
702 defaults->values[4 * defaults->nbAttrs + 2] = value;
703 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
704 defaults->nbAttrs++;
705
706 return;
707
708mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000709 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000710 return;
711}
712
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000713/**
714 * xmlAddSpecialAttr:
715 * @ctxt: an XML parser context
716 * @fullname: the element fullname
717 * @fullattr: the attribute fullname
718 * @type: the attribute type
719 *
720 * Register that this attribute is not CDATA
721 */
722static void
723xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
724 const xmlChar *fullname,
725 const xmlChar *fullattr,
726 int type)
727{
728 if (ctxt->attsSpecial == NULL) {
729 ctxt->attsSpecial = xmlHashCreate(10);
730 if (ctxt->attsSpecial == NULL)
731 goto mem_error;
732 }
733
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000734 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
735 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000736 return;
737
738mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000739 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000740 return;
741}
742
Daniel Veillard4432df22003-09-28 18:58:27 +0000743/**
744 * xmlCheckLanguageID:
745 * @lang: pointer to the string value
746 *
747 * Checks that the value conforms to the LanguageID production:
748 *
749 * NOTE: this is somewhat deprecated, those productions were removed from
750 * the XML Second edition.
751 *
752 * [33] LanguageID ::= Langcode ('-' Subcode)*
753 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
754 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
755 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
756 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
757 * [38] Subcode ::= ([a-z] | [A-Z])+
758 *
759 * Returns 1 if correct 0 otherwise
760 **/
761int
762xmlCheckLanguageID(const xmlChar * lang)
763{
764 const xmlChar *cur = lang;
765
766 if (cur == NULL)
767 return (0);
768 if (((cur[0] == 'i') && (cur[1] == '-')) ||
769 ((cur[0] == 'I') && (cur[1] == '-'))) {
770 /*
771 * IANA code
772 */
773 cur += 2;
774 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
775 ((cur[0] >= 'a') && (cur[0] <= 'z')))
776 cur++;
777 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
778 ((cur[0] == 'X') && (cur[1] == '-'))) {
779 /*
780 * User code
781 */
782 cur += 2;
783 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
784 ((cur[0] >= 'a') && (cur[0] <= 'z')))
785 cur++;
786 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
787 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
788 /*
789 * ISO639
790 */
791 cur++;
792 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
793 ((cur[0] >= 'a') && (cur[0] <= 'z')))
794 cur++;
795 else
796 return (0);
797 } else
798 return (0);
799 while (cur[0] != 0) { /* non input consuming */
800 if (cur[0] != '-')
801 return (0);
802 cur++;
803 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
804 ((cur[0] >= 'a') && (cur[0] <= 'z')))
805 cur++;
806 else
807 return (0);
808 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
809 ((cur[0] >= 'a') && (cur[0] <= 'z')))
810 cur++;
811 }
812 return (1);
813}
814
Owen Taylor3473f882001-02-23 17:55:21 +0000815/************************************************************************
816 * *
817 * Parser stacks related functions and macros *
818 * *
819 ************************************************************************/
820
821xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
822 const xmlChar ** str);
823
Daniel Veillard0fb18932003-09-07 09:14:37 +0000824#ifdef SAX2
825/**
826 * nsPush:
827 * @ctxt: an XML parser context
828 * @prefix: the namespace prefix or NULL
829 * @URL: the namespace name
830 *
831 * Pushes a new parser namespace on top of the ns stack
832 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000833 * Returns -1 in case of error, -2 if the namespace should be discarded
834 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000835 */
836static int
837nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
838{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000839 if (ctxt->options & XML_PARSE_NSCLEAN) {
840 int i;
841 for (i = 0;i < ctxt->nsNr;i += 2) {
842 if (ctxt->nsTab[i] == prefix) {
843 /* in scope */
844 if (ctxt->nsTab[i + 1] == URL)
845 return(-2);
846 /* out of scope keep it */
847 break;
848 }
849 }
850 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
852 ctxt->nsMax = 10;
853 ctxt->nsNr = 0;
854 ctxt->nsTab = (const xmlChar **)
855 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
856 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000857 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax = 0;
859 return (-1);
860 }
861 } else if (ctxt->nsNr >= ctxt->nsMax) {
862 ctxt->nsMax *= 2;
863 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000864 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000865 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
866 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000867 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->nsMax /= 2;
869 return (-1);
870 }
871 }
872 ctxt->nsTab[ctxt->nsNr++] = prefix;
873 ctxt->nsTab[ctxt->nsNr++] = URL;
874 return (ctxt->nsNr);
875}
876/**
877 * nsPop:
878 * @ctxt: an XML parser context
879 * @nr: the number to pop
880 *
881 * Pops the top @nr parser prefix/namespace from the ns stack
882 *
883 * Returns the number of namespaces removed
884 */
885static int
886nsPop(xmlParserCtxtPtr ctxt, int nr)
887{
888 int i;
889
890 if (ctxt->nsTab == NULL) return(0);
891 if (ctxt->nsNr < nr) {
892 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
893 nr = ctxt->nsNr;
894 }
895 if (ctxt->nsNr <= 0)
896 return (0);
897
898 for (i = 0;i < nr;i++) {
899 ctxt->nsNr--;
900 ctxt->nsTab[ctxt->nsNr] = NULL;
901 }
902 return(nr);
903}
904#endif
905
906static int
907xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
908 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 int maxatts;
911
912 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000913 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 atts = (const xmlChar **)
915 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
919 if (attallocs == NULL) goto mem_error;
920 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000921 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000922 } else if (nr + 5 > ctxt->maxatts) {
923 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
925 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000927 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
929 (maxatts / 5) * sizeof(int));
930 if (attallocs == NULL) goto mem_error;
931 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000932 ctxt->maxatts = maxatts;
933 }
934 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000935mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000936 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000938}
939
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000940/**
941 * inputPush:
942 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000943 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000944 *
945 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000946 *
947 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000948 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000949int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000950inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
951{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000952 if ((ctxt == NULL) || (value == NULL))
953 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000954 if (ctxt->inputNr >= ctxt->inputMax) {
955 ctxt->inputMax *= 2;
956 ctxt->inputTab =
957 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
958 ctxt->inputMax *
959 sizeof(ctxt->inputTab[0]));
960 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000961 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000962 return (0);
963 }
964 }
965 ctxt->inputTab[ctxt->inputNr] = value;
966 ctxt->input = value;
967 return (ctxt->inputNr++);
968}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000969/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000970 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971 * @ctxt: an XML parser context
972 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000977xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000978inputPop(xmlParserCtxtPtr ctxt)
979{
980 xmlParserInputPtr ret;
981
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000982 if (ctxt == NULL)
983 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000984 if (ctxt->inputNr <= 0)
985 return (0);
986 ctxt->inputNr--;
987 if (ctxt->inputNr > 0)
988 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
989 else
990 ctxt->input = NULL;
991 ret = ctxt->inputTab[ctxt->inputNr];
992 ctxt->inputTab[ctxt->inputNr] = 0;
993 return (ret);
994}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000995/**
996 * nodePush:
997 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000998 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000999 *
1000 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001001 *
1002 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001003 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001004int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001005nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1006{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001007 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001008 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001009 xmlNodePtr *tmp;
1010
1011 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1012 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001013 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001014 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001015 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001016 return (0);
1017 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001018 ctxt->nodeTab = tmp;
1019 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001020 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001021 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001022 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001023 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1024 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001025 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001026 return(0);
1027 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001028 ctxt->nodeTab[ctxt->nodeNr] = value;
1029 ctxt->node = value;
1030 return (ctxt->nodeNr++);
1031}
1032/**
1033 * nodePop:
1034 * @ctxt: an XML parser context
1035 *
1036 * Pops the top element node from the node stack
1037 *
1038 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001039 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001040xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001041nodePop(xmlParserCtxtPtr ctxt)
1042{
1043 xmlNodePtr ret;
1044
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001045 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001046 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001047 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001048 ctxt->nodeNr--;
1049 if (ctxt->nodeNr > 0)
1050 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1051 else
1052 ctxt->node = NULL;
1053 ret = ctxt->nodeTab[ctxt->nodeNr];
1054 ctxt->nodeTab[ctxt->nodeNr] = 0;
1055 return (ret);
1056}
Daniel Veillarda2351322004-06-27 12:08:10 +00001057
1058#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001059/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001060 * nameNsPush:
1061 * @ctxt: an XML parser context
1062 * @value: the element name
1063 * @prefix: the element prefix
1064 * @URI: the element namespace name
1065 *
1066 * Pushes a new element name/prefix/URL on top of the name stack
1067 *
1068 * Returns -1 in case of error, the index in the stack otherwise
1069 */
1070static int
1071nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1072 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1073{
1074 if (ctxt->nameNr >= ctxt->nameMax) {
1075 const xmlChar * *tmp;
1076 void **tmp2;
1077 ctxt->nameMax *= 2;
1078 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1079 ctxt->nameMax *
1080 sizeof(ctxt->nameTab[0]));
1081 if (tmp == NULL) {
1082 ctxt->nameMax /= 2;
1083 goto mem_error;
1084 }
1085 ctxt->nameTab = tmp;
1086 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1087 ctxt->nameMax * 3 *
1088 sizeof(ctxt->pushTab[0]));
1089 if (tmp2 == NULL) {
1090 ctxt->nameMax /= 2;
1091 goto mem_error;
1092 }
1093 ctxt->pushTab = tmp2;
1094 }
1095 ctxt->nameTab[ctxt->nameNr] = value;
1096 ctxt->name = value;
1097 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1098 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001099 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001100 return (ctxt->nameNr++);
1101mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001102 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 return (-1);
1104}
1105/**
1106 * nameNsPop:
1107 * @ctxt: an XML parser context
1108 *
1109 * Pops the top element/prefix/URI name from the name stack
1110 *
1111 * Returns the name just removed
1112 */
1113static const xmlChar *
1114nameNsPop(xmlParserCtxtPtr ctxt)
1115{
1116 const xmlChar *ret;
1117
1118 if (ctxt->nameNr <= 0)
1119 return (0);
1120 ctxt->nameNr--;
1121 if (ctxt->nameNr > 0)
1122 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1123 else
1124 ctxt->name = NULL;
1125 ret = ctxt->nameTab[ctxt->nameNr];
1126 ctxt->nameTab[ctxt->nameNr] = NULL;
1127 return (ret);
1128}
Daniel Veillarda2351322004-06-27 12:08:10 +00001129#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130
1131/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001132 * namePush:
1133 * @ctxt: an XML parser context
1134 * @value: the element name
1135 *
1136 * Pushes a new element name on top of the name stack
1137 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001138 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001139 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001140int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001141namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001142{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001143 if (ctxt == NULL) return (-1);
1144
Daniel Veillard1c732d22002-11-30 11:22:59 +00001145 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001147 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001149 ctxt->nameMax *
1150 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001151 if (tmp == NULL) {
1152 ctxt->nameMax /= 2;
1153 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001154 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001155 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156 }
1157 ctxt->nameTab[ctxt->nameNr] = value;
1158 ctxt->name = value;
1159 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001160mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001161 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001163}
1164/**
1165 * namePop:
1166 * @ctxt: an XML parser context
1167 *
1168 * Pops the top element name from the name stack
1169 *
1170 * Returns the name just removed
1171 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001172const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001173namePop(xmlParserCtxtPtr ctxt)
1174{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001175 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001176
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001177 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1178 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001179 ctxt->nameNr--;
1180 if (ctxt->nameNr > 0)
1181 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1182 else
1183 ctxt->name = NULL;
1184 ret = ctxt->nameTab[ctxt->nameNr];
1185 ctxt->nameTab[ctxt->nameNr] = 0;
1186 return (ret);
1187}
Owen Taylor3473f882001-02-23 17:55:21 +00001188
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001189static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001190 if (ctxt->spaceNr >= ctxt->spaceMax) {
1191 ctxt->spaceMax *= 2;
1192 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1193 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1194 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001195 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001196 return(0);
1197 }
1198 }
1199 ctxt->spaceTab[ctxt->spaceNr] = val;
1200 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1201 return(ctxt->spaceNr++);
1202}
1203
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001204static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001205 int ret;
1206 if (ctxt->spaceNr <= 0) return(0);
1207 ctxt->spaceNr--;
1208 if (ctxt->spaceNr > 0)
1209 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1210 else
1211 ctxt->space = NULL;
1212 ret = ctxt->spaceTab[ctxt->spaceNr];
1213 ctxt->spaceTab[ctxt->spaceNr] = -1;
1214 return(ret);
1215}
1216
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 *                    use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1251
/* current byte of the current input, both macros read the same location */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* byte located val positions after the current one */
#define NXT(val) (ctxt->input->cur[(val)])
/* the current input position itself, usable as an lvalue */
#define CUR_PTR (ctxt->input->cur)
1256
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are equal to
 * c1 ... cn.  Bytes are read as unsigned char and evaluation stops at
 * the first mismatch.  The s argument is evaluated several times, so it
 * must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1274
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * character count and to the column.  Lines are not tracked.  Afterwards
 * a '%' at the new position is handed to xmlParserHandlePEReference()
 * and an exhausted input which cannot be grown is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but goes byte by byte so that line and column
 * are kept accurate when newlines are skipped.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1297
Daniel Veillarda880b122003-04-21 21:36:41 +00001298#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001299 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1300 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001301 xmlSHRINK (ctxt);
1302
1303static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1304 xmlParserInputShrink(ctxt->input);
1305 if ((*ctxt->input->cur == 0) &&
1306 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1307 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001308 }
Owen Taylor3473f882001-02-23 17:55:21 +00001309
Daniel Veillarda880b122003-04-21 21:36:41 +00001310#define GROW if ((ctxt->progressive == 0) && \
1311 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001312 xmlGROW (ctxt);
1313
1314static void xmlGROW (xmlParserCtxtPtr ctxt) {
1315 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1316 if ((*ctxt->input->cur == 0) &&
1317 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1318 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001319}
Owen Taylor3473f882001-02-23 17:55:21 +00001320
/* skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, updating column and character
 * count, and grow the input when the end of the buffer is reached.
 * Takes no argument and does no line accounting.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance over the current character which is l bytes long,
 * updating line and column, then hand a '%' at the new position to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context / of string s, l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a character of length l == 1 is stored as a single byte,
 * any other goes through xmlCopyCharMultiByte().
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[i++] = (xmlChar) (v);				\
    else i += xmlCopyCharMultiByte(&(b)[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001347
1348/**
1349 * xmlSkipBlankChars:
1350 * @ctxt: the XML parser context
1351 *
1352 * skip all blanks character found at that point in the input streams.
1353 * It pops up finished entities in the process if allowable at that point.
1354 *
1355 * Returns the number of space chars skipped
1356 */
1357
1358int
1359xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001360 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001361
1362 /*
1363 * It's Okay to use CUR/NEXT here since all the blanks are on
1364 * the ASCII range.
1365 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001366 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1367 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001368 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001369 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001370 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001371 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001372 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001373 if (*cur == '\n') {
1374 ctxt->input->line++; ctxt->input->col = 1;
1375 }
1376 cur++;
1377 res++;
1378 if (*cur == 0) {
1379 ctxt->input->cur = cur;
1380 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1381 cur = ctxt->input->cur;
1382 }
1383 }
1384 ctxt->input->cur = cur;
1385 } else {
1386 int cur;
1387 do {
1388 cur = CUR;
1389 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1390 NEXT;
1391 cur = CUR;
1392 res++;
1393 }
1394 while ((cur == 0) && (ctxt->inputNr > 1) &&
1395 (ctxt->instate != XML_PARSER_COMMENT)) {
1396 xmlPopInput(ctxt);
1397 cur = CUR;
1398 }
1399 /*
1400 * Need to handle support of entities branching here
1401 */
1402 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1403 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1404 }
Owen Taylor3473f882001-02-23 17:55:21 +00001405 return(res);
1406}
1407
1408/************************************************************************
1409 * *
1410 * Commodity functions to handle entities *
1411 * *
1412 ************************************************************************/
1413
1414/**
1415 * xmlPopInput:
1416 * @ctxt: an XML parser context
1417 *
1418 * xmlPopInput: the current input pointed by ctxt->input came to an end
1419 * pop it and return the next char.
1420 *
1421 * Returns the current xmlChar in the parser context
1422 */
1423xmlChar
1424xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001425 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001426 if (xmlParserDebugEntities)
1427 xmlGenericError(xmlGenericErrorContext,
1428 "Popping input %d\n", ctxt->inputNr);
1429 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001430 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001431 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1432 return(xmlPopInput(ctxt));
1433 return(CUR);
1434}
1435
1436/**
1437 * xmlPushInput:
1438 * @ctxt: an XML parser context
1439 * @input: an XML parser input fragment (entity, XML fragment ...).
1440 *
1441 * xmlPushInput: switch to a new input stream which is stacked on top
1442 * of the previous one(s).
1443 */
1444void
1445xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1446 if (input == NULL) return;
1447
1448 if (xmlParserDebugEntities) {
1449 if ((ctxt->input != NULL) && (ctxt->input->filename))
1450 xmlGenericError(xmlGenericErrorContext,
1451 "%s(%d): ", ctxt->input->filename,
1452 ctxt->input->line);
1453 xmlGenericError(xmlGenericErrorContext,
1454 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1455 }
1456 inputPush(ctxt, input);
1457 GROW;
1458}
1459
1460/**
1461 * xmlParseCharRef:
1462 * @ctxt: an XML parser context
1463 *
1464 * parse Reference declarations
1465 *
1466 * [66] CharRef ::= '&#' [0-9]+ ';' |
1467 * '&#x' [0-9a-fA-F]+ ';'
1468 *
1469 * [ WFC: Legal Character ]
1470 * Characters referred to using character references must match the
1471 * production for Char.
1472 *
1473 * Returns the value parsed (as an int), 0 in case of error
1474 */
1475int
1476xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001477 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001478 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001479 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001480
Owen Taylor3473f882001-02-23 17:55:21 +00001481 /*
1482 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1483 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001484 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001485 (NXT(2) == 'x')) {
1486 SKIP(3);
1487 GROW;
1488 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001489 if (count++ > 20) {
1490 count = 0;
1491 GROW;
1492 }
1493 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001494 val = val * 16 + (CUR - '0');
1495 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1496 val = val * 16 + (CUR - 'a') + 10;
1497 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1498 val = val * 16 + (CUR - 'A') + 10;
1499 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001500 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001501 val = 0;
1502 break;
1503 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001504 if (val > 0x10FFFF)
1505 outofrange = val;
1506
Owen Taylor3473f882001-02-23 17:55:21 +00001507 NEXT;
1508 count++;
1509 }
1510 if (RAW == ';') {
1511 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001512 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001513 ctxt->nbChars ++;
1514 ctxt->input->cur++;
1515 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001516 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001517 SKIP(2);
1518 GROW;
1519 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001520 if (count++ > 20) {
1521 count = 0;
1522 GROW;
1523 }
1524 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001525 val = val * 10 + (CUR - '0');
1526 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001527 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001528 val = 0;
1529 break;
1530 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001531 if (val > 0x10FFFF)
1532 outofrange = val;
1533
Owen Taylor3473f882001-02-23 17:55:21 +00001534 NEXT;
1535 count++;
1536 }
1537 if (RAW == ';') {
1538 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001539 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001540 ctxt->nbChars ++;
1541 ctxt->input->cur++;
1542 }
1543 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001544 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001545 }
1546
1547 /*
1548 * [ WFC: Legal Character ]
1549 * Characters referred to using character references must match the
1550 * production for Char.
1551 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001552 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001553 return(val);
1554 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001555 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1556 "xmlParseCharRef: invalid xmlChar value %d\n",
1557 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001558 }
1559 return(0);
1560}
1561
1562/**
1563 * xmlParseStringCharRef:
1564 * @ctxt: an XML parser context
1565 * @str: a pointer to an index in the string
1566 *
1567 * parse Reference declarations, variant parsing from a string rather
1568 * than an an input flow.
1569 *
1570 * [66] CharRef ::= '&#' [0-9]+ ';' |
1571 * '&#x' [0-9a-fA-F]+ ';'
1572 *
1573 * [ WFC: Legal Character ]
1574 * Characters referred to using character references must match the
1575 * production for Char.
1576 *
1577 * Returns the value parsed (as an int), 0 in case of error, str will be
1578 * updated to the current value of the index
1579 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001580static int
Owen Taylor3473f882001-02-23 17:55:21 +00001581xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1582 const xmlChar *ptr;
1583 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001584 unsigned int val = 0;
1585 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001586
1587 if ((str == NULL) || (*str == NULL)) return(0);
1588 ptr = *str;
1589 cur = *ptr;
1590 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1591 ptr += 3;
1592 cur = *ptr;
1593 while (cur != ';') { /* Non input consuming loop */
1594 if ((cur >= '0') && (cur <= '9'))
1595 val = val * 16 + (cur - '0');
1596 else if ((cur >= 'a') && (cur <= 'f'))
1597 val = val * 16 + (cur - 'a') + 10;
1598 else if ((cur >= 'A') && (cur <= 'F'))
1599 val = val * 16 + (cur - 'A') + 10;
1600 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001601 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001602 val = 0;
1603 break;
1604 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001605 if (val > 0x10FFFF)
1606 outofrange = val;
1607
Owen Taylor3473f882001-02-23 17:55:21 +00001608 ptr++;
1609 cur = *ptr;
1610 }
1611 if (cur == ';')
1612 ptr++;
1613 } else if ((cur == '&') && (ptr[1] == '#')){
1614 ptr += 2;
1615 cur = *ptr;
1616 while (cur != ';') { /* Non input consuming loops */
1617 if ((cur >= '0') && (cur <= '9'))
1618 val = val * 10 + (cur - '0');
1619 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001620 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001621 val = 0;
1622 break;
1623 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001624 if (val > 0x10FFFF)
1625 outofrange = val;
1626
Owen Taylor3473f882001-02-23 17:55:21 +00001627 ptr++;
1628 cur = *ptr;
1629 }
1630 if (cur == ';')
1631 ptr++;
1632 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001633 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 return(0);
1635 }
1636 *str = ptr;
1637
1638 /*
1639 * [ WFC: Legal Character ]
1640 * Characters referred to using character references must match the
1641 * production for Char.
1642 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001643 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001644 return(val);
1645 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001646 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1647 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1648 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001649 }
1650 return(0);
1651}
1652
1653/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001654 * xmlNewBlanksWrapperInputStream:
1655 * @ctxt: an XML parser context
1656 * @entity: an Entity pointer
1657 *
1658 * Create a new input stream for wrapping
1659 * blanks around a PEReference
1660 *
1661 * Returns the new input stream or NULL
1662 */
1663
1664static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1665
Daniel Veillardf4862f02002-09-10 11:13:43 +00001666static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001667xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1668 xmlParserInputPtr input;
1669 xmlChar *buffer;
1670 size_t length;
1671 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001672 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1673 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001674 return(NULL);
1675 }
1676 if (xmlParserDebugEntities)
1677 xmlGenericError(xmlGenericErrorContext,
1678 "new blanks wrapper for entity: %s\n", entity->name);
1679 input = xmlNewInputStream(ctxt);
1680 if (input == NULL) {
1681 return(NULL);
1682 }
1683 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001684 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001685 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001686 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001687 return(NULL);
1688 }
1689 buffer [0] = ' ';
1690 buffer [1] = '%';
1691 buffer [length-3] = ';';
1692 buffer [length-2] = ' ';
1693 buffer [length-1] = 0;
1694 memcpy(buffer + 2, entity->name, length - 5);
1695 input->free = deallocblankswrapper;
1696 input->base = buffer;
1697 input->cur = buffer;
1698 input->length = length;
1699 input->end = &buffer[length];
1700 return(input);
1701}
1702
1703/**
Owen Taylor3473f882001-02-23 17:55:21 +00001704 * xmlParserHandlePEReference:
1705 * @ctxt: the parser context
1706 *
1707 * [69] PEReference ::= '%' Name ';'
1708 *
1709 * [ WFC: No Recursion ]
1710 * A parsed entity must not contain a recursive
1711 * reference to itself, either directly or indirectly.
1712 *
1713 * [ WFC: Entity Declared ]
1714 * In a document without any DTD, a document with only an internal DTD
1715 * subset which contains no parameter entity references, or a document
1716 * with "standalone='yes'", ... ... The declaration of a parameter
1717 * entity must precede any reference to it...
1718 *
1719 * [ VC: Entity Declared ]
1720 * In a document with an external subset or external parameter entities
1721 * with "standalone='no'", ... ... The declaration of a parameter entity
1722 * must precede any reference to it...
1723 *
1724 * [ WFC: In DTD ]
1725 * Parameter-entity references may only appear in the DTD.
1726 * NOTE: misleading but this is handled.
1727 *
1728 * A PEReference may have been detected in the current input stream
1729 * the handling is done accordingly to
1730 * http://www.w3.org/TR/REC-xml#entproc
1731 * i.e.
1732 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001733 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001734 */
1735void
1736xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001737 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001738 xmlEntityPtr entity = NULL;
1739 xmlParserInputPtr input;
1740
Owen Taylor3473f882001-02-23 17:55:21 +00001741 if (RAW != '%') return;
1742 switch(ctxt->instate) {
1743 case XML_PARSER_CDATA_SECTION:
1744 return;
1745 case XML_PARSER_COMMENT:
1746 return;
1747 case XML_PARSER_START_TAG:
1748 return;
1749 case XML_PARSER_END_TAG:
1750 return;
1751 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 return;
1754 case XML_PARSER_PROLOG:
1755 case XML_PARSER_START:
1756 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001757 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001758 return;
1759 case XML_PARSER_ENTITY_DECL:
1760 case XML_PARSER_CONTENT:
1761 case XML_PARSER_ATTRIBUTE_VALUE:
1762 case XML_PARSER_PI:
1763 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001764 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001765 /* we just ignore it there */
1766 return;
1767 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001768 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001769 return;
1770 case XML_PARSER_ENTITY_VALUE:
1771 /*
1772 * NOTE: in the case of entity values, we don't do the
1773 * substitution here since we need the literal
1774 * entity value to be able to save the internal
1775 * subset of the document.
1776 * This will be handled by xmlStringDecodeEntities
1777 */
1778 return;
1779 case XML_PARSER_DTD:
1780 /*
1781 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1782 * In the internal DTD subset, parameter-entity references
1783 * can occur only where markup declarations can occur, not
1784 * within markup declarations.
1785 * In that case this is handled in xmlParseMarkupDecl
1786 */
1787 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1788 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001789 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001790 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001791 break;
1792 case XML_PARSER_IGNORE:
1793 return;
1794 }
1795
1796 NEXT;
1797 name = xmlParseName(ctxt);
1798 if (xmlParserDebugEntities)
1799 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001800 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001801 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001802 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 } else {
1804 if (RAW == ';') {
1805 NEXT;
1806 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1807 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1808 if (entity == NULL) {
1809
1810 /*
1811 * [ WFC: Entity Declared ]
1812 * In a document without any DTD, a document with only an
1813 * internal DTD subset which contains no parameter entity
1814 * references, or a document with "standalone='yes'", ...
1815 * ... The declaration of a parameter entity must precede
1816 * any reference to it...
1817 */
1818 if ((ctxt->standalone == 1) ||
1819 ((ctxt->hasExternalSubset == 0) &&
1820 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001821 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001822 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001823 } else {
1824 /*
1825 * [ VC: Entity Declared ]
1826 * In a document with an external subset or external
1827 * parameter entities with "standalone='no'", ...
1828 * ... The declaration of a parameter entity must precede
1829 * any reference to it...
1830 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001831 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1832 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1833 "PEReference: %%%s; not found\n",
1834 name);
1835 } else
1836 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1837 "PEReference: %%%s; not found\n",
1838 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 ctxt->valid = 0;
1840 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001841 } else if (ctxt->input->free != deallocblankswrapper) {
1842 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1843 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001844 } else {
1845 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1846 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001847 xmlChar start[4];
1848 xmlCharEncoding enc;
1849
Owen Taylor3473f882001-02-23 17:55:21 +00001850 /*
1851 * handle the extra spaces added before and after
1852 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001853 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001854 */
1855 input = xmlNewEntityInputStream(ctxt, entity);
1856 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001857
1858 /*
1859 * Get the 4 first bytes and decode the charset
1860 * if enc != XML_CHAR_ENCODING_NONE
1861 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001862 * Note that, since we may have some non-UTF8
1863 * encoding (like UTF16, bug 135229), the 'length'
1864 * is not known, but we can calculate based upon
1865 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001866 */
1867 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001868 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001869 start[0] = RAW;
1870 start[1] = NXT(1);
1871 start[2] = NXT(2);
1872 start[3] = NXT(3);
1873 enc = xmlDetectCharEncoding(start, 4);
1874 if (enc != XML_CHAR_ENCODING_NONE) {
1875 xmlSwitchEncoding(ctxt, enc);
1876 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001877 }
1878
Owen Taylor3473f882001-02-23 17:55:21 +00001879 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001880 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1881 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001882 xmlParseTextDecl(ctxt);
1883 }
Owen Taylor3473f882001-02-23 17:55:21 +00001884 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001885 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1886 "PEReference: %s is not a parameter entity\n",
1887 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001888 }
1889 }
1890 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001891 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001892 }
Owen Taylor3473f882001-02-23 17:55:21 +00001893 }
1894}
1895
/*
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates <buffer> accordingly.
 * The expansion site must provide an int variable named <buffer>_size and
 * a "mem_error" label, which is jumped to when the reallocation fails; in
 * that case <buffer> still points to the old block.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
1907
1908/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001909 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001910 * @ctxt: the parser context
1911 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001912 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001913 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1914 * @end: an end marker xmlChar, 0 if none
1915 * @end2: an end marker xmlChar, 0 if none
1916 * @end3: an end marker xmlChar, 0 if none
1917 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001918 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001919 *
1920 * [67] Reference ::= EntityRef | CharRef
1921 *
1922 * [69] PEReference ::= '%' Name ';'
1923 *
1924 * Returns A newly allocated string with the substitution done. The caller
1925 * must deallocate it !
1926 */
1927xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001928xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1929 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001930 xmlChar *buffer = NULL;
1931 int buffer_size = 0;
1932
1933 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001934 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001935 xmlEntityPtr ent;
1936 int c,l;
1937 int nbchars = 0;
1938
Daniel Veillarda82b1822004-11-08 16:24:57 +00001939 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001940 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001941 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001942
1943 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001944 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001945 return(NULL);
1946 }
1947
1948 /*
1949 * allocate a translation buffer.
1950 */
1951 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001952 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001953 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001954
1955 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001956 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001957 * we are operating on already parsed values.
1958 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001959 if (str < last)
1960 c = CUR_SCHAR(str, l);
1961 else
1962 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001963 while ((c != 0) && (c != end) && /* non input consuming loop */
1964 (c != end2) && (c != end3)) {
1965
1966 if (c == 0) break;
1967 if ((c == '&') && (str[1] == '#')) {
1968 int val = xmlParseStringCharRef(ctxt, &str);
1969 if (val != 0) {
1970 COPY_BUF(0,buffer,nbchars,val);
1971 }
1972 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1973 if (xmlParserDebugEntities)
1974 xmlGenericError(xmlGenericErrorContext,
1975 "String decoding Entity Reference: %.30s\n",
1976 str);
1977 ent = xmlParseStringEntityRef(ctxt, &str);
1978 if ((ent != NULL) &&
1979 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1980 if (ent->content != NULL) {
1981 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1982 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001983 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1984 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001985 }
1986 } else if ((ent != NULL) && (ent->content != NULL)) {
1987 xmlChar *rep;
1988
1989 ctxt->depth++;
1990 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1991 0, 0, 0);
1992 ctxt->depth--;
1993 if (rep != NULL) {
1994 current = rep;
1995 while (*current != 0) { /* non input consuming loop */
1996 buffer[nbchars++] = *current++;
1997 if (nbchars >
1998 buffer_size - XML_PARSER_BUFFER_SIZE) {
1999 growBuffer(buffer);
2000 }
2001 }
2002 xmlFree(rep);
2003 }
2004 } else if (ent != NULL) {
2005 int i = xmlStrlen(ent->name);
2006 const xmlChar *cur = ent->name;
2007
2008 buffer[nbchars++] = '&';
2009 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2010 growBuffer(buffer);
2011 }
2012 for (;i > 0;i--)
2013 buffer[nbchars++] = *cur++;
2014 buffer[nbchars++] = ';';
2015 }
2016 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2017 if (xmlParserDebugEntities)
2018 xmlGenericError(xmlGenericErrorContext,
2019 "String decoding PE Reference: %.30s\n", str);
2020 ent = xmlParseStringPEReference(ctxt, &str);
2021 if (ent != NULL) {
2022 xmlChar *rep;
2023
2024 ctxt->depth++;
2025 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2026 0, 0, 0);
2027 ctxt->depth--;
2028 if (rep != NULL) {
2029 current = rep;
2030 while (*current != 0) { /* non input consuming loop */
2031 buffer[nbchars++] = *current++;
2032 if (nbchars >
2033 buffer_size - XML_PARSER_BUFFER_SIZE) {
2034 growBuffer(buffer);
2035 }
2036 }
2037 xmlFree(rep);
2038 }
2039 }
2040 } else {
2041 COPY_BUF(l,buffer,nbchars,c);
2042 str += l;
2043 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2044 growBuffer(buffer);
2045 }
2046 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002047 if (str < last)
2048 c = CUR_SCHAR(str, l);
2049 else
2050 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002051 }
2052 buffer[nbchars++] = 0;
2053 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002054
2055mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002056 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002057 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002058}
2059
Daniel Veillarde57ec792003-09-10 10:50:59 +00002060/**
2061 * xmlStringDecodeEntities:
2062 * @ctxt: the parser context
2063 * @str: the input string
2064 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2065 * @end: an end marker xmlChar, 0 if none
2066 * @end2: an end marker xmlChar, 0 if none
2067 * @end3: an end marker xmlChar, 0 if none
2068 *
2069 * Takes a entity string content and process to do the adequate substitutions.
2070 *
2071 * [67] Reference ::= EntityRef | CharRef
2072 *
2073 * [69] PEReference ::= '%' Name ';'
2074 *
2075 * Returns A newly allocated string with the substitution done. The caller
2076 * must deallocate it !
2077 */
2078xmlChar *
2079xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2080 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002081 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002082 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2083 end, end2, end3));
2084}
Owen Taylor3473f882001-02-23 17:55:21 +00002085
2086/************************************************************************
2087 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002088 * Commodity functions, cleanup needed ? *
2089 * *
2090 ************************************************************************/
2091
2092/**
2093 * areBlanks:
2094 * @ctxt: an XML parser context
2095 * @str: a xmlChar *
2096 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002097 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002098 *
2099 * Is this a sequence of blank chars that one can ignore ?
2100 *
2101 * Returns 1 if ignorable 0 otherwise.
2102 */
2103
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002104static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2105 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002106 int i, ret;
2107 xmlNodePtr lastChild;
2108
Daniel Veillard05c13a22001-09-09 08:38:09 +00002109 /*
2110 * Don't spend time trying to differentiate them, the same callback is
2111 * used !
2112 */
2113 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002114 return(0);
2115
Owen Taylor3473f882001-02-23 17:55:21 +00002116 /*
2117 * Check for xml:space value.
2118 */
2119 if (*(ctxt->space) == 1)
2120 return(0);
2121
2122 /*
2123 * Check that the string is made of blanks
2124 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002125 if (blank_chars == 0) {
2126 for (i = 0;i < len;i++)
2127 if (!(IS_BLANK_CH(str[i]))) return(0);
2128 }
Owen Taylor3473f882001-02-23 17:55:21 +00002129
2130 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002131 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002132 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002133 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002134 if (ctxt->myDoc != NULL) {
2135 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2136 if (ret == 0) return(1);
2137 if (ret == 1) return(0);
2138 }
2139
2140 /*
2141 * Otherwise, heuristic :-\
2142 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002143 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002144 if ((ctxt->node->children == NULL) &&
2145 (RAW == '<') && (NXT(1) == '/')) return(0);
2146
2147 lastChild = xmlGetLastChild(ctxt->node);
2148 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002149 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2150 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002151 } else if (xmlNodeIsText(lastChild))
2152 return(0);
2153 else if ((ctxt->node->children != NULL) &&
2154 (xmlNodeIsText(ctxt->node->children)))
2155 return(0);
2156 return(1);
2157}
2158
Owen Taylor3473f882001-02-23 17:55:21 +00002159/************************************************************************
2160 * *
2161 * Extra stuff for namespace support *
2162 * Relates to http://www.w3.org/TR/WD-xml-names *
2163 * *
2164 ************************************************************************/
2165
2166/**
2167 * xmlSplitQName:
2168 * @ctxt: an XML parser context
2169 * @name: an XML parser context
2170 * @prefix: a xmlChar **
2171 *
2172 * parse an UTF8 encoded XML qualified name string
2173 *
2174 * [NS 5] QName ::= (Prefix ':')? LocalPart
2175 *
2176 * [NS 6] Prefix ::= NCName
2177 *
2178 * [NS 7] LocalPart ::= NCName
2179 *
2180 * Returns the local part, and prefix is updated
2181 * to get the Prefix if any.
2182 */
2183
2184xmlChar *
2185xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2186 xmlChar buf[XML_MAX_NAMELEN + 5];
2187 xmlChar *buffer = NULL;
2188 int len = 0;
2189 int max = XML_MAX_NAMELEN;
2190 xmlChar *ret = NULL;
2191 const xmlChar *cur = name;
2192 int c;
2193
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002194 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002195 *prefix = NULL;
2196
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002197 if (cur == NULL) return(NULL);
2198
Owen Taylor3473f882001-02-23 17:55:21 +00002199#ifndef XML_XML_NAMESPACE
2200 /* xml: prefix is not really a namespace */
2201 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2202 (cur[2] == 'l') && (cur[3] == ':'))
2203 return(xmlStrdup(name));
2204#endif
2205
Daniel Veillard597bc482003-07-24 16:08:28 +00002206 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002207 if (cur[0] == ':')
2208 return(xmlStrdup(name));
2209
2210 c = *cur++;
2211 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2212 buf[len++] = c;
2213 c = *cur++;
2214 }
2215 if (len >= max) {
2216 /*
2217 * Okay someone managed to make a huge name, so he's ready to pay
2218 * for the processing speed.
2219 */
2220 max = len * 2;
2221
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002222 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002223 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002224 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002225 return(NULL);
2226 }
2227 memcpy(buffer, buf, len);
2228 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2229 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002230 xmlChar *tmp;
2231
Owen Taylor3473f882001-02-23 17:55:21 +00002232 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002233 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002234 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002235 if (tmp == NULL) {
2236 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002237 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002238 return(NULL);
2239 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002240 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002241 }
2242 buffer[len++] = c;
2243 c = *cur++;
2244 }
2245 buffer[len] = 0;
2246 }
2247
Daniel Veillard597bc482003-07-24 16:08:28 +00002248 /* nasty but well=formed
2249 if ((c == ':') && (*cur == 0)) {
2250 return(xmlStrdup(name));
2251 } */
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 if (buffer == NULL)
2254 ret = xmlStrndup(buf, len);
2255 else {
2256 ret = buffer;
2257 buffer = NULL;
2258 max = XML_MAX_NAMELEN;
2259 }
2260
2261
2262 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002263 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002264 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002265 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002266 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002267 }
Owen Taylor3473f882001-02-23 17:55:21 +00002268 len = 0;
2269
Daniel Veillardbb284f42002-10-16 18:02:47 +00002270 /*
2271 * Check that the first character is proper to start
2272 * a new name
2273 */
2274 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2275 ((c >= 0x41) && (c <= 0x5A)) ||
2276 (c == '_') || (c == ':'))) {
2277 int l;
2278 int first = CUR_SCHAR(cur, l);
2279
2280 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002281 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002282 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002283 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002284 }
2285 }
2286 cur++;
2287
Owen Taylor3473f882001-02-23 17:55:21 +00002288 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2289 buf[len++] = c;
2290 c = *cur++;
2291 }
2292 if (len >= max) {
2293 /*
2294 * Okay someone managed to make a huge name, so he's ready to pay
2295 * for the processing speed.
2296 */
2297 max = len * 2;
2298
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002299 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002300 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002301 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002302 return(NULL);
2303 }
2304 memcpy(buffer, buf, len);
2305 while (c != 0) { /* tested bigname2.xml */
2306 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002307 xmlChar *tmp;
2308
Owen Taylor3473f882001-02-23 17:55:21 +00002309 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002310 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002311 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002312 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002313 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002314 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002315 return(NULL);
2316 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002317 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002318 }
2319 buffer[len++] = c;
2320 c = *cur++;
2321 }
2322 buffer[len] = 0;
2323 }
2324
2325 if (buffer == NULL)
2326 ret = xmlStrndup(buf, len);
2327 else {
2328 ret = buffer;
2329 }
2330 }
2331
2332 return(ret);
2333}
2334
2335/************************************************************************
2336 * *
2337 * The parser itself *
2338 * Relates to http://www.w3.org/TR/REC-xml *
2339 * *
2340 ************************************************************************/
2341
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002342static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002343static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002344 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002345
Owen Taylor3473f882001-02-23 17:55:21 +00002346/**
2347 * xmlParseName:
2348 * @ctxt: an XML parser context
2349 *
2350 * parse an XML name.
2351 *
2352 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2353 * CombiningChar | Extender
2354 *
2355 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2356 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002357 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002358 *
2359 * Returns the Name parsed or NULL
2360 */
2361
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002362const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002363xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002364 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002365 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002366 int count = 0;
2367
2368 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002369
2370 /*
2371 * Accelerator for simple ASCII names
2372 */
2373 in = ctxt->input->cur;
2374 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2375 ((*in >= 0x41) && (*in <= 0x5A)) ||
2376 (*in == '_') || (*in == ':')) {
2377 in++;
2378 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2379 ((*in >= 0x41) && (*in <= 0x5A)) ||
2380 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002381 (*in == '_') || (*in == '-') ||
2382 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002383 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002384 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002385 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002386 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002387 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002388 ctxt->nbChars += count;
2389 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002390 if (ret == NULL)
2391 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002392 return(ret);
2393 }
2394 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002395 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002396}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002397
Daniel Veillard46de64e2002-05-29 08:21:33 +00002398/**
2399 * xmlParseNameAndCompare:
2400 * @ctxt: an XML parser context
2401 *
2402 * parse an XML name and compares for match
2403 * (specialized for endtag parsing)
2404 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002405 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2406 * and the name for mismatch
2407 */
2408
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002409static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002410xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002411 register const xmlChar *cmp = other;
2412 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002413 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002414
2415 GROW;
2416
2417 in = ctxt->input->cur;
2418 while (*in != 0 && *in == *cmp) {
2419 ++in;
2420 ++cmp;
2421 }
William M. Brack76e95df2003-10-18 16:20:14 +00002422 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002423 /* success */
2424 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002425 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002426 }
2427 /* failure (or end of input buffer), check with full function */
2428 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002429 /* strings coming from the dictionnary direct compare possible */
2430 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002431 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002432 }
2433 return ret;
2434}
2435
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002436static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002437xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002438 int len = 0, l;
2439 int c;
2440 int count = 0;
2441
2442 /*
2443 * Handler for more complex cases
2444 */
2445 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002446 c = CUR_CHAR(l);
2447 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2448 (!IS_LETTER(c) && (c != '_') &&
2449 (c != ':'))) {
2450 return(NULL);
2451 }
2452
2453 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002454 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002455 (c == '.') || (c == '-') ||
2456 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002457 (IS_COMBINING(c)) ||
2458 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002459 if (count++ > 100) {
2460 count = 0;
2461 GROW;
2462 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002463 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002464 NEXTL(l);
2465 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002466 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002467 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002468}
2469
2470/**
2471 * xmlParseStringName:
2472 * @ctxt: an XML parser context
2473 * @str: a pointer to the string pointer (IN/OUT)
2474 *
2475 * parse an XML name.
2476 *
2477 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2478 * CombiningChar | Extender
2479 *
2480 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2481 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002482 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002483 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002484 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002485 * is updated to the current location in the string.
2486 */
2487
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002488static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002489xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2490 xmlChar buf[XML_MAX_NAMELEN + 5];
2491 const xmlChar *cur = *str;
2492 int len = 0, l;
2493 int c;
2494
2495 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002496 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002497 (c != ':')) {
2498 return(NULL);
2499 }
2500
William M. Brack871611b2003-10-18 04:53:14 +00002501 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002502 (c == '.') || (c == '-') ||
2503 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002504 (IS_COMBINING(c)) ||
2505 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002506 COPY_BUF(l,buf,len,c);
2507 cur += l;
2508 c = CUR_SCHAR(cur, l);
2509 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2510 /*
2511 * Okay someone managed to make a huge name, so he's ready to pay
2512 * for the processing speed.
2513 */
2514 xmlChar *buffer;
2515 int max = len * 2;
2516
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002517 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002518 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002519 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002520 return(NULL);
2521 }
2522 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002523 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002524 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002525 (c == '.') || (c == '-') ||
2526 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002527 (IS_COMBINING(c)) ||
2528 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002529 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002530 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002531 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002532 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002533 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002534 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002535 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002536 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002537 return(NULL);
2538 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002539 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002540 }
2541 COPY_BUF(l,buffer,len,c);
2542 cur += l;
2543 c = CUR_SCHAR(cur, l);
2544 }
2545 buffer[len] = 0;
2546 *str = cur;
2547 return(buffer);
2548 }
2549 }
2550 *str = cur;
2551 return(xmlStrndup(buf, len));
2552}
2553
2554/**
2555 * xmlParseNmtoken:
2556 * @ctxt: an XML parser context
2557 *
2558 * parse an XML Nmtoken.
2559 *
2560 * [7] Nmtoken ::= (NameChar)+
2561 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002562 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002563 *
2564 * Returns the Nmtoken parsed or NULL
2565 */
2566
2567xmlChar *
2568xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2569 xmlChar buf[XML_MAX_NAMELEN + 5];
2570 int len = 0, l;
2571 int c;
2572 int count = 0;
2573
2574 GROW;
2575 c = CUR_CHAR(l);
2576
William M. Brack871611b2003-10-18 04:53:14 +00002577 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002578 (c == '.') || (c == '-') ||
2579 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002580 (IS_COMBINING(c)) ||
2581 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002582 if (count++ > 100) {
2583 count = 0;
2584 GROW;
2585 }
2586 COPY_BUF(l,buf,len,c);
2587 NEXTL(l);
2588 c = CUR_CHAR(l);
2589 if (len >= XML_MAX_NAMELEN) {
2590 /*
2591 * Okay someone managed to make a huge token, so he's ready to pay
2592 * for the processing speed.
2593 */
2594 xmlChar *buffer;
2595 int max = len * 2;
2596
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002597 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002598 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002599 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002600 return(NULL);
2601 }
2602 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002603 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002604 (c == '.') || (c == '-') ||
2605 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002606 (IS_COMBINING(c)) ||
2607 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002608 if (count++ > 100) {
2609 count = 0;
2610 GROW;
2611 }
2612 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002613 xmlChar *tmp;
2614
Owen Taylor3473f882001-02-23 17:55:21 +00002615 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002616 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002617 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002618 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002619 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002620 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002621 return(NULL);
2622 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002624 }
2625 COPY_BUF(l,buffer,len,c);
2626 NEXTL(l);
2627 c = CUR_CHAR(l);
2628 }
2629 buffer[len] = 0;
2630 return(buffer);
2631 }
2632 }
2633 if (len == 0)
2634 return(NULL);
2635 return(xmlStrndup(buf, len));
2636}
2637
2638/**
2639 * xmlParseEntityValue:
2640 * @ctxt: an XML parser context
2641 * @orig: if non-NULL store a copy of the original entity value
2642 *
2643 * parse a value for ENTITY declarations
2644 *
2645 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2646 * "'" ([^%&'] | PEReference | Reference)* "'"
2647 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002648 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002649 */
2650
2651xmlChar *
2652xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2653 xmlChar *buf = NULL;
2654 int len = 0;
2655 int size = XML_PARSER_BUFFER_SIZE;
2656 int c, l;
2657 xmlChar stop;
2658 xmlChar *ret = NULL;
2659 const xmlChar *cur = NULL;
2660 xmlParserInputPtr input;
2661
2662 if (RAW == '"') stop = '"';
2663 else if (RAW == '\'') stop = '\'';
2664 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002665 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002666 return(NULL);
2667 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002668 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002669 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002670 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002671 return(NULL);
2672 }
2673
2674 /*
2675 * The content of the entity definition is copied in a buffer.
2676 */
2677
2678 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2679 input = ctxt->input;
2680 GROW;
2681 NEXT;
2682 c = CUR_CHAR(l);
2683 /*
2684 * NOTE: 4.4.5 Included in Literal
2685 * When a parameter entity reference appears in a literal entity
2686 * value, ... a single or double quote character in the replacement
2687 * text is always treated as a normal data character and will not
2688 * terminate the literal.
2689 * In practice it means we stop the loop only when back at parsing
2690 * the initial entity and the quote is found
2691 */
William M. Brack871611b2003-10-18 04:53:14 +00002692 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002693 (ctxt->input != input))) {
2694 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002695 xmlChar *tmp;
2696
Owen Taylor3473f882001-02-23 17:55:21 +00002697 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002698 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2699 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002700 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002701 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002702 return(NULL);
2703 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002704 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002705 }
2706 COPY_BUF(l,buf,len,c);
2707 NEXTL(l);
2708 /*
2709 * Pop-up of finished entities.
2710 */
2711 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2712 xmlPopInput(ctxt);
2713
2714 GROW;
2715 c = CUR_CHAR(l);
2716 if (c == 0) {
2717 GROW;
2718 c = CUR_CHAR(l);
2719 }
2720 }
2721 buf[len] = 0;
2722
2723 /*
2724 * Raise problem w.r.t. '&' and '%' being used in non-entities
2725 * reference constructs. Note Charref will be handled in
2726 * xmlStringDecodeEntities()
2727 */
2728 cur = buf;
2729 while (*cur != 0) { /* non input consuming */
2730 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2731 xmlChar *name;
2732 xmlChar tmp = *cur;
2733
2734 cur++;
2735 name = xmlParseStringName(ctxt, &cur);
2736 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002737 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002738 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002739 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002740 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002741 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2742 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002743 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002744 }
2745 if (name != NULL)
2746 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002747 if (*cur == 0)
2748 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002749 }
2750 cur++;
2751 }
2752
2753 /*
2754 * Then PEReference entities are substituted.
2755 */
2756 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002757 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002758 xmlFree(buf);
2759 } else {
2760 NEXT;
2761 /*
2762 * NOTE: 4.4.7 Bypassed
2763 * When a general entity reference appears in the EntityValue in
2764 * an entity declaration, it is bypassed and left as is.
2765 * so XML_SUBSTITUTE_REF is not set here.
2766 */
2767 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2768 0, 0, 0);
2769 if (orig != NULL)
2770 *orig = buf;
2771 else
2772 xmlFree(buf);
2773 }
2774
2775 return(ret);
2776}
2777
2778/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002779 * xmlParseAttValueComplex:
2780 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002781 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002782 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002783 *
2784 * parse a value for an attribute, this is the fallback function
2785 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002786 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002787 *
2788 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2789 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002790static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002791xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002792 xmlChar limit = 0;
2793 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002794 int len = 0;
2795 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002796 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002797 xmlChar *current = NULL;
2798 xmlEntityPtr ent;
2799
Owen Taylor3473f882001-02-23 17:55:21 +00002800 if (NXT(0) == '"') {
2801 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2802 limit = '"';
2803 NEXT;
2804 } else if (NXT(0) == '\'') {
2805 limit = '\'';
2806 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2807 NEXT;
2808 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002809 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002810 return(NULL);
2811 }
2812
2813 /*
2814 * allocate a translation buffer.
2815 */
2816 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002817 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002818 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002819
2820 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002821 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002822 */
2823 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002824 while ((NXT(0) != limit) && /* checked */
2825 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002826 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002827 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002828 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002829 if (NXT(1) == '#') {
2830 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002833 if (ctxt->replaceEntities) {
2834 if (len > buf_size - 10) {
2835 growBuffer(buf);
2836 }
2837 buf[len++] = '&';
2838 } else {
2839 /*
2840 * The reparsing will be done in xmlStringGetNodeList()
2841 * called by the attribute() function in SAX.c
2842 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002843 if (len > buf_size - 10) {
2844 growBuffer(buf);
2845 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002846 buf[len++] = '&';
2847 buf[len++] = '#';
2848 buf[len++] = '3';
2849 buf[len++] = '8';
2850 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002851 }
2852 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002853 if (len > buf_size - 10) {
2854 growBuffer(buf);
2855 }
Owen Taylor3473f882001-02-23 17:55:21 +00002856 len += xmlCopyChar(0, &buf[len], val);
2857 }
2858 } else {
2859 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002860 if ((ent != NULL) &&
2861 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2862 if (len > buf_size - 10) {
2863 growBuffer(buf);
2864 }
2865 if ((ctxt->replaceEntities == 0) &&
2866 (ent->content[0] == '&')) {
2867 buf[len++] = '&';
2868 buf[len++] = '#';
2869 buf[len++] = '3';
2870 buf[len++] = '8';
2871 buf[len++] = ';';
2872 } else {
2873 buf[len++] = ent->content[0];
2874 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002875 } else if ((ent != NULL) &&
2876 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002877 xmlChar *rep;
2878
2879 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2880 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002881 XML_SUBSTITUTE_REF,
2882 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002883 if (rep != NULL) {
2884 current = rep;
2885 while (*current != 0) { /* non input consuming */
2886 buf[len++] = *current++;
2887 if (len > buf_size - 10) {
2888 growBuffer(buf);
2889 }
2890 }
2891 xmlFree(rep);
2892 }
2893 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002894 if (len > buf_size - 10) {
2895 growBuffer(buf);
2896 }
Owen Taylor3473f882001-02-23 17:55:21 +00002897 if (ent->content != NULL)
2898 buf[len++] = ent->content[0];
2899 }
2900 } else if (ent != NULL) {
2901 int i = xmlStrlen(ent->name);
2902 const xmlChar *cur = ent->name;
2903
2904 /*
2905 * This may look absurd but is needed to detect
2906 * entities problems
2907 */
2908 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2909 (ent->content != NULL)) {
2910 xmlChar *rep;
2911 rep = xmlStringDecodeEntities(ctxt, ent->content,
2912 XML_SUBSTITUTE_REF, 0, 0, 0);
2913 if (rep != NULL)
2914 xmlFree(rep);
2915 }
2916
2917 /*
2918 * Just output the reference
2919 */
2920 buf[len++] = '&';
2921 if (len > buf_size - i - 10) {
2922 growBuffer(buf);
2923 }
2924 for (;i > 0;i--)
2925 buf[len++] = *cur++;
2926 buf[len++] = ';';
2927 }
2928 }
2929 } else {
2930 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002931 if ((len != 0) || (!normalize)) {
2932 if ((!normalize) || (!in_space)) {
2933 COPY_BUF(l,buf,len,0x20);
2934 if (len > buf_size - 10) {
2935 growBuffer(buf);
2936 }
2937 }
2938 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002939 }
2940 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002941 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002942 COPY_BUF(l,buf,len,c);
2943 if (len > buf_size - 10) {
2944 growBuffer(buf);
2945 }
2946 }
2947 NEXTL(l);
2948 }
2949 GROW;
2950 c = CUR_CHAR(l);
2951 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002952 if ((in_space) && (normalize)) {
2953 while (buf[len - 1] == 0x20) len--;
2954 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002955 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002956 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002957 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002958 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002959 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2960 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002961 } else
2962 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002963 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002964 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002965
2966mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002967 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002968 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002969}
2970
2971/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002972 * xmlParseAttValue:
2973 * @ctxt: an XML parser context
2974 *
2975 * parse a value for an attribute
2976 * Note: the parser won't do substitution of entities here, this
2977 * will be handled later in xmlStringGetNodeList
2978 *
2979 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2980 * "'" ([^<&'] | Reference)* "'"
2981 *
2982 * 3.3.3 Attribute-Value Normalization:
2983 * Before the value of an attribute is passed to the application or
2984 * checked for validity, the XML processor must normalize it as follows:
2985 * - a character reference is processed by appending the referenced
2986 * character to the attribute value
2987 * - an entity reference is processed by recursively processing the
2988 * replacement text of the entity
2989 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2990 * appending #x20 to the normalized value, except that only a single
2991 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2992 * parsed entity or the literal entity value of an internal parsed entity
2993 * - other characters are processed by appending them to the normalized value
2994 * If the declared value is not CDATA, then the XML processor must further
2995 * process the normalized attribute value by discarding any leading and
2996 * trailing space (#x20) characters, and by replacing sequences of space
2997 * (#x20) characters by a single space (#x20) character.
2998 * All attributes for which no declaration has been read should be treated
2999 * by a non-validating parser as if declared CDATA.
3000 *
3001 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3002 */
3003
3004
3005xmlChar *
3006xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003007 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003008 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003009}
3010
3011/**
Owen Taylor3473f882001-02-23 17:55:21 +00003012 * xmlParseSystemLiteral:
3013 * @ctxt: an XML parser context
3014 *
3015 * parse an XML Literal
3016 *
3017 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3018 *
3019 * Returns the SystemLiteral parsed or NULL
3020 */
3021
3022xmlChar *
3023xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3024 xmlChar *buf = NULL;
3025 int len = 0;
3026 int size = XML_PARSER_BUFFER_SIZE;
3027 int cur, l;
3028 xmlChar stop;
3029 int state = ctxt->instate;
3030 int count = 0;
3031
3032 SHRINK;
3033 if (RAW == '"') {
3034 NEXT;
3035 stop = '"';
3036 } else if (RAW == '\'') {
3037 NEXT;
3038 stop = '\'';
3039 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003040 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003041 return(NULL);
3042 }
3043
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003044 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003045 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003046 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003047 return(NULL);
3048 }
3049 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3050 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003051 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003053 xmlChar *tmp;
3054
Owen Taylor3473f882001-02-23 17:55:21 +00003055 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003056 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3057 if (tmp == NULL) {
3058 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003059 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003060 ctxt->instate = (xmlParserInputState) state;
3061 return(NULL);
3062 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003063 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003064 }
3065 count++;
3066 if (count > 50) {
3067 GROW;
3068 count = 0;
3069 }
3070 COPY_BUF(l,buf,len,cur);
3071 NEXTL(l);
3072 cur = CUR_CHAR(l);
3073 if (cur == 0) {
3074 GROW;
3075 SHRINK;
3076 cur = CUR_CHAR(l);
3077 }
3078 }
3079 buf[len] = 0;
3080 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003081 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003082 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003083 } else {
3084 NEXT;
3085 }
3086 return(buf);
3087}
3088
3089/**
3090 * xmlParsePubidLiteral:
3091 * @ctxt: an XML parser context
3092 *
3093 * parse an XML public literal
3094 *
3095 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3096 *
3097 * Returns the PubidLiteral parsed or NULL.
3098 */
3099
3100xmlChar *
3101xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3102 xmlChar *buf = NULL;
3103 int len = 0;
3104 int size = XML_PARSER_BUFFER_SIZE;
3105 xmlChar cur;
3106 xmlChar stop;
3107 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003108 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003109
3110 SHRINK;
3111 if (RAW == '"') {
3112 NEXT;
3113 stop = '"';
3114 } else if (RAW == '\'') {
3115 NEXT;
3116 stop = '\'';
3117 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003118 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003119 return(NULL);
3120 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003121 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003122 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003123 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003124 return(NULL);
3125 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003126 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003127 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003128 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003129 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003130 xmlChar *tmp;
3131
Owen Taylor3473f882001-02-23 17:55:21 +00003132 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003133 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3134 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003135 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003136 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003137 return(NULL);
3138 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003139 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003140 }
3141 buf[len++] = cur;
3142 count++;
3143 if (count > 50) {
3144 GROW;
3145 count = 0;
3146 }
3147 NEXT;
3148 cur = CUR;
3149 if (cur == 0) {
3150 GROW;
3151 SHRINK;
3152 cur = CUR;
3153 }
3154 }
3155 buf[len] = 0;
3156 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003157 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003158 } else {
3159 NEXT;
3160 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003161 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003162 return(buf);
3163}
3164
Daniel Veillard48b2f892001-02-25 16:11:03 +00003165void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003166/**
3167 * xmlParseCharData:
3168 * @ctxt: an XML parser context
3169 * @cdata: int indicating whether we are within a CDATA section
3170 *
3171 * parse a CharData section.
3172 * if we are within a CDATA section ']]>' marks an end of section.
3173 *
3174 * The right angle bracket (>) may be represented using the string "&gt;",
3175 * and must, for compatibility, be escaped using "&gt;" or a character
3176 * reference when it appears in the string "]]>" in content, when that
3177 * string is not marking the end of a CDATA section.
3178 *
3179 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3180 */
3181
3182void
3183xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003184 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003185 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003186 int line = ctxt->input->line;
3187 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003188
3189 SHRINK;
3190 GROW;
3191 /*
3192 * Accelerated common case where input don't need to be
3193 * modified before passing it to the handler.
3194 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003195 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003196 in = ctxt->input->cur;
3197 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003198get_more_space:
3199 while (*in == 0x20) in++;
3200 if (*in == 0xA) {
3201 ctxt->input->line++;
3202 in++;
3203 while (*in == 0xA) {
3204 ctxt->input->line++;
3205 in++;
3206 }
3207 goto get_more_space;
3208 }
3209 if (*in == '<') {
3210 nbchar = in - ctxt->input->cur;
3211 if (nbchar > 0) {
3212 const xmlChar *tmp = ctxt->input->cur;
3213 ctxt->input->cur = in;
3214
Daniel Veillard34099b42004-11-04 17:34:35 +00003215 if ((ctxt->sax != NULL) &&
3216 (ctxt->sax->ignorableWhitespace !=
3217 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003218 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3219 ctxt->sax->ignorableWhitespace(ctxt->userData,
3220 tmp, nbchar);
3221 } else if (ctxt->sax->characters != NULL)
3222 ctxt->sax->characters(ctxt->userData,
3223 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003224 } else if ((ctxt->sax != NULL) &&
3225 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003226 ctxt->sax->characters(ctxt->userData,
3227 tmp, nbchar);
3228 }
3229 }
3230 return;
3231 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003232get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003233 while (((*in > ']') && (*in <= 0x7F)) ||
3234 ((*in > '&') && (*in < '<')) ||
3235 ((*in > '<') && (*in < ']')) ||
3236 ((*in >= 0x20) && (*in < '&')) ||
3237 (*in == 0x09))
3238 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003239 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003240 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003241 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003242 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003243 ctxt->input->line++;
3244 in++;
3245 }
3246 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003247 }
3248 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003249 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003250 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003251 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003252 return;
3253 }
3254 in++;
3255 goto get_more;
3256 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003257 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003258 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003259 if ((ctxt->sax != NULL) &&
3260 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003261 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003262 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003263 const xmlChar *tmp = ctxt->input->cur;
3264 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003265
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003266 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003267 ctxt->sax->ignorableWhitespace(ctxt->userData,
3268 tmp, nbchar);
3269 } else if (ctxt->sax->characters != NULL)
3270 ctxt->sax->characters(ctxt->userData,
3271 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003272 line = ctxt->input->line;
3273 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003274 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003275 if (ctxt->sax->characters != NULL)
3276 ctxt->sax->characters(ctxt->userData,
3277 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003278 line = ctxt->input->line;
3279 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003280 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003281 }
3282 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003283 if (*in == 0xD) {
3284 in++;
3285 if (*in == 0xA) {
3286 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003287 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003288 ctxt->input->line++;
3289 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003290 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003291 in--;
3292 }
3293 if (*in == '<') {
3294 return;
3295 }
3296 if (*in == '&') {
3297 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003298 }
3299 SHRINK;
3300 GROW;
3301 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003302 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003303 nbchar = 0;
3304 }
Daniel Veillard50582112001-03-26 22:52:16 +00003305 ctxt->input->line = line;
3306 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307 xmlParseCharDataComplex(ctxt, cdata);
3308}
3309
Daniel Veillard01c13b52002-12-10 15:19:08 +00003310/**
3311 * xmlParseCharDataComplex:
3312 * @ctxt: an XML parser context
3313 * @cdata: int indicating whether we are within a CDATA section
3314 *
3315 * parse a CharData section.this is the fallback function
3316 * of xmlParseCharData() when the parsing requires handling
3317 * of non-ASCII characters.
3318 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003319void
3320xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003321 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3322 int nbchar = 0;
3323 int cur, l;
3324 int count = 0;
3325
3326 SHRINK;
3327 GROW;
3328 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003329 while ((cur != '<') && /* checked */
3330 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003331 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003332 if ((cur == ']') && (NXT(1) == ']') &&
3333 (NXT(2) == '>')) {
3334 if (cdata) break;
3335 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003336 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003337 }
3338 }
3339 COPY_BUF(l,buf,nbchar,cur);
3340 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003341 buf[nbchar] = 0;
3342
Owen Taylor3473f882001-02-23 17:55:21 +00003343 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003344 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003345 */
3346 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003347 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003348 if (ctxt->sax->ignorableWhitespace != NULL)
3349 ctxt->sax->ignorableWhitespace(ctxt->userData,
3350 buf, nbchar);
3351 } else {
3352 if (ctxt->sax->characters != NULL)
3353 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3354 }
3355 }
3356 nbchar = 0;
3357 }
3358 count++;
3359 if (count > 50) {
3360 GROW;
3361 count = 0;
3362 }
3363 NEXTL(l);
3364 cur = CUR_CHAR(l);
3365 }
3366 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003367 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003368 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003369 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003370 */
3371 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003372 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003373 if (ctxt->sax->ignorableWhitespace != NULL)
3374 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3375 } else {
3376 if (ctxt->sax->characters != NULL)
3377 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3378 }
3379 }
3380 }
3381}
3382
3383/**
3384 * xmlParseExternalID:
3385 * @ctxt: an XML parser context
3386 * @publicID: a xmlChar** receiving PubidLiteral
3387 * @strict: indicate whether we should restrict parsing to only
3388 * production [75], see NOTE below
3389 *
3390 * Parse an External ID or a Public ID
3391 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003392 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003393 * 'PUBLIC' S PubidLiteral S SystemLiteral
3394 *
3395 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3396 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3397 *
3398 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3399 *
3400 * Returns the function returns SystemLiteral and in the second
3401 * case publicID receives PubidLiteral, is strict is off
3402 * it is possible to return NULL and have publicID set.
3403 */
3404
3405xmlChar *
3406xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3407 xmlChar *URI = NULL;
3408
3409 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003410
3411 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003412 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003413 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003414 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3416 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003417 }
3418 SKIP_BLANKS;
3419 URI = xmlParseSystemLiteral(ctxt);
3420 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003421 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003422 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003423 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003424 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003425 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003427 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003428 }
3429 SKIP_BLANKS;
3430 *publicID = xmlParsePubidLiteral(ctxt);
3431 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003432 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003433 }
3434 if (strict) {
3435 /*
3436 * We don't handle [83] so "S SystemLiteral" is required.
3437 */
William M. Brack76e95df2003-10-18 16:20:14 +00003438 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003439 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003440 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003441 }
3442 } else {
3443 /*
3444 * We handle [83] so we return immediately, if
3445 * "S SystemLiteral" is not detected. From a purely parsing
3446 * point of view that's a nice mess.
3447 */
3448 const xmlChar *ptr;
3449 GROW;
3450
3451 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003452 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003453
William M. Brack76e95df2003-10-18 16:20:14 +00003454 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003455 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3456 }
3457 SKIP_BLANKS;
3458 URI = xmlParseSystemLiteral(ctxt);
3459 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003460 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003461 }
3462 }
3463 return(URI);
3464}
3465
3466/**
3467 * xmlParseComment:
3468 * @ctxt: an XML parser context
3469 *
3470 * Skip an XML (SGML) comment <!-- .... -->
3471 * The spec says that "For compatibility, the string "--" (double-hyphen)
3472 * must not occur within comments. "
3473 *
3474 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3475 */
3476void
3477xmlParseComment(xmlParserCtxtPtr ctxt) {
3478 xmlChar *buf = NULL;
3479 int len;
3480 int size = XML_PARSER_BUFFER_SIZE;
3481 int q, ql;
3482 int r, rl;
3483 int cur, l;
3484 xmlParserInputState state;
3485 xmlParserInputPtr input = ctxt->input;
3486 int count = 0;
3487
3488 /*
3489 * Check that there is a comment right here.
3490 */
3491 if ((RAW != '<') || (NXT(1) != '!') ||
3492 (NXT(2) != '-') || (NXT(3) != '-')) return;
3493
3494 state = ctxt->instate;
3495 ctxt->instate = XML_PARSER_COMMENT;
3496 SHRINK;
3497 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003498 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003500 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003501 ctxt->instate = state;
3502 return;
3503 }
3504 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003505 if (q == 0)
3506 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003507 NEXTL(ql);
3508 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003509 if (r == 0)
3510 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 NEXTL(rl);
3512 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003513 if (cur == 0)
3514 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003515 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003516 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003517 ((cur != '>') ||
3518 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003519 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003520 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003521 }
3522 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003523 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003524 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003525 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3526 if (new_buf == NULL) {
3527 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003528 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003529 ctxt->instate = state;
3530 return;
3531 }
William M. Bracka3215c72004-07-31 16:24:01 +00003532 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003533 }
3534 COPY_BUF(ql,buf,len,q);
3535 q = r;
3536 ql = rl;
3537 r = cur;
3538 rl = l;
3539
3540 count++;
3541 if (count > 50) {
3542 GROW;
3543 count = 0;
3544 }
3545 NEXTL(l);
3546 cur = CUR_CHAR(l);
3547 if (cur == 0) {
3548 SHRINK;
3549 GROW;
3550 cur = CUR_CHAR(l);
3551 }
3552 }
3553 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003554 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003555 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003556 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003557 xmlFree(buf);
3558 } else {
3559 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003560 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3561 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003562 }
3563 NEXT;
3564 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3565 (!ctxt->disableSAX))
3566 ctxt->sax->comment(ctxt->userData, buf);
3567 xmlFree(buf);
3568 }
3569 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003570 return;
3571not_terminated:
3572 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3573 "Comment not terminated\n", NULL);
3574 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003575}
3576
3577/**
3578 * xmlParsePITarget:
3579 * @ctxt: an XML parser context
3580 *
3581 * parse the name of a PI
3582 *
3583 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3584 *
3585 * Returns the PITarget name or NULL
3586 */
3587
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003588const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003589xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003590 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003591
3592 name = xmlParseName(ctxt);
3593 if ((name != NULL) &&
3594 ((name[0] == 'x') || (name[0] == 'X')) &&
3595 ((name[1] == 'm') || (name[1] == 'M')) &&
3596 ((name[2] == 'l') || (name[2] == 'L'))) {
3597 int i;
3598 if ((name[0] == 'x') && (name[1] == 'm') &&
3599 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003600 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003601 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003602 return(name);
3603 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003604 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003605 return(name);
3606 }
3607 for (i = 0;;i++) {
3608 if (xmlW3CPIs[i] == NULL) break;
3609 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3610 return(name);
3611 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003612 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3613 "xmlParsePITarget: invalid name prefix 'xml'\n",
3614 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003615 }
3616 return(name);
3617}
3618
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be the pseudo attribute catalog = quoted URL with
 * nothing but blanks around it. A value where "catalog" is not followed
 * by '=' is ignored silently, any other syntax problem raises the
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the pseudo attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3680
Owen Taylor3473f882001-02-23 17:55:21 +00003681/**
3682 * xmlParsePI:
3683 * @ctxt: an XML parser context
3684 *
3685 * parse an XML Processing Instruction.
3686 *
3687 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3688 *
3689 * The processing is transfered to SAX once parsed.
3690 */
3691
3692void
3693xmlParsePI(xmlParserCtxtPtr ctxt) {
3694 xmlChar *buf = NULL;
3695 int len = 0;
3696 int size = XML_PARSER_BUFFER_SIZE;
3697 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003698 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003699 xmlParserInputState state;
3700 int count = 0;
3701
3702 if ((RAW == '<') && (NXT(1) == '?')) {
3703 xmlParserInputPtr input = ctxt->input;
3704 state = ctxt->instate;
3705 ctxt->instate = XML_PARSER_PI;
3706 /*
3707 * this is a Processing Instruction.
3708 */
3709 SKIP(2);
3710 SHRINK;
3711
3712 /*
3713 * Parse the target name and check for special support like
3714 * namespace.
3715 */
3716 target = xmlParsePITarget(ctxt);
3717 if (target != NULL) {
3718 if ((RAW == '?') && (NXT(1) == '>')) {
3719 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003720 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3721 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723 SKIP(2);
3724
3725 /*
3726 * SAX: PI detected.
3727 */
3728 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3729 (ctxt->sax->processingInstruction != NULL))
3730 ctxt->sax->processingInstruction(ctxt->userData,
3731 target, NULL);
3732 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003733 return;
3734 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003735 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003736 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003737 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003738 ctxt->instate = state;
3739 return;
3740 }
3741 cur = CUR;
3742 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003743 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3744 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 }
3746 SKIP_BLANKS;
3747 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003748 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003749 ((cur != '?') || (NXT(1) != '>'))) {
3750 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003751 xmlChar *tmp;
3752
Owen Taylor3473f882001-02-23 17:55:21 +00003753 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003754 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3755 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003756 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003757 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 ctxt->instate = state;
3759 return;
3760 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003761 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003762 }
3763 count++;
3764 if (count > 50) {
3765 GROW;
3766 count = 0;
3767 }
3768 COPY_BUF(l,buf,len,cur);
3769 NEXTL(l);
3770 cur = CUR_CHAR(l);
3771 if (cur == 0) {
3772 SHRINK;
3773 GROW;
3774 cur = CUR_CHAR(l);
3775 }
3776 }
3777 buf[len] = 0;
3778 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003779 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3780 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003781 } else {
3782 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003783 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3784 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003785 }
3786 SKIP(2);
3787
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003788#ifdef LIBXML_CATALOG_ENABLED
3789 if (((state == XML_PARSER_MISC) ||
3790 (state == XML_PARSER_START)) &&
3791 (xmlStrEqual(target, XML_CATALOG_PI))) {
3792 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3793 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3794 (allow == XML_CATA_ALLOW_ALL))
3795 xmlParseCatalogPI(ctxt, buf);
3796 }
3797#endif
3798
3799
Owen Taylor3473f882001-02-23 17:55:21 +00003800 /*
3801 * SAX: PI detected.
3802 */
3803 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3804 (ctxt->sax->processingInstruction != NULL))
3805 ctxt->sax->processingInstruction(ctxt->userData,
3806 target, buf);
3807 }
3808 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003809 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003810 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003811 }
3812 ctxt->instate = state;
3813 }
3814}
3815
3816/**
3817 * xmlParseNotationDecl:
3818 * @ctxt: an XML parser context
3819 *
3820 * parse a notation declaration
3821 *
3822 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3823 *
3824 * Hence there is actually 3 choices:
3825 * 'PUBLIC' S PubidLiteral
3826 * 'PUBLIC' S PubidLiteral S SystemLiteral
3827 * and 'SYSTEM' S SystemLiteral
3828 *
3829 * See the NOTE on xmlParseExternalID().
3830 */
3831
3832void
3833xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003834 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003835 xmlChar *Pubid;
3836 xmlChar *Systemid;
3837
Daniel Veillarda07050d2003-10-19 14:46:32 +00003838 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003839 xmlParserInputPtr input = ctxt->input;
3840 SHRINK;
3841 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003842 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003843 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3844 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003845 return;
3846 }
3847 SKIP_BLANKS;
3848
Daniel Veillard76d66f42001-05-16 21:05:17 +00003849 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003850 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003851 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003852 return;
3853 }
William M. Brack76e95df2003-10-18 16:20:14 +00003854 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003855 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003856 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003857 return;
3858 }
3859 SKIP_BLANKS;
3860
3861 /*
3862 * Parse the IDs.
3863 */
3864 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3865 SKIP_BLANKS;
3866
3867 if (RAW == '>') {
3868 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3870 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003871 }
3872 NEXT;
3873 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3874 (ctxt->sax->notationDecl != NULL))
3875 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3876 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003877 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003878 }
Owen Taylor3473f882001-02-23 17:55:21 +00003879 if (Systemid != NULL) xmlFree(Systemid);
3880 if (Pubid != NULL) xmlFree(Pubid);
3881 }
3882}
3883
3884/**
3885 * xmlParseEntityDecl:
3886 * @ctxt: an XML parser context
3887 *
3888 * parse <!ENTITY declarations
3889 *
3890 * [70] EntityDecl ::= GEDecl | PEDecl
3891 *
3892 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3893 *
3894 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3895 *
3896 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3897 *
3898 * [74] PEDef ::= EntityValue | ExternalID
3899 *
3900 * [76] NDataDecl ::= S 'NDATA' S Name
3901 *
3902 * [ VC: Notation Declared ]
3903 * The Name must match the declared name of a notation.
3904 */
3905
3906void
3907xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003908 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003909 xmlChar *value = NULL;
3910 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003911 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003912 int isParameter = 0;
3913 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003914 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003915
3916 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003917 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003918 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003919 SHRINK;
3920 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003921 skipped = SKIP_BLANKS;
3922 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003923 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3924 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003925 }
Owen Taylor3473f882001-02-23 17:55:21 +00003926
3927 if (RAW == '%') {
3928 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003929 skipped = SKIP_BLANKS;
3930 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003931 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3932 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003933 }
Owen Taylor3473f882001-02-23 17:55:21 +00003934 isParameter = 1;
3935 }
3936
Daniel Veillard76d66f42001-05-16 21:05:17 +00003937 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003938 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003939 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3940 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003941 return;
3942 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003943 skipped = SKIP_BLANKS;
3944 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3946 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003947 }
Owen Taylor3473f882001-02-23 17:55:21 +00003948
Daniel Veillardf5582f12002-06-11 10:08:16 +00003949 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 /*
3951 * handle the various case of definitions...
3952 */
3953 if (isParameter) {
3954 if ((RAW == '"') || (RAW == '\'')) {
3955 value = xmlParseEntityValue(ctxt, &orig);
3956 if (value) {
3957 if ((ctxt->sax != NULL) &&
3958 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3959 ctxt->sax->entityDecl(ctxt->userData, name,
3960 XML_INTERNAL_PARAMETER_ENTITY,
3961 NULL, NULL, value);
3962 }
3963 } else {
3964 URI = xmlParseExternalID(ctxt, &literal, 1);
3965 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003966 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003967 }
3968 if (URI) {
3969 xmlURIPtr uri;
3970
3971 uri = xmlParseURI((const char *) URI);
3972 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003973 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3974 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003975 /*
3976 * This really ought to be a well formedness error
3977 * but the XML Core WG decided otherwise c.f. issue
3978 * E26 of the XML erratas.
3979 */
Owen Taylor3473f882001-02-23 17:55:21 +00003980 } else {
3981 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003982 /*
3983 * Okay this is foolish to block those but not
3984 * invalid URIs.
3985 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003986 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003987 } else {
3988 if ((ctxt->sax != NULL) &&
3989 (!ctxt->disableSAX) &&
3990 (ctxt->sax->entityDecl != NULL))
3991 ctxt->sax->entityDecl(ctxt->userData, name,
3992 XML_EXTERNAL_PARAMETER_ENTITY,
3993 literal, URI, NULL);
3994 }
3995 xmlFreeURI(uri);
3996 }
3997 }
3998 }
3999 } else {
4000 if ((RAW == '"') || (RAW == '\'')) {
4001 value = xmlParseEntityValue(ctxt, &orig);
4002 if ((ctxt->sax != NULL) &&
4003 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4004 ctxt->sax->entityDecl(ctxt->userData, name,
4005 XML_INTERNAL_GENERAL_ENTITY,
4006 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004007 /*
4008 * For expat compatibility in SAX mode.
4009 */
4010 if ((ctxt->myDoc == NULL) ||
4011 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4012 if (ctxt->myDoc == NULL) {
4013 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4014 }
4015 if (ctxt->myDoc->intSubset == NULL)
4016 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4017 BAD_CAST "fake", NULL, NULL);
4018
Daniel Veillard1af9a412003-08-20 22:54:39 +00004019 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4020 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004021 }
Owen Taylor3473f882001-02-23 17:55:21 +00004022 } else {
4023 URI = xmlParseExternalID(ctxt, &literal, 1);
4024 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004025 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004026 }
4027 if (URI) {
4028 xmlURIPtr uri;
4029
4030 uri = xmlParseURI((const char *)URI);
4031 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004032 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4033 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004034 /*
4035 * This really ought to be a well formedness error
4036 * but the XML Core WG decided otherwise c.f. issue
4037 * E26 of the XML erratas.
4038 */
Owen Taylor3473f882001-02-23 17:55:21 +00004039 } else {
4040 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004041 /*
4042 * Okay this is foolish to block those but not
4043 * invalid URIs.
4044 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004045 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004046 }
4047 xmlFreeURI(uri);
4048 }
4049 }
William M. Brack76e95df2003-10-18 16:20:14 +00004050 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004051 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4052 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004053 }
4054 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004055 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004056 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004057 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004058 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4059 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004060 }
4061 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004062 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4064 (ctxt->sax->unparsedEntityDecl != NULL))
4065 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4066 literal, URI, ndata);
4067 } else {
4068 if ((ctxt->sax != NULL) &&
4069 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4070 ctxt->sax->entityDecl(ctxt->userData, name,
4071 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4072 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004073 /*
4074 * For expat compatibility in SAX mode.
4075 * assuming the entity repalcement was asked for
4076 */
4077 if ((ctxt->replaceEntities != 0) &&
4078 ((ctxt->myDoc == NULL) ||
4079 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4080 if (ctxt->myDoc == NULL) {
4081 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4082 }
4083
4084 if (ctxt->myDoc->intSubset == NULL)
4085 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4086 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004087 xmlSAX2EntityDecl(ctxt, name,
4088 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4089 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004090 }
Owen Taylor3473f882001-02-23 17:55:21 +00004091 }
4092 }
4093 }
4094 SKIP_BLANKS;
4095 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004096 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004097 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004098 } else {
4099 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004100 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4101 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004102 }
4103 NEXT;
4104 }
4105 if (orig != NULL) {
4106 /*
4107 * Ugly mechanism to save the raw entity value.
4108 */
4109 xmlEntityPtr cur = NULL;
4110
4111 if (isParameter) {
4112 if ((ctxt->sax != NULL) &&
4113 (ctxt->sax->getParameterEntity != NULL))
4114 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4115 } else {
4116 if ((ctxt->sax != NULL) &&
4117 (ctxt->sax->getEntity != NULL))
4118 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004119 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004120 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004121 }
Owen Taylor3473f882001-02-23 17:55:21 +00004122 }
4123 if (cur != NULL) {
4124 if (cur->orig != NULL)
4125 xmlFree(orig);
4126 else
4127 cur->orig = orig;
4128 } else
4129 xmlFree(orig);
4130 }
Owen Taylor3473f882001-02-23 17:55:21 +00004131 if (value != NULL) xmlFree(value);
4132 if (URI != NULL) xmlFree(URI);
4133 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004134 }
4135}
4136
4137/**
4138 * xmlParseDefaultDecl:
4139 * @ctxt: an XML parser context
4140 * @value: Receive a possible fixed default value for the attribute
4141 *
4142 * Parse an attribute default declaration
4143 *
4144 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4145 *
4146 * [ VC: Required Attribute ]
4147 * if the default declaration is the keyword #REQUIRED, then the
4148 * attribute must be specified for all elements of the type in the
4149 * attribute-list declaration.
4150 *
4151 * [ VC: Attribute Default Legal ]
4152 * The declared default value must meet the lexical constraints of
4153 * the declared attribute type c.f. xmlValidateAttributeDecl()
4154 *
4155 * [ VC: Fixed Attribute Default ]
4156 * if an attribute has a default value declared with the #FIXED
4157 * keyword, instances of that attribute must match the default value.
4158 *
4159 * [ WFC: No < in Attribute Values ]
4160 * handled in xmlParseAttValue()
4161 *
4162 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4163 * or XML_ATTRIBUTE_FIXED.
4164 */
4165
4166int
4167xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4168 int val;
4169 xmlChar *ret;
4170
4171 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004172 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004173 SKIP(9);
4174 return(XML_ATTRIBUTE_REQUIRED);
4175 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004176 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004177 SKIP(8);
4178 return(XML_ATTRIBUTE_IMPLIED);
4179 }
4180 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004181 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004182 SKIP(6);
4183 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004184 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004185 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4186 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004187 }
4188 SKIP_BLANKS;
4189 }
4190 ret = xmlParseAttValue(ctxt);
4191 ctxt->instate = XML_PARSER_DTD;
4192 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004193 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004194 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004195 } else
4196 *value = ret;
4197 return(val);
4198}
4199
4200/**
4201 * xmlParseNotationType:
4202 * @ctxt: an XML parser context
4203 *
4204 * parse an Notation attribute type.
4205 *
4206 * Note: the leading 'NOTATION' S part has already being parsed...
4207 *
4208 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4209 *
4210 * [ VC: Notation Attributes ]
4211 * Values of this type must match one of the notation names included
4212 * in the declaration; all notation names in the declaration must be declared.
4213 *
4214 * Returns: the notation attribute tree built while parsing
4215 */
4216
4217xmlEnumerationPtr
4218xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004219 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4221
4222 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004223 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004224 return(NULL);
4225 }
4226 SHRINK;
4227 do {
4228 NEXT;
4229 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004230 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004231 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004232 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4233 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004234 return(ret);
4235 }
4236 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004237 if (cur == NULL) return(ret);
4238 if (last == NULL) ret = last = cur;
4239 else {
4240 last->next = cur;
4241 last = cur;
4242 }
4243 SKIP_BLANKS;
4244 } while (RAW == '|');
4245 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 if ((last != NULL) && (last != ret))
4248 xmlFreeEnumeration(last);
4249 return(ret);
4250 }
4251 NEXT;
4252 return(ret);
4253}
4254
4255/**
4256 * xmlParseEnumerationType:
4257 * @ctxt: an XML parser context
4258 *
4259 * parse an Enumeration attribute type.
4260 *
4261 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4262 *
4263 * [ VC: Enumeration ]
4264 * Values of this type must match one of the Nmtoken tokens in
4265 * the declaration
4266 *
4267 * Returns: the enumeration attribute tree built while parsing
4268 */
4269
4270xmlEnumerationPtr
4271xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4272 xmlChar *name;
4273 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4274
4275 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004276 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004277 return(NULL);
4278 }
4279 SHRINK;
4280 do {
4281 NEXT;
4282 SKIP_BLANKS;
4283 name = xmlParseNmtoken(ctxt);
4284 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004285 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004286 return(ret);
4287 }
4288 cur = xmlCreateEnumeration(name);
4289 xmlFree(name);
4290 if (cur == NULL) return(ret);
4291 if (last == NULL) ret = last = cur;
4292 else {
4293 last->next = cur;
4294 last = cur;
4295 }
4296 SKIP_BLANKS;
4297 } while (RAW == '|');
4298 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004299 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004300 return(ret);
4301 }
4302 NEXT;
4303 return(ret);
4304}
4305
4306/**
4307 * xmlParseEnumeratedType:
4308 * @ctxt: an XML parser context
4309 * @tree: the enumeration tree built while parsing
4310 *
4311 * parse an Enumerated attribute type.
4312 *
4313 * [57] EnumeratedType ::= NotationType | Enumeration
4314 *
4315 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4316 *
4317 *
4318 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4319 */
4320
4321int
4322xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004323 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004324 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004325 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004326 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4327 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004328 return(0);
4329 }
4330 SKIP_BLANKS;
4331 *tree = xmlParseNotationType(ctxt);
4332 if (*tree == NULL) return(0);
4333 return(XML_ATTRIBUTE_NOTATION);
4334 }
4335 *tree = xmlParseEnumerationType(ctxt);
4336 if (*tree == NULL) return(0);
4337 return(XML_ATTRIBUTE_ENUMERATION);
4338}
4339
4340/**
4341 * xmlParseAttributeType:
4342 * @ctxt: an XML parser context
4343 * @tree: the enumeration tree built while parsing
4344 *
4345 * parse the Attribute list def for an element
4346 *
4347 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4348 *
4349 * [55] StringType ::= 'CDATA'
4350 *
4351 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4352 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4353 *
4354 * Validity constraints for attribute values syntax are checked in
4355 * xmlValidateAttributeValue()
4356 *
4357 * [ VC: ID ]
4358 * Values of type ID must match the Name production. A name must not
4359 * appear more than once in an XML document as a value of this type;
4360 * i.e., ID values must uniquely identify the elements which bear them.
4361 *
4362 * [ VC: One ID per Element Type ]
4363 * No element type may have more than one ID attribute specified.
4364 *
4365 * [ VC: ID Attribute Default ]
4366 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4367 *
4368 * [ VC: IDREF ]
4369 * Values of type IDREF must match the Name production, and values
4370 * of type IDREFS must match Names; each IDREF Name must match the value
4371 * of an ID attribute on some element in the XML document; i.e. IDREF
4372 * values must match the value of some ID attribute.
4373 *
4374 * [ VC: Entity Name ]
4375 * Values of type ENTITY must match the Name production, values
4376 * of type ENTITIES must match Names; each Entity Name must match the
4377 * name of an unparsed entity declared in the DTD.
4378 *
4379 * [ VC: Name Token ]
4380 * Values of type NMTOKEN must match the Nmtoken production; values
4381 * of type NMTOKENS must match Nmtokens.
4382 *
4383 * Returns the attribute type
4384 */
4385int
4386xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4387 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004388 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004389 SKIP(5);
4390 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004391 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004392 SKIP(6);
4393 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004394 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004395 SKIP(5);
4396 return(XML_ATTRIBUTE_IDREF);
4397 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4398 SKIP(2);
4399 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004400 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004401 SKIP(6);
4402 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004403 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004404 SKIP(8);
4405 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004406 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004407 SKIP(8);
4408 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004409 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004410 SKIP(7);
4411 return(XML_ATTRIBUTE_NMTOKEN);
4412 }
4413 return(xmlParseEnumeratedType(ctxt, tree));
4414}
4415
4416/**
4417 * xmlParseAttributeListDecl:
4418 * @ctxt: an XML parser context
4419 *
4420 * : parse the Attribute list def for an element
4421 *
4422 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4423 *
4424 * [53] AttDef ::= S Name S AttType S DefaultDecl
4425 *
4426 */
4427void
4428xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004429 const xmlChar *elemName;
4430 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004431 xmlEnumerationPtr tree;
4432
Daniel Veillarda07050d2003-10-19 14:46:32 +00004433 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004434 xmlParserInputPtr input = ctxt->input;
4435
4436 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004437 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004439 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004440 }
4441 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004442 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004443 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4445 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004446 return;
4447 }
4448 SKIP_BLANKS;
4449 GROW;
4450 while (RAW != '>') {
4451 const xmlChar *check = CUR_PTR;
4452 int type;
4453 int def;
4454 xmlChar *defaultValue = NULL;
4455
4456 GROW;
4457 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004458 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004459 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4461 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004462 break;
4463 }
4464 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004465 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004466 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004467 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004468 if (defaultValue != NULL)
4469 xmlFree(defaultValue);
4470 break;
4471 }
4472 SKIP_BLANKS;
4473
4474 type = xmlParseAttributeType(ctxt, &tree);
4475 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004476 if (defaultValue != NULL)
4477 xmlFree(defaultValue);
4478 break;
4479 }
4480
4481 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004482 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4484 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004485 if (defaultValue != NULL)
4486 xmlFree(defaultValue);
4487 if (tree != NULL)
4488 xmlFreeEnumeration(tree);
4489 break;
4490 }
4491 SKIP_BLANKS;
4492
4493 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4494 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004495 if (defaultValue != NULL)
4496 xmlFree(defaultValue);
4497 if (tree != NULL)
4498 xmlFreeEnumeration(tree);
4499 break;
4500 }
4501
4502 GROW;
4503 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004504 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004506 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004507 if (defaultValue != NULL)
4508 xmlFree(defaultValue);
4509 if (tree != NULL)
4510 xmlFreeEnumeration(tree);
4511 break;
4512 }
4513 SKIP_BLANKS;
4514 }
4515 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004516 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4517 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004518 if (defaultValue != NULL)
4519 xmlFree(defaultValue);
4520 if (tree != NULL)
4521 xmlFreeEnumeration(tree);
4522 break;
4523 }
4524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4525 (ctxt->sax->attributeDecl != NULL))
4526 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4527 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004528 else if (tree != NULL)
4529 xmlFreeEnumeration(tree);
4530
4531 if ((ctxt->sax2) && (defaultValue != NULL) &&
4532 (def != XML_ATTRIBUTE_IMPLIED) &&
4533 (def != XML_ATTRIBUTE_REQUIRED)) {
4534 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4535 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004536 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4537 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4538 }
Owen Taylor3473f882001-02-23 17:55:21 +00004539 if (defaultValue != NULL)
4540 xmlFree(defaultValue);
4541 GROW;
4542 }
4543 if (RAW == '>') {
4544 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004545 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4546 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004547 }
4548 NEXT;
4549 }
Owen Taylor3473f882001-02-23 17:55:21 +00004550 }
4551}
4552
4553/**
4554 * xmlParseElementMixedContentDecl:
4555 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004556 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004557 *
4558 * parse the declaration for a Mixed Element content
4559 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4560 *
4561 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4562 * '(' S? '#PCDATA' S? ')'
4563 *
4564 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4565 *
4566 * [ VC: No Duplicate Types ]
4567 * The same name must not appear more than once in a single
4568 * mixed-content declaration.
4569 *
4570 * returns: the list of the xmlElementContentPtr describing the element choices
4571 */
4572xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004573xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004574 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004575 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004576
4577 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004578 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004579 SKIP(7);
4580 SKIP_BLANKS;
4581 SHRINK;
4582 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004583 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004584 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4585"Element content declaration doesn't start and stop in the same entity\n",
4586 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004587 }
Owen Taylor3473f882001-02-23 17:55:21 +00004588 NEXT;
4589 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4590 if (RAW == '*') {
4591 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4592 NEXT;
4593 }
4594 return(ret);
4595 }
4596 if ((RAW == '(') || (RAW == '|')) {
4597 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4598 if (ret == NULL) return(NULL);
4599 }
4600 while (RAW == '|') {
4601 NEXT;
4602 if (elem == NULL) {
4603 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4604 if (ret == NULL) return(NULL);
4605 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004606 if (cur != NULL)
4607 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004608 cur = ret;
4609 } else {
4610 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4611 if (n == NULL) return(NULL);
4612 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004613 if (n->c1 != NULL)
4614 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004615 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004616 if (n != NULL)
4617 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004618 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004619 }
4620 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004621 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004622 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004623 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004624 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004625 xmlFreeElementContent(cur);
4626 return(NULL);
4627 }
4628 SKIP_BLANKS;
4629 GROW;
4630 }
4631 if ((RAW == ')') && (NXT(1) == '*')) {
4632 if (elem != NULL) {
4633 cur->c2 = xmlNewElementContent(elem,
4634 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004635 if (cur->c2 != NULL)
4636 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 }
4638 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004639 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004640 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4641"Element content declaration doesn't start and stop in the same entity\n",
4642 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004643 }
Owen Taylor3473f882001-02-23 17:55:21 +00004644 SKIP(2);
4645 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004646 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004647 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004648 return(NULL);
4649 }
4650
4651 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004652 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004653 }
4654 return(ret);
4655}
4656
/**
 * xmlParseElementChildrenContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for an element with children (element-only) content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
4688xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004689xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004690 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004691 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004692 xmlChar type = 0;
4693
4694 SKIP_BLANKS;
4695 GROW;
4696 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004697 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004698
Owen Taylor3473f882001-02-23 17:55:21 +00004699 /* Recurse on first child */
4700 NEXT;
4701 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004702 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004703 SKIP_BLANKS;
4704 GROW;
4705 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004706 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004708 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004709 return(NULL);
4710 }
4711 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004712 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004713 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004714 return(NULL);
4715 }
Owen Taylor3473f882001-02-23 17:55:21 +00004716 GROW;
4717 if (RAW == '?') {
4718 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4719 NEXT;
4720 } else if (RAW == '*') {
4721 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4722 NEXT;
4723 } else if (RAW == '+') {
4724 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4725 NEXT;
4726 } else {
4727 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4728 }
Owen Taylor3473f882001-02-23 17:55:21 +00004729 GROW;
4730 }
4731 SKIP_BLANKS;
4732 SHRINK;
4733 while (RAW != ')') {
4734 /*
4735 * Each loop we parse one separator and one element.
4736 */
4737 if (RAW == ',') {
4738 if (type == 0) type = CUR;
4739
4740 /*
4741 * Detect "Name | Name , Name" error
4742 */
4743 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004744 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004745 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004746 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004747 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004748 xmlFreeElementContent(last);
4749 if (ret != NULL)
4750 xmlFreeElementContent(ret);
4751 return(NULL);
4752 }
4753 NEXT;
4754
4755 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4756 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004757 if ((last != NULL) && (last != ret))
4758 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004759 xmlFreeElementContent(ret);
4760 return(NULL);
4761 }
4762 if (last == NULL) {
4763 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004764 if (ret != NULL)
4765 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004766 ret = cur = op;
4767 } else {
4768 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004769 if (op != NULL)
4770 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004771 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004772 if (last != NULL)
4773 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004774 cur =op;
4775 last = NULL;
4776 }
4777 } else if (RAW == '|') {
4778 if (type == 0) type = CUR;
4779
4780 /*
4781 * Detect "Name , Name | Name" error
4782 */
4783 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004784 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004785 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004786 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004787 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004788 xmlFreeElementContent(last);
4789 if (ret != NULL)
4790 xmlFreeElementContent(ret);
4791 return(NULL);
4792 }
4793 NEXT;
4794
4795 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4796 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004797 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004798 xmlFreeElementContent(last);
4799 if (ret != NULL)
4800 xmlFreeElementContent(ret);
4801 return(NULL);
4802 }
4803 if (last == NULL) {
4804 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004805 if (ret != NULL)
4806 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004807 ret = cur = op;
4808 } else {
4809 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004810 if (op != NULL)
4811 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004812 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004813 if (last != NULL)
4814 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004815 cur =op;
4816 last = NULL;
4817 }
4818 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004819 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004820 if (ret != NULL)
4821 xmlFreeElementContent(ret);
4822 return(NULL);
4823 }
4824 GROW;
4825 SKIP_BLANKS;
4826 GROW;
4827 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004828 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004829 /* Recurse on second child */
4830 NEXT;
4831 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004832 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004833 SKIP_BLANKS;
4834 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004835 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004836 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004837 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004838 if (ret != NULL)
4839 xmlFreeElementContent(ret);
4840 return(NULL);
4841 }
4842 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004843 if (RAW == '?') {
4844 last->ocur = XML_ELEMENT_CONTENT_OPT;
4845 NEXT;
4846 } else if (RAW == '*') {
4847 last->ocur = XML_ELEMENT_CONTENT_MULT;
4848 NEXT;
4849 } else if (RAW == '+') {
4850 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4851 NEXT;
4852 } else {
4853 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4854 }
4855 }
4856 SKIP_BLANKS;
4857 GROW;
4858 }
4859 if ((cur != NULL) && (last != NULL)) {
4860 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004861 if (last != NULL)
4862 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004863 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004864 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004865 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4866"Element content declaration doesn't start and stop in the same entity\n",
4867 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004868 }
Owen Taylor3473f882001-02-23 17:55:21 +00004869 NEXT;
4870 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004871 if (ret != NULL) {
4872 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4873 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4874 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4875 else
4876 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4877 }
Owen Taylor3473f882001-02-23 17:55:21 +00004878 NEXT;
4879 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004880 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004881 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004882 cur = ret;
4883 /*
4884 * Some normalization:
4885 * (a | b* | c?)* == (a | b | c)*
4886 */
4887 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4888 if ((cur->c1 != NULL) &&
4889 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4890 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4891 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4892 if ((cur->c2 != NULL) &&
4893 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4894 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4895 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4896 cur = cur->c2;
4897 }
4898 }
Owen Taylor3473f882001-02-23 17:55:21 +00004899 NEXT;
4900 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004901 if (ret != NULL) {
4902 int found = 0;
4903
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004904 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4905 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4906 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004907 else
4908 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004909 /*
4910 * Some normalization:
4911 * (a | b*)+ == (a | b)*
4912 * (a | b?)+ == (a | b)*
4913 */
4914 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4915 if ((cur->c1 != NULL) &&
4916 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4917 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4918 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4919 found = 1;
4920 }
4921 if ((cur->c2 != NULL) &&
4922 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4923 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4924 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4925 found = 1;
4926 }
4927 cur = cur->c2;
4928 }
4929 if (found)
4930 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4931 }
Owen Taylor3473f882001-02-23 17:55:21 +00004932 NEXT;
4933 }
4934 return(ret);
4935}
4936
4937/**
4938 * xmlParseElementContentDecl:
4939 * @ctxt: an XML parser context
4940 * @name: the name of the element being defined.
4941 * @result: the Element Content pointer will be stored here if any
4942 *
4943 * parse the declaration for an Element content either Mixed or Children,
4944 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4945 *
4946 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4947 *
4948 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4949 */
4950
4951int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004952xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004953 xmlElementContentPtr *result) {
4954
4955 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004956 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004957 int res;
4958
4959 *result = NULL;
4960
4961 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004962 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004963 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004964 return(-1);
4965 }
4966 NEXT;
4967 GROW;
4968 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004969 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004970 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004971 res = XML_ELEMENT_TYPE_MIXED;
4972 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004973 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004974 res = XML_ELEMENT_TYPE_ELEMENT;
4975 }
Owen Taylor3473f882001-02-23 17:55:21 +00004976 SKIP_BLANKS;
4977 *result = tree;
4978 return(res);
4979}
4980
4981/**
4982 * xmlParseElementDecl:
4983 * @ctxt: an XML parser context
4984 *
4985 * parse an Element declaration.
4986 *
4987 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4988 *
4989 * [ VC: Unique Element Type Declaration ]
4990 * No element type may be declared more than once
4991 *
4992 * Returns the type of the element, or -1 in case of error
4993 */
4994int
4995xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004996 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004997 int ret = -1;
4998 xmlElementContentPtr content = NULL;
4999
5000 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005001 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005002 xmlParserInputPtr input = ctxt->input;
5003
5004 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005005 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005006 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5007 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005008 }
5009 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005010 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005012 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5013 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005014 return(-1);
5015 }
5016 while ((RAW == 0) && (ctxt->inputNr > 1))
5017 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005018 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005019 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5020 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005021 }
5022 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005023 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005024 SKIP(5);
5025 /*
5026 * Element must always be empty.
5027 */
5028 ret = XML_ELEMENT_TYPE_EMPTY;
5029 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5030 (NXT(2) == 'Y')) {
5031 SKIP(3);
5032 /*
5033 * Element is a generic container.
5034 */
5035 ret = XML_ELEMENT_TYPE_ANY;
5036 } else if (RAW == '(') {
5037 ret = xmlParseElementContentDecl(ctxt, name, &content);
5038 } else {
5039 /*
5040 * [ WFC: PEs in Internal Subset ] error handling.
5041 */
5042 if ((RAW == '%') && (ctxt->external == 0) &&
5043 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005044 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005045 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005046 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005047 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005048 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5049 }
Owen Taylor3473f882001-02-23 17:55:21 +00005050 return(-1);
5051 }
5052
5053 SKIP_BLANKS;
5054 /*
5055 * Pop-up of finished entities.
5056 */
5057 while ((RAW == 0) && (ctxt->inputNr > 1))
5058 xmlPopInput(ctxt);
5059 SKIP_BLANKS;
5060
5061 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005062 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005063 } else {
5064 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005065 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005067 }
5068
5069 NEXT;
5070 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5071 (ctxt->sax->elementDecl != NULL))
5072 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5073 content);
5074 }
5075 if (content != NULL) {
5076 xmlFreeElementContent(content);
5077 }
Owen Taylor3473f882001-02-23 17:55:21 +00005078 }
5079 return(ret);
5080}
5081
5082/**
Owen Taylor3473f882001-02-23 17:55:21 +00005083 * xmlParseConditionalSections
5084 * @ctxt: an XML parser context
5085 *
5086 * [61] conditionalSect ::= includeSect | ignoreSect
5087 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5088 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5089 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5090 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5091 */
5092
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005093static void
Owen Taylor3473f882001-02-23 17:55:21 +00005094xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5095 SKIP(3);
5096 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005097 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005098 SKIP(7);
5099 SKIP_BLANKS;
5100 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005101 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 } else {
5103 NEXT;
5104 }
5105 if (xmlParserDebugEntities) {
5106 if ((ctxt->input != NULL) && (ctxt->input->filename))
5107 xmlGenericError(xmlGenericErrorContext,
5108 "%s(%d): ", ctxt->input->filename,
5109 ctxt->input->line);
5110 xmlGenericError(xmlGenericErrorContext,
5111 "Entering INCLUDE Conditional Section\n");
5112 }
5113
5114 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5115 (NXT(2) != '>'))) {
5116 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005117 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005118
5119 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5120 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005121 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005122 NEXT;
5123 } else if (RAW == '%') {
5124 xmlParsePEReference(ctxt);
5125 } else
5126 xmlParseMarkupDecl(ctxt);
5127
5128 /*
5129 * Pop-up of finished entities.
5130 */
5131 while ((RAW == 0) && (ctxt->inputNr > 1))
5132 xmlPopInput(ctxt);
5133
Daniel Veillardfdc91562002-07-01 21:52:03 +00005134 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005135 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005136 break;
5137 }
5138 }
5139 if (xmlParserDebugEntities) {
5140 if ((ctxt->input != NULL) && (ctxt->input->filename))
5141 xmlGenericError(xmlGenericErrorContext,
5142 "%s(%d): ", ctxt->input->filename,
5143 ctxt->input->line);
5144 xmlGenericError(xmlGenericErrorContext,
5145 "Leaving INCLUDE Conditional Section\n");
5146 }
5147
Daniel Veillarda07050d2003-10-19 14:46:32 +00005148 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005149 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005150 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005151 int depth = 0;
5152
5153 SKIP(6);
5154 SKIP_BLANKS;
5155 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005156 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 } else {
5158 NEXT;
5159 }
5160 if (xmlParserDebugEntities) {
5161 if ((ctxt->input != NULL) && (ctxt->input->filename))
5162 xmlGenericError(xmlGenericErrorContext,
5163 "%s(%d): ", ctxt->input->filename,
5164 ctxt->input->line);
5165 xmlGenericError(xmlGenericErrorContext,
5166 "Entering IGNORE Conditional Section\n");
5167 }
5168
5169 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005170 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005171 * But disable SAX event generating DTD building in the meantime
5172 */
5173 state = ctxt->disableSAX;
5174 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005175 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005176 ctxt->instate = XML_PARSER_IGNORE;
5177
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005178 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005179 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5180 depth++;
5181 SKIP(3);
5182 continue;
5183 }
5184 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5185 if (--depth >= 0) SKIP(3);
5186 continue;
5187 }
5188 NEXT;
5189 continue;
5190 }
5191
5192 ctxt->disableSAX = state;
5193 ctxt->instate = instate;
5194
5195 if (xmlParserDebugEntities) {
5196 if ((ctxt->input != NULL) && (ctxt->input->filename))
5197 xmlGenericError(xmlGenericErrorContext,
5198 "%s(%d): ", ctxt->input->filename,
5199 ctxt->input->line);
5200 xmlGenericError(xmlGenericErrorContext,
5201 "Leaving IGNORE Conditional Section\n");
5202 }
5203
5204 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005205 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005206 }
5207
5208 if (RAW == 0)
5209 SHRINK;
5210
5211 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005212 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005213 } else {
5214 SKIP(3);
5215 }
5216}
5217
5218/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005219 * xmlParseMarkupDecl:
5220 * @ctxt: an XML parser context
5221 *
5222 * parse Markup declarations
5223 *
5224 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5225 * NotationDecl | PI | Comment
5226 *
5227 * [ VC: Proper Declaration/PE Nesting ]
5228 * Parameter-entity replacement text must be properly nested with
5229 * markup declarations. That is to say, if either the first character
5230 * or the last character of a markup declaration (markupdecl above) is
5231 * contained in the replacement text for a parameter-entity reference,
5232 * both must be contained in the same replacement text.
5233 *
5234 * [ WFC: PEs in Internal Subset ]
5235 * In the internal DTD subset, parameter-entity references can occur
5236 * only where markup declarations can occur, not within markup declarations.
5237 * (This does not apply to references that occur in external parameter
5238 * entities or to the external subset.)
5239 */
5240void
5241xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5242 GROW;
5243 xmlParseElementDecl(ctxt);
5244 xmlParseAttributeListDecl(ctxt);
5245 xmlParseEntityDecl(ctxt);
5246 xmlParseNotationDecl(ctxt);
5247 xmlParsePI(ctxt);
5248 xmlParseComment(ctxt);
5249 /*
5250 * This is only for internal subset. On external entities,
5251 * the replacement is done before parsing stage
5252 */
5253 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5254 xmlParsePEReference(ctxt);
5255
5256 /*
5257 * Conditional sections are allowed from entities included
5258 * by PE References in the internal subset.
5259 */
5260 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5261 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5262 xmlParseConditionalSections(ctxt);
5263 }
5264 }
5265
5266 ctxt->instate = XML_PARSER_DTD;
5267}
5268
5269/**
5270 * xmlParseTextDecl:
5271 * @ctxt: an XML parser context
5272 *
5273 * parse an XML declaration header for external entities
5274 *
5275 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5276 *
5277 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5278 */
5279
5280void
5281xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5282 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005283 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005284
5285 /*
5286 * We know that '<?xml' is here.
5287 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005288 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005289 SKIP(5);
5290 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005291 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005292 return;
5293 }
5294
William M. Brack76e95df2003-10-18 16:20:14 +00005295 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005296 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5297 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005298 }
5299 SKIP_BLANKS;
5300
5301 /*
5302 * We may have the VersionInfo here.
5303 */
5304 version = xmlParseVersionInfo(ctxt);
5305 if (version == NULL)
5306 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005307 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005308 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5310 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005311 }
5312 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005313 ctxt->input->version = version;
5314
5315 /*
5316 * We must have the encoding declaration
5317 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005318 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005319 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5320 /*
5321 * The XML REC instructs us to stop parsing right here
5322 */
5323 return;
5324 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005325 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5326 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5327 "Missing encoding in text declaration\n");
5328 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005329
5330 SKIP_BLANKS;
5331 if ((RAW == '?') && (NXT(1) == '>')) {
5332 SKIP(2);
5333 } else if (RAW == '>') {
5334 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005335 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005336 NEXT;
5337 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005338 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005339 MOVETO_ENDTAG(CUR_PTR);
5340 NEXT;
5341 }
5342}
5343
5344/**
Owen Taylor3473f882001-02-23 17:55:21 +00005345 * xmlParseExternalSubset:
5346 * @ctxt: an XML parser context
5347 * @ExternalID: the external identifier
5348 * @SystemID: the system identifier (or URL)
5349 *
5350 * parse Markup declarations from an external subset
5351 *
5352 * [30] extSubset ::= textDecl? extSubsetDecl
5353 *
5354 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5355 */
5356void
5357xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5358 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005359 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005361 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005362 xmlParseTextDecl(ctxt);
5363 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5364 /*
5365 * The XML REC instructs us to stop parsing right here
5366 */
5367 ctxt->instate = XML_PARSER_EOF;
5368 return;
5369 }
5370 }
5371 if (ctxt->myDoc == NULL) {
5372 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5373 }
5374 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5375 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5376
5377 ctxt->instate = XML_PARSER_DTD;
5378 ctxt->external = 1;
5379 while (((RAW == '<') && (NXT(1) == '?')) ||
5380 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005381 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005382 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005383 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005384
5385 GROW;
5386 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5387 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005388 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005389 NEXT;
5390 } else if (RAW == '%') {
5391 xmlParsePEReference(ctxt);
5392 } else
5393 xmlParseMarkupDecl(ctxt);
5394
5395 /*
5396 * Pop-up of finished entities.
5397 */
5398 while ((RAW == 0) && (ctxt->inputNr > 1))
5399 xmlPopInput(ctxt);
5400
Daniel Veillardfdc91562002-07-01 21:52:03 +00005401 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005402 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005403 break;
5404 }
5405 }
5406
5407 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005408 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 }
5410
5411}
5412
5413/**
5414 * xmlParseReference:
5415 * @ctxt: an XML parser context
5416 *
5417 * parse and handle entity references in content, depending on the SAX
5418 * interface, this may end-up in a call to character() if this is a
5419 * CharRef, a predefined entity, if there is no reference() callback.
5420 * or if the parser was asked to switch to that mode.
5421 *
5422 * [67] Reference ::= EntityRef | CharRef
5423 */
5424void
5425xmlParseReference(xmlParserCtxtPtr ctxt) {
5426 xmlEntityPtr ent;
5427 xmlChar *val;
5428 if (RAW != '&') return;
5429
5430 if (NXT(1) == '#') {
5431 int i = 0;
5432 xmlChar out[10];
5433 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005434 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005435
5436 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5437 /*
5438 * So we are using non-UTF-8 buffers
5439 * Check that the char fit on 8bits, if not
5440 * generate a CharRef.
5441 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005442 if (value <= 0xFF) {
5443 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005444 out[1] = 0;
5445 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5446 (!ctxt->disableSAX))
5447 ctxt->sax->characters(ctxt->userData, out, 1);
5448 } else {
5449 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005450 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005452 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005453 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5454 (!ctxt->disableSAX))
5455 ctxt->sax->reference(ctxt->userData, out);
5456 }
5457 } else {
5458 /*
5459 * Just encode the value in UTF-8
5460 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005461 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005462 out[i] = 0;
5463 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5464 (!ctxt->disableSAX))
5465 ctxt->sax->characters(ctxt->userData, out, i);
5466 }
5467 } else {
5468 ent = xmlParseEntityRef(ctxt);
5469 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005470 if (!ctxt->wellFormed)
5471 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 if ((ent->name != NULL) &&
5473 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5474 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005475 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005476
5477
5478 /*
5479 * The first reference to the entity trigger a parsing phase
5480 * where the ent->children is filled with the result from
5481 * the parsing.
5482 */
5483 if (ent->children == NULL) {
5484 xmlChar *value;
5485 value = ent->content;
5486
5487 /*
5488 * Check that this entity is well formed
5489 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005490 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005491 (value[1] == 0) && (value[0] == '<') &&
5492 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5493 /*
5494 * DONE: get definite answer on this !!!
5495 * Lots of entity decls are used to declare a single
5496 * char
5497 * <!ENTITY lt "<">
5498 * Which seems to be valid since
5499 * 2.4: The ampersand character (&) and the left angle
5500 * bracket (<) may appear in their literal form only
5501 * when used ... They are also legal within the literal
5502 * entity value of an internal entity declaration;i
5503 * see "4.3.2 Well-Formed Parsed Entities".
5504 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5505 * Looking at the OASIS test suite and James Clark
5506 * tests, this is broken. However the XML REC uses
5507 * it. Is the XML REC not well-formed ????
5508 * This is a hack to avoid this problem
5509 *
5510 * ANSWER: since lt gt amp .. are already defined,
5511 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005512 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005513 * is lousy but acceptable.
5514 */
5515 list = xmlNewDocText(ctxt->myDoc, value);
5516 if (list != NULL) {
5517 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5518 (ent->children == NULL)) {
5519 ent->children = list;
5520 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005521 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005522 list->parent = (xmlNodePtr) ent;
5523 } else {
5524 xmlFreeNodeList(list);
5525 }
5526 } else if (list != NULL) {
5527 xmlFreeNodeList(list);
5528 }
5529 } else {
5530 /*
5531 * 4.3.2: An internal general parsed entity is well-formed
5532 * if its replacement text matches the production labeled
5533 * content.
5534 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005535
5536 void *user_data;
5537 /*
5538 * This is a bit hackish but this seems the best
5539 * way to make sure both SAX and DOM entity support
5540 * behaves okay.
5541 */
5542 if (ctxt->userData == ctxt)
5543 user_data = NULL;
5544 else
5545 user_data = ctxt->userData;
5546
Owen Taylor3473f882001-02-23 17:55:21 +00005547 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5548 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005549 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5550 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005551 ctxt->depth--;
5552 } else if (ent->etype ==
5553 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5554 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005555 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005556 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005557 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005558 ctxt->depth--;
5559 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005560 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005561 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5562 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005563 }
5564 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005565 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005566 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005567 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005568 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5569 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005570 (ent->children == NULL)) {
5571 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005572 if (ctxt->replaceEntities) {
5573 /*
5574 * Prune it directly in the generated document
5575 * except for single text nodes.
5576 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005577 if (((list->type == XML_TEXT_NODE) &&
5578 (list->next == NULL)) ||
5579 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005580 list->parent = (xmlNodePtr) ent;
5581 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005582 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005583 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005584 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005585 while (list != NULL) {
5586 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005587 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 if (list->next == NULL)
5589 ent->last = list;
5590 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005591 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005592 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005593#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005594 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5595 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005596#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005597 }
5598 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005599 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005600 while (list != NULL) {
5601 list->parent = (xmlNodePtr) ent;
5602 if (list->next == NULL)
5603 ent->last = list;
5604 list = list->next;
5605 }
Owen Taylor3473f882001-02-23 17:55:21 +00005606 }
5607 } else {
5608 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005609 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005610 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005611 } else if ((ret != XML_ERR_OK) &&
5612 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005613 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005614 } else if (list != NULL) {
5615 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005616 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005617 }
5618 }
5619 }
5620 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5621 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5622 /*
5623 * Create a node.
5624 */
5625 ctxt->sax->reference(ctxt->userData, ent->name);
5626 return;
5627 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005628 /*
5629 * There is a problem on the handling of _private for entities
5630 * (bug 155816): Should we copy the content of the field from
5631 * the entity (possibly overwriting some value set by the user
5632 * when a copy is created), should we leave it alone, or should
5633 * we try to take care of different situations? The problem
5634 * is exacerbated by the usage of this field by the xmlReader.
5635 * To fix this bug, we look at _private on the created node
5636 * and, if it's NULL, we copy in whatever was in the entity.
5637 * If it's not NULL we leave it alone. This is somewhat of a
5638 * hack - maybe we should have further tests to determine
5639 * what to do.
5640 */
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5642 /*
5643 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005644 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005645 * In the first occurrence list contains the replacement.
5646 * progressive == 2 means we are operating on the Reader
5647 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005648 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005649 if (((list == NULL) && (ent->owner == 0)) ||
5650 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005651 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005652
5653 /*
5654 * when operating on a reader, the entities definitions
5655 * are always owning the entities subtree.
5656 if (ctxt->parseMode == XML_PARSE_READER)
5657 ent->owner = 1;
5658 */
5659
Daniel Veillard62f313b2001-07-04 19:49:14 +00005660 cur = ent->children;
5661 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005662 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005663 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005664 if (nw->_private == NULL)
5665 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005666 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005667 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005668 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005669 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005670 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005671 if (cur == ent->last) {
5672 /*
5673 * needed to detect some strange empty
5674 * node cases in the reader tests
5675 */
5676 if ((ctxt->parseMode == XML_PARSE_READER) &&
5677 (nw->type == XML_ELEMENT_NODE) &&
5678 (nw->children == NULL))
5679 nw->extra = 1;
5680
Daniel Veillard62f313b2001-07-04 19:49:14 +00005681 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005682 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005683 cur = cur->next;
5684 }
Daniel Veillard81273902003-09-30 00:43:48 +00005685#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005686 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005687 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005688#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005689 } else if (list == NULL) {
5690 xmlNodePtr nw = NULL, cur, next, last,
5691 firstChild = NULL;
5692 /*
5693 * Copy the entity child list and make it the new
5694 * entity child list. The goal is to make sure any
5695 * ID or REF referenced will be the one from the
5696 * document content and not the entity copy.
5697 */
5698 cur = ent->children;
5699 ent->children = NULL;
5700 last = ent->last;
5701 ent->last = NULL;
5702 while (cur != NULL) {
5703 next = cur->next;
5704 cur->next = NULL;
5705 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005706 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005707 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005708 if (nw->_private == NULL)
5709 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005710 if (firstChild == NULL){
5711 firstChild = cur;
5712 }
5713 xmlAddChild((xmlNodePtr) ent, nw);
5714 xmlAddChild(ctxt->node, cur);
5715 }
5716 if (cur == last)
5717 break;
5718 cur = next;
5719 }
5720 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005721#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005722 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5723 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005724#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005725 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005726 const xmlChar *nbktext;
5727
Daniel Veillard62f313b2001-07-04 19:49:14 +00005728 /*
5729 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005730 * node with a possible previous text one which
5731 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005732 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005733 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5734 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005735 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005736 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005737 if ((ent->last != ent->children) &&
5738 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005739 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005740 xmlAddChildList(ctxt->node, ent->children);
5741 }
5742
Owen Taylor3473f882001-02-23 17:55:21 +00005743 /*
5744 * This is to avoid a nasty side effect, see
5745 * characters() in SAX.c
5746 */
5747 ctxt->nodemem = 0;
5748 ctxt->nodelen = 0;
5749 return;
5750 } else {
5751 /*
5752 * Probably running in SAX mode
5753 */
5754 xmlParserInputPtr input;
5755
5756 input = xmlNewEntityInputStream(ctxt, ent);
5757 xmlPushInput(ctxt, input);
5758 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005759 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5760 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005761 xmlParseTextDecl(ctxt);
5762 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5763 /*
5764 * The XML REC instructs us to stop parsing right here
5765 */
5766 ctxt->instate = XML_PARSER_EOF;
5767 return;
5768 }
5769 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005770 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5771 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005772 }
5773 }
5774 return;
5775 }
5776 }
5777 } else {
5778 val = ent->content;
5779 if (val == NULL) return;
5780 /*
5781 * inline the entity.
5782 */
5783 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5784 (!ctxt->disableSAX))
5785 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5786 }
5787 }
5788}
5789
5790/**
5791 * xmlParseEntityRef:
5792 * @ctxt: an XML parser context
5793 *
5794 * parse ENTITY references declarations
5795 *
5796 * [68] EntityRef ::= '&' Name ';'
5797 *
5798 * [ WFC: Entity Declared ]
5799 * In a document without any DTD, a document with only an internal DTD
5800 * subset which contains no parameter entity references, or a document
5801 * with "standalone='yes'", the Name given in the entity reference
5802 * must match that in an entity declaration, except that well-formed
5803 * documents need not declare any of the following entities: amp, lt,
5804 * gt, apos, quot. The declaration of a parameter entity must precede
5805 * any reference to it. Similarly, the declaration of a general entity
5806 * must precede any reference to it which appears in a default value in an
5807 * attribute-list declaration. Note that if entities are declared in the
5808 * external subset or in external parameter entities, a non-validating
5809 * processor is not obligated to read and process their declarations;
5810 * for such documents, the rule that an entity must be declared is a
5811 * well-formedness constraint only if standalone='yes'.
5812 *
5813 * [ WFC: Parsed Entity ]
5814 * An entity reference must not contain the name of an unparsed entity
5815 *
5816 * Returns the xmlEntityPtr if found, or NULL otherwise.
5817 */
5818xmlEntityPtr
5819xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005820 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005821 xmlEntityPtr ent = NULL;
5822
5823 GROW;
5824
5825 if (RAW == '&') {
5826 NEXT;
5827 name = xmlParseName(ctxt);
5828 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005829 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5830 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005831 } else {
5832 if (RAW == ';') {
5833 NEXT;
5834 /*
5835 * Ask first SAX for entity resolution, otherwise try the
5836 * predefined set.
5837 */
5838 if (ctxt->sax != NULL) {
5839 if (ctxt->sax->getEntity != NULL)
5840 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005841 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005842 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005843 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5844 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005845 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005846 }
Owen Taylor3473f882001-02-23 17:55:21 +00005847 }
5848 /*
5849 * [ WFC: Entity Declared ]
5850 * In a document without any DTD, a document with only an
5851 * internal DTD subset which contains no parameter entity
5852 * references, or a document with "standalone='yes'", the
5853 * Name given in the entity reference must match that in an
5854 * entity declaration, except that well-formed documents
5855 * need not declare any of the following entities: amp, lt,
5856 * gt, apos, quot.
5857 * The declaration of a parameter entity must precede any
5858 * reference to it.
5859 * Similarly, the declaration of a general entity must
5860 * precede any reference to it which appears in a default
5861 * value in an attribute-list declaration. Note that if
5862 * entities are declared in the external subset or in
5863 * external parameter entities, a non-validating processor
5864 * is not obligated to read and process their declarations;
5865 * for such documents, the rule that an entity must be
5866 * declared is a well-formedness constraint only if
5867 * standalone='yes'.
5868 */
5869 if (ent == NULL) {
5870 if ((ctxt->standalone == 1) ||
5871 ((ctxt->hasExternalSubset == 0) &&
5872 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005873 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005874 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005875 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005876 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005877 "Entity '%s' not defined\n", name);
5878 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005879 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005880 }
5881
5882 /*
5883 * [ WFC: Parsed Entity ]
5884 * An entity reference must not contain the name of an
5885 * unparsed entity
5886 */
5887 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005888 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005889 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005890 }
5891
5892 /*
5893 * [ WFC: No External Entity References ]
5894 * Attribute values cannot contain direct or indirect
5895 * entity references to external entities.
5896 */
5897 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5898 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005899 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5900 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005901 }
5902 /*
5903 * [ WFC: No < in Attribute Values ]
5904 * The replacement text of any entity referred to directly or
5905 * indirectly in an attribute value (other than "&lt;") must
5906 * not contain a <.
5907 */
5908 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5909 (ent != NULL) &&
5910 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5911 (ent->content != NULL) &&
5912 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005913 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005914 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005915 }
5916
5917 /*
5918 * Internal check, no parameter entities here ...
5919 */
5920 else {
5921 switch (ent->etype) {
5922 case XML_INTERNAL_PARAMETER_ENTITY:
5923 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005924 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5925 "Attempt to reference the parameter entity '%s'\n",
5926 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 break;
5928 default:
5929 break;
5930 }
5931 }
5932
5933 /*
5934 * [ WFC: No Recursion ]
5935 * A parsed entity must not contain a recursive reference
5936 * to itself, either directly or indirectly.
5937 * Done somewhere else
5938 */
5939
5940 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005941 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 }
Owen Taylor3473f882001-02-23 17:55:21 +00005943 }
5944 }
5945 return(ent);
5946}
5947
5948/**
5949 * xmlParseStringEntityRef:
5950 * @ctxt: an XML parser context
5951 * @str: a pointer to an index in the string
5952 *
5953 * parse ENTITY references declarations, but this version parses it from
5954 * a string value.
5955 *
5956 * [68] EntityRef ::= '&' Name ';'
5957 *
5958 * [ WFC: Entity Declared ]
5959 * In a document without any DTD, a document with only an internal DTD
5960 * subset which contains no parameter entity references, or a document
5961 * with "standalone='yes'", the Name given in the entity reference
5962 * must match that in an entity declaration, except that well-formed
5963 * documents need not declare any of the following entities: amp, lt,
5964 * gt, apos, quot. The declaration of a parameter entity must precede
5965 * any reference to it. Similarly, the declaration of a general entity
5966 * must precede any reference to it which appears in a default value in an
5967 * attribute-list declaration. Note that if entities are declared in the
5968 * external subset or in external parameter entities, a non-validating
5969 * processor is not obligated to read and process their declarations;
5970 * for such documents, the rule that an entity must be declared is a
5971 * well-formedness constraint only if standalone='yes'.
5972 *
5973 * [ WFC: Parsed Entity ]
5974 * An entity reference must not contain the name of an unparsed entity
5975 *
5976 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5977 * is updated to the current location in the string.
5978 */
5979xmlEntityPtr
5980xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5981 xmlChar *name;
5982 const xmlChar *ptr;
5983 xmlChar cur;
5984 xmlEntityPtr ent = NULL;
5985
5986 if ((str == NULL) || (*str == NULL))
5987 return(NULL);
5988 ptr = *str;
5989 cur = *ptr;
5990 if (cur == '&') {
5991 ptr++;
5992 cur = *ptr;
5993 name = xmlParseStringName(ctxt, &ptr);
5994 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005995 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5996 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005997 } else {
5998 if (*ptr == ';') {
5999 ptr++;
6000 /*
6001 * Ask first SAX for entity resolution, otherwise try the
6002 * predefined set.
6003 */
6004 if (ctxt->sax != NULL) {
6005 if (ctxt->sax->getEntity != NULL)
6006 ent = ctxt->sax->getEntity(ctxt->userData, name);
6007 if (ent == NULL)
6008 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006009 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006010 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006011 }
Owen Taylor3473f882001-02-23 17:55:21 +00006012 }
6013 /*
6014 * [ WFC: Entity Declared ]
6015 * In a document without any DTD, a document with only an
6016 * internal DTD subset which contains no parameter entity
6017 * references, or a document with "standalone='yes'", the
6018 * Name given in the entity reference must match that in an
6019 * entity declaration, except that well-formed documents
6020 * need not declare any of the following entities: amp, lt,
6021 * gt, apos, quot.
6022 * The declaration of a parameter entity must precede any
6023 * reference to it.
6024 * Similarly, the declaration of a general entity must
6025 * precede any reference to it which appears in a default
6026 * value in an attribute-list declaration. Note that if
6027 * entities are declared in the external subset or in
6028 * external parameter entities, a non-validating processor
6029 * is not obligated to read and process their declarations;
6030 * for such documents, the rule that an entity must be
6031 * declared is a well-formedness constraint only if
6032 * standalone='yes'.
6033 */
6034 if (ent == NULL) {
6035 if ((ctxt->standalone == 1) ||
6036 ((ctxt->hasExternalSubset == 0) &&
6037 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006038 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006039 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006040 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006041 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006042 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006043 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006044 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006045 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006046 }
6047
6048 /*
6049 * [ WFC: Parsed Entity ]
6050 * An entity reference must not contain the name of an
6051 * unparsed entity
6052 */
6053 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006054 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006055 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006056 }
6057
6058 /*
6059 * [ WFC: No External Entity References ]
6060 * Attribute values cannot contain direct or indirect
6061 * entity references to external entities.
6062 */
6063 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6064 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006065 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006066 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006067 }
6068 /*
6069 * [ WFC: No < in Attribute Values ]
6070 * The replacement text of any entity referred to directly or
6071 * indirectly in an attribute value (other than "&lt;") must
6072 * not contain a <.
6073 */
6074 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6075 (ent != NULL) &&
6076 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6077 (ent->content != NULL) &&
6078 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006079 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6080 "'<' in entity '%s' is not allowed in attributes values\n",
6081 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006082 }
6083
6084 /*
6085 * Internal check, no parameter entities here ...
6086 */
6087 else {
6088 switch (ent->etype) {
6089 case XML_INTERNAL_PARAMETER_ENTITY:
6090 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006091 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6092 "Attempt to reference the parameter entity '%s'\n",
6093 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006094 break;
6095 default:
6096 break;
6097 }
6098 }
6099
6100 /*
6101 * [ WFC: No Recursion ]
6102 * A parsed entity must not contain a recursive reference
6103 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006104 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006105 */
6106
6107 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006108 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006109 }
6110 xmlFree(name);
6111 }
6112 }
6113 *str = ptr;
6114 return(ent);
6115}
6116
6117/**
6118 * xmlParsePEReference:
6119 * @ctxt: an XML parser context
6120 *
6121 * parse PEReference declarations
6122 * The entity content is handled directly by pushing it's content as
6123 * a new input stream.
6124 *
6125 * [69] PEReference ::= '%' Name ';'
6126 *
6127 * [ WFC: No Recursion ]
6128 * A parsed entity must not contain a recursive
6129 * reference to itself, either directly or indirectly.
6130 *
6131 * [ WFC: Entity Declared ]
6132 * In a document without any DTD, a document with only an internal DTD
6133 * subset which contains no parameter entity references, or a document
6134 * with "standalone='yes'", ... ... The declaration of a parameter
6135 * entity must precede any reference to it...
6136 *
6137 * [ VC: Entity Declared ]
6138 * In a document with an external subset or external parameter entities
6139 * with "standalone='no'", ... ... The declaration of a parameter entity
6140 * must precede any reference to it...
6141 *
6142 * [ WFC: In DTD ]
6143 * Parameter-entity references may only appear in the DTD.
6144 * NOTE: misleading but this is handled.
6145 */
6146void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006147xmlParsePEReference(xmlParserCtxtPtr ctxt)
6148{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006149 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006150 xmlEntityPtr entity = NULL;
6151 xmlParserInputPtr input;
6152
6153 if (RAW == '%') {
6154 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006155 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006156 if (name == NULL) {
6157 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6158 "xmlParsePEReference: no name\n");
6159 } else {
6160 if (RAW == ';') {
6161 NEXT;
6162 if ((ctxt->sax != NULL) &&
6163 (ctxt->sax->getParameterEntity != NULL))
6164 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6165 name);
6166 if (entity == NULL) {
6167 /*
6168 * [ WFC: Entity Declared ]
6169 * In a document without any DTD, a document with only an
6170 * internal DTD subset which contains no parameter entity
6171 * references, or a document with "standalone='yes'", ...
6172 * ... The declaration of a parameter entity must precede
6173 * any reference to it...
6174 */
6175 if ((ctxt->standalone == 1) ||
6176 ((ctxt->hasExternalSubset == 0) &&
6177 (ctxt->hasPErefs == 0))) {
6178 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6179 "PEReference: %%%s; not found\n",
6180 name);
6181 } else {
6182 /*
6183 * [ VC: Entity Declared ]
6184 * In a document with an external subset or external
6185 * parameter entities with "standalone='no'", ...
6186 * ... The declaration of a parameter entity must
6187 * precede any reference to it...
6188 */
6189 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6190 "PEReference: %%%s; not found\n",
6191 name, NULL);
6192 ctxt->valid = 0;
6193 }
6194 } else {
6195 /*
6196 * Internal checking in case the entity quest barfed
6197 */
6198 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6199 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6200 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6201 "Internal: %%%s; is not a parameter entity\n",
6202 name, NULL);
6203 } else if (ctxt->input->free != deallocblankswrapper) {
6204 input =
6205 xmlNewBlanksWrapperInputStream(ctxt, entity);
6206 xmlPushInput(ctxt, input);
6207 } else {
6208 /*
6209 * TODO !!!
6210 * handle the extra spaces added before and after
6211 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6212 */
6213 input = xmlNewEntityInputStream(ctxt, entity);
6214 xmlPushInput(ctxt, input);
6215 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006216 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006217 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006218 xmlParseTextDecl(ctxt);
6219 if (ctxt->errNo ==
6220 XML_ERR_UNSUPPORTED_ENCODING) {
6221 /*
6222 * The XML REC instructs us to stop parsing
6223 * right here
6224 */
6225 ctxt->instate = XML_PARSER_EOF;
6226 return;
6227 }
6228 }
6229 }
6230 }
6231 ctxt->hasPErefs = 1;
6232 } else {
6233 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6234 }
6235 }
Owen Taylor3473f882001-02-23 17:55:21 +00006236 }
6237}
6238
6239/**
6240 * xmlParseStringPEReference:
6241 * @ctxt: an XML parser context
6242 * @str: a pointer to an index in the string
6243 *
6244 * parse PEReference declarations
6245 *
6246 * [69] PEReference ::= '%' Name ';'
6247 *
6248 * [ WFC: No Recursion ]
6249 * A parsed entity must not contain a recursive
6250 * reference to itself, either directly or indirectly.
6251 *
6252 * [ WFC: Entity Declared ]
6253 * In a document without any DTD, a document with only an internal DTD
6254 * subset which contains no parameter entity references, or a document
6255 * with "standalone='yes'", ... ... The declaration of a parameter
6256 * entity must precede any reference to it...
6257 *
6258 * [ VC: Entity Declared ]
6259 * In a document with an external subset or external parameter entities
6260 * with "standalone='no'", ... ... The declaration of a parameter entity
6261 * must precede any reference to it...
6262 *
6263 * [ WFC: In DTD ]
6264 * Parameter-entity references may only appear in the DTD.
6265 * NOTE: misleading but this is handled.
6266 *
6267 * Returns the string of the entity content.
6268 * str is updated to the current value of the index
6269 */
6270xmlEntityPtr
6271xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6272 const xmlChar *ptr;
6273 xmlChar cur;
6274 xmlChar *name;
6275 xmlEntityPtr entity = NULL;
6276
6277 if ((str == NULL) || (*str == NULL)) return(NULL);
6278 ptr = *str;
6279 cur = *ptr;
6280 if (cur == '%') {
6281 ptr++;
6282 cur = *ptr;
6283 name = xmlParseStringName(ctxt, &ptr);
6284 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006285 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6286 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006287 } else {
6288 cur = *ptr;
6289 if (cur == ';') {
6290 ptr++;
6291 cur = *ptr;
6292 if ((ctxt->sax != NULL) &&
6293 (ctxt->sax->getParameterEntity != NULL))
6294 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6295 name);
6296 if (entity == NULL) {
6297 /*
6298 * [ WFC: Entity Declared ]
6299 * In a document without any DTD, a document with only an
6300 * internal DTD subset which contains no parameter entity
6301 * references, or a document with "standalone='yes'", ...
6302 * ... The declaration of a parameter entity must precede
6303 * any reference to it...
6304 */
6305 if ((ctxt->standalone == 1) ||
6306 ((ctxt->hasExternalSubset == 0) &&
6307 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006308 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006309 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006310 } else {
6311 /*
6312 * [ VC: Entity Declared ]
6313 * In a document with an external subset or external
6314 * parameter entities with "standalone='no'", ...
6315 * ... The declaration of a parameter entity must
6316 * precede any reference to it...
6317 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006318 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6319 "PEReference: %%%s; not found\n",
6320 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006321 ctxt->valid = 0;
6322 }
6323 } else {
6324 /*
6325 * Internal checking in case the entity quest barfed
6326 */
6327 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6328 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006329 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6330 "%%%s; is not a parameter entity\n",
6331 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006332 }
6333 }
6334 ctxt->hasPErefs = 1;
6335 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006336 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 }
6338 xmlFree(name);
6339 }
6340 }
6341 *str = ptr;
6342 return(entity);
6343}
6344
6345/**
6346 * xmlParseDocTypeDecl:
6347 * @ctxt: an XML parser context
6348 *
6349 * parse a DOCTYPE declaration
6350 *
6351 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6352 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6353 *
6354 * [ VC: Root Element Type ]
6355 * The Name in the document type declaration must match the element
6356 * type of the root element.
6357 */
6358
6359void
6360xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006361 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006362 xmlChar *ExternalID = NULL;
6363 xmlChar *URI = NULL;
6364
6365 /*
6366 * We know that '<!DOCTYPE' has been detected.
6367 */
6368 SKIP(9);
6369
6370 SKIP_BLANKS;
6371
6372 /*
6373 * Parse the DOCTYPE name.
6374 */
6375 name = xmlParseName(ctxt);
6376 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006377 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6378 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006379 }
6380 ctxt->intSubName = name;
6381
6382 SKIP_BLANKS;
6383
6384 /*
6385 * Check for SystemID and ExternalID
6386 */
6387 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6388
6389 if ((URI != NULL) || (ExternalID != NULL)) {
6390 ctxt->hasExternalSubset = 1;
6391 }
6392 ctxt->extSubURI = URI;
6393 ctxt->extSubSystem = ExternalID;
6394
6395 SKIP_BLANKS;
6396
6397 /*
6398 * Create and update the internal subset.
6399 */
6400 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6401 (!ctxt->disableSAX))
6402 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6403
6404 /*
6405 * Is there any internal subset declarations ?
6406 * they are handled separately in xmlParseInternalSubset()
6407 */
6408 if (RAW == '[')
6409 return;
6410
6411 /*
6412 * We should be at the end of the DOCTYPE declaration.
6413 */
6414 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006415 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006416 }
6417 NEXT;
6418}
6419
6420/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006421 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006422 * @ctxt: an XML parser context
6423 *
6424 * parse the internal subset declaration
6425 *
6426 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6427 */
6428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006429static void
Owen Taylor3473f882001-02-23 17:55:21 +00006430xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6431 /*
6432 * Is there any DTD definition ?
6433 */
6434 if (RAW == '[') {
6435 ctxt->instate = XML_PARSER_DTD;
6436 NEXT;
6437 /*
6438 * Parse the succession of Markup declarations and
6439 * PEReferences.
6440 * Subsequence (markupdecl | PEReference | S)*
6441 */
6442 while (RAW != ']') {
6443 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006444 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006445
6446 SKIP_BLANKS;
6447 xmlParseMarkupDecl(ctxt);
6448 xmlParsePEReference(ctxt);
6449
6450 /*
6451 * Pop-up of finished entities.
6452 */
6453 while ((RAW == 0) && (ctxt->inputNr > 1))
6454 xmlPopInput(ctxt);
6455
6456 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006457 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006458 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006459 break;
6460 }
6461 }
6462 if (RAW == ']') {
6463 NEXT;
6464 SKIP_BLANKS;
6465 }
6466 }
6467
6468 /*
6469 * We should be at the end of the DOCTYPE declaration.
6470 */
6471 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006472 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006473 }
6474 NEXT;
6475}
6476
Daniel Veillard81273902003-09-30 00:43:48 +00006477#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006478/**
6479 * xmlParseAttribute:
6480 * @ctxt: an XML parser context
6481 * @value: a xmlChar ** used to store the value of the attribute
6482 *
6483 * parse an attribute
6484 *
6485 * [41] Attribute ::= Name Eq AttValue
6486 *
6487 * [ WFC: No External Entity References ]
6488 * Attribute values cannot contain direct or indirect entity references
6489 * to external entities.
6490 *
6491 * [ WFC: No < in Attribute Values ]
6492 * The replacement text of any entity referred to directly or indirectly in
6493 * an attribute value (other than "&lt;") must not contain a <.
6494 *
6495 * [ VC: Attribute Value Type ]
6496 * The attribute must have been declared; the value must be of the type
6497 * declared for it.
6498 *
6499 * [25] Eq ::= S? '=' S?
6500 *
6501 * With namespace:
6502 *
6503 * [NS 11] Attribute ::= QName Eq AttValue
6504 *
6505 * Also the case QName == xmlns:??? is handled independently as a namespace
6506 * definition.
6507 *
6508 * Returns the attribute name, and the value in *value.
6509 */
6510
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006511const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006512xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006513 const xmlChar *name;
6514 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006515
6516 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006517 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006518 name = xmlParseName(ctxt);
6519 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006520 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006521 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006522 return(NULL);
6523 }
6524
6525 /*
6526 * read the value
6527 */
6528 SKIP_BLANKS;
6529 if (RAW == '=') {
6530 NEXT;
6531 SKIP_BLANKS;
6532 val = xmlParseAttValue(ctxt);
6533 ctxt->instate = XML_PARSER_CONTENT;
6534 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006535 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006536 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006537 return(NULL);
6538 }
6539
6540 /*
6541 * Check that xml:lang conforms to the specification
6542 * No more registered as an error, just generate a warning now
6543 * since this was deprecated in XML second edition
6544 */
6545 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6546 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006547 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6548 "Malformed value for xml:lang : %s\n",
6549 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006550 }
6551 }
6552
6553 /*
6554 * Check that xml:space conforms to the specification
6555 */
6556 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6557 if (xmlStrEqual(val, BAD_CAST "default"))
6558 *(ctxt->space) = 0;
6559 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6560 *(ctxt->space) = 1;
6561 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006562 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006563"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006564 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
6566 }
6567
6568 *value = val;
6569 return(name);
6570}
6571
6572/**
6573 * xmlParseStartTag:
6574 * @ctxt: an XML parser context
6575 *
6576 * parse a start of tag either for rule element or
6577 * EmptyElement. In both case we don't parse the tag closing chars.
6578 *
6579 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6580 *
6581 * [ WFC: Unique Att Spec ]
6582 * No attribute name may appear more than once in the same start-tag or
6583 * empty-element tag.
6584 *
6585 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6586 *
6587 * [ WFC: Unique Att Spec ]
6588 * No attribute name may appear more than once in the same start-tag or
6589 * empty-element tag.
6590 *
6591 * With namespace:
6592 *
6593 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6594 *
6595 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6596 *
6597 * Returns the element name parsed
6598 */
6599
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006600const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006601xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006602 const xmlChar *name;
6603 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006604 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006605 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006606 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006607 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006608 int i;
6609
6610 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006611 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006612
6613 name = xmlParseName(ctxt);
6614 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006615 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006616 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006617 return(NULL);
6618 }
6619
6620 /*
6621 * Now parse the attributes, it ends up with the ending
6622 *
6623 * (S Attribute)* S?
6624 */
6625 SKIP_BLANKS;
6626 GROW;
6627
Daniel Veillard21a0f912001-02-25 19:54:14 +00006628 while ((RAW != '>') &&
6629 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006630 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006631 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006632 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006633
6634 attname = xmlParseAttribute(ctxt, &attvalue);
6635 if ((attname != NULL) && (attvalue != NULL)) {
6636 /*
6637 * [ WFC: Unique Att Spec ]
6638 * No attribute name may appear more than once in the same
6639 * start-tag or empty-element tag.
6640 */
6641 for (i = 0; i < nbatts;i += 2) {
6642 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006643 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006644 xmlFree(attvalue);
6645 goto failed;
6646 }
6647 }
Owen Taylor3473f882001-02-23 17:55:21 +00006648 /*
6649 * Add the pair to atts
6650 */
6651 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006652 maxatts = 22; /* allow for 10 attrs by default */
6653 atts = (const xmlChar **)
6654 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006655 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006656 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006657 if (attvalue != NULL)
6658 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006659 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006660 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006661 ctxt->atts = atts;
6662 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006663 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006664 const xmlChar **n;
6665
Owen Taylor3473f882001-02-23 17:55:21 +00006666 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006667 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006668 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006669 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006670 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006671 if (attvalue != NULL)
6672 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006673 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006674 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006675 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006676 ctxt->atts = atts;
6677 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006678 }
6679 atts[nbatts++] = attname;
6680 atts[nbatts++] = attvalue;
6681 atts[nbatts] = NULL;
6682 atts[nbatts + 1] = NULL;
6683 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006684 if (attvalue != NULL)
6685 xmlFree(attvalue);
6686 }
6687
6688failed:
6689
Daniel Veillard3772de32002-12-17 10:31:45 +00006690 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006691 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6692 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006693 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6695 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006696 }
6697 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006698 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6699 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006700 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6701 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006702 break;
6703 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006704 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006705 GROW;
6706 }
6707
6708 /*
6709 * SAX: Start of Element !
6710 */
6711 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006712 (!ctxt->disableSAX)) {
6713 if (nbatts > 0)
6714 ctxt->sax->startElement(ctxt->userData, name, atts);
6715 else
6716 ctxt->sax->startElement(ctxt->userData, name, NULL);
6717 }
Owen Taylor3473f882001-02-23 17:55:21 +00006718
6719 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006720 /* Free only the content strings */
6721 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006722 if (atts[i] != NULL)
6723 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006724 }
6725 return(name);
6726}
6727
6728/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006729 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006730 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006731 * @line: line of the start tag
6732 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006733 *
6734 * parse an end of tag
6735 *
6736 * [42] ETag ::= '</' Name S? '>'
6737 *
6738 * With namespace
6739 *
6740 * [NS 9] ETag ::= '</' QName S? '>'
6741 */
6742
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006743static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006744xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006745 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006746
6747 GROW;
6748 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006749 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006750 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006751 return;
6752 }
6753 SKIP(2);
6754
Daniel Veillard46de64e2002-05-29 08:21:33 +00006755 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006756
6757 /*
6758 * We should definitely be at the ending "S? '>'" part
6759 */
6760 GROW;
6761 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006762 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006763 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006764 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006765 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006766
6767 /*
6768 * [ WFC: Element Type Match ]
6769 * The Name in an element's end-tag must match the element type in the
6770 * start-tag.
6771 *
6772 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006773 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006774 if (name == NULL) name = BAD_CAST "unparseable";
6775 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006776 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006777 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006778 }
6779
6780 /*
6781 * SAX: End of Tag
6782 */
6783 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6784 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006785 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006786
Daniel Veillarde57ec792003-09-10 10:50:59 +00006787 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006788 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006789 return;
6790}
6791
6792/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006793 * xmlParseEndTag:
6794 * @ctxt: an XML parser context
6795 *
6796 * parse an end of tag
6797 *
6798 * [42] ETag ::= '</' Name S? '>'
6799 *
6800 * With namespace
6801 *
6802 * [NS 9] ETag ::= '</' QName S? '>'
6803 */
6804
6805void
6806xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006807 xmlParseEndTag1(ctxt, 0);
6808}
Daniel Veillard81273902003-09-30 00:43:48 +00006809#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006810
6811/************************************************************************
6812 * *
6813 * SAX 2 specific operations *
6814 * *
6815 ************************************************************************/
6816
6817static const xmlChar *
6818xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6819 int len = 0, l;
6820 int c;
6821 int count = 0;
6822
6823 /*
6824 * Handler for more complex cases
6825 */
6826 GROW;
6827 c = CUR_CHAR(l);
6828 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006829 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006830 return(NULL);
6831 }
6832
6833 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006834 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006835 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006836 (IS_COMBINING(c)) ||
6837 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006838 if (count++ > 100) {
6839 count = 0;
6840 GROW;
6841 }
6842 len += l;
6843 NEXTL(l);
6844 c = CUR_CHAR(l);
6845 }
6846 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6847}
6848
6849/*
6850 * xmlGetNamespace:
6851 * @ctxt: an XML parser context
6852 * @prefix: the prefix to lookup
6853 *
6854 * Lookup the namespace name for the @prefix (which ca be NULL)
6855 * The prefix must come from the @ctxt->dict dictionnary
6856 *
6857 * Returns the namespace name or NULL if not bound
6858 */
6859static const xmlChar *
6860xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6861 int i;
6862
Daniel Veillarde57ec792003-09-10 10:50:59 +00006863 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006864 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006865 if (ctxt->nsTab[i] == prefix) {
6866 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6867 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006868 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006869 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006870 return(NULL);
6871}
6872
6873/**
6874 * xmlParseNCName:
6875 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006876 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006877 *
6878 * parse an XML name.
6879 *
6880 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6881 * CombiningChar | Extender
6882 *
6883 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6884 *
6885 * Returns the Name parsed or NULL
6886 */
6887
6888static const xmlChar *
6889xmlParseNCName(xmlParserCtxtPtr ctxt) {
6890 const xmlChar *in;
6891 const xmlChar *ret;
6892 int count = 0;
6893
6894 /*
6895 * Accelerator for simple ASCII names
6896 */
6897 in = ctxt->input->cur;
6898 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6899 ((*in >= 0x41) && (*in <= 0x5A)) ||
6900 (*in == '_')) {
6901 in++;
6902 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6903 ((*in >= 0x41) && (*in <= 0x5A)) ||
6904 ((*in >= 0x30) && (*in <= 0x39)) ||
6905 (*in == '_') || (*in == '-') ||
6906 (*in == '.'))
6907 in++;
6908 if ((*in > 0) && (*in < 0x80)) {
6909 count = in - ctxt->input->cur;
6910 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6911 ctxt->input->cur = in;
6912 ctxt->nbChars += count;
6913 ctxt->input->col += count;
6914 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006915 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006916 }
6917 return(ret);
6918 }
6919 }
6920 return(xmlParseNCNameComplex(ctxt));
6921}
6922
6923/**
6924 * xmlParseQName:
6925 * @ctxt: an XML parser context
6926 * @prefix: pointer to store the prefix part
6927 *
6928 * parse an XML Namespace QName
6929 *
6930 * [6] QName ::= (Prefix ':')? LocalPart
6931 * [7] Prefix ::= NCName
6932 * [8] LocalPart ::= NCName
6933 *
6934 * Returns the Name parsed or NULL
6935 */
6936
6937static const xmlChar *
6938xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6939 const xmlChar *l, *p;
6940
6941 GROW;
6942
6943 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006944 if (l == NULL) {
6945 if (CUR == ':') {
6946 l = xmlParseName(ctxt);
6947 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006948 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6949 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006950 *prefix = NULL;
6951 return(l);
6952 }
6953 }
6954 return(NULL);
6955 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006956 if (CUR == ':') {
6957 NEXT;
6958 p = l;
6959 l = xmlParseNCName(ctxt);
6960 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006961 xmlChar *tmp;
6962
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006963 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6964 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006965 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6966 p = xmlDictLookup(ctxt->dict, tmp, -1);
6967 if (tmp != NULL) xmlFree(tmp);
6968 *prefix = NULL;
6969 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006970 }
6971 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006972 xmlChar *tmp;
6973
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006974 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6975 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006976 NEXT;
6977 tmp = (xmlChar *) xmlParseName(ctxt);
6978 if (tmp != NULL) {
6979 tmp = xmlBuildQName(tmp, l, NULL, 0);
6980 l = xmlDictLookup(ctxt->dict, tmp, -1);
6981 if (tmp != NULL) xmlFree(tmp);
6982 *prefix = p;
6983 return(l);
6984 }
6985 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6986 l = xmlDictLookup(ctxt->dict, tmp, -1);
6987 if (tmp != NULL) xmlFree(tmp);
6988 *prefix = p;
6989 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006990 }
6991 *prefix = p;
6992 } else
6993 *prefix = NULL;
6994 return(l);
6995}
6996
6997/**
6998 * xmlParseQNameAndCompare:
6999 * @ctxt: an XML parser context
7000 * @name: the localname
7001 * @prefix: the prefix, if any.
7002 *
7003 * parse an XML name and compares for match
7004 * (specialized for endtag parsing)
7005 *
7006 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7007 * and the name for mismatch
7008 */
7009
7010static const xmlChar *
7011xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7012 xmlChar const *prefix) {
7013 const xmlChar *cmp = name;
7014 const xmlChar *in;
7015 const xmlChar *ret;
7016 const xmlChar *prefix2;
7017
7018 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7019
7020 GROW;
7021 in = ctxt->input->cur;
7022
7023 cmp = prefix;
7024 while (*in != 0 && *in == *cmp) {
7025 ++in;
7026 ++cmp;
7027 }
7028 if ((*cmp == 0) && (*in == ':')) {
7029 in++;
7030 cmp = name;
7031 while (*in != 0 && *in == *cmp) {
7032 ++in;
7033 ++cmp;
7034 }
William M. Brack76e95df2003-10-18 16:20:14 +00007035 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007036 /* success */
7037 ctxt->input->cur = in;
7038 return((const xmlChar*) 1);
7039 }
7040 }
7041 /*
7042 * all strings coms from the dictionary, equality can be done directly
7043 */
7044 ret = xmlParseQName (ctxt, &prefix2);
7045 if ((ret == name) && (prefix == prefix2))
7046 return((const xmlChar*) 1);
7047 return ret;
7048}
7049
7050/**
7051 * xmlParseAttValueInternal:
7052 * @ctxt: an XML parser context
7053 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007054 * @alloc: whether the attribute was reallocated as a new string
7055 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007056 *
7057 * parse a value for an attribute.
7058 * NOTE: if no normalization is needed, the routine will return pointers
7059 * directly from the data buffer.
7060 *
7061 * 3.3.3 Attribute-Value Normalization:
7062 * Before the value of an attribute is passed to the application or
7063 * checked for validity, the XML processor must normalize it as follows:
7064 * - a character reference is processed by appending the referenced
7065 * character to the attribute value
7066 * - an entity reference is processed by recursively processing the
7067 * replacement text of the entity
7068 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7069 * appending #x20 to the normalized value, except that only a single
7070 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7071 * parsed entity or the literal entity value of an internal parsed entity
7072 * - other characters are processed by appending them to the normalized value
7073 * If the declared value is not CDATA, then the XML processor must further
7074 * process the normalized attribute value by discarding any leading and
7075 * trailing space (#x20) characters, and by replacing sequences of space
7076 * (#x20) characters by a single space (#x20) character.
7077 * All attributes for which no declaration has been read should be treated
7078 * by a non-validating parser as if declared CDATA.
7079 *
7080 * Returns the AttValue parsed or NULL. The value has to be freed by the
7081 * caller if it was copied, this can be detected by val[*len] == 0.
7082 */
7083
7084static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007085xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7086 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007087{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007088 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007089 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007090 xmlChar *ret = NULL;
7091
7092 GROW;
7093 in = (xmlChar *) CUR_PTR;
7094 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007095 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007096 return (NULL);
7097 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007099
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007100 /*
7101 * try to handle in this routine the most common case where no
7102 * allocation of a new string is required and where content is
7103 * pure ASCII.
7104 */
7105 limit = *in++;
7106 end = ctxt->input->end;
7107 start = in;
7108 if (in >= end) {
7109 const xmlChar *oldbase = ctxt->input->base;
7110 GROW;
7111 if (oldbase != ctxt->input->base) {
7112 long delta = ctxt->input->base - oldbase;
7113 start = start + delta;
7114 in = in + delta;
7115 }
7116 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007117 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007118 if (normalize) {
7119 /*
7120 * Skip any leading spaces
7121 */
7122 while ((in < end) && (*in != limit) &&
7123 ((*in == 0x20) || (*in == 0x9) ||
7124 (*in == 0xA) || (*in == 0xD))) {
7125 in++;
7126 start = in;
7127 if (in >= end) {
7128 const xmlChar *oldbase = ctxt->input->base;
7129 GROW;
7130 if (oldbase != ctxt->input->base) {
7131 long delta = ctxt->input->base - oldbase;
7132 start = start + delta;
7133 in = in + delta;
7134 }
7135 end = ctxt->input->end;
7136 }
7137 }
7138 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7139 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7140 if ((*in++ == 0x20) && (*in == 0x20)) break;
7141 if (in >= end) {
7142 const xmlChar *oldbase = ctxt->input->base;
7143 GROW;
7144 if (oldbase != ctxt->input->base) {
7145 long delta = ctxt->input->base - oldbase;
7146 start = start + delta;
7147 in = in + delta;
7148 }
7149 end = ctxt->input->end;
7150 }
7151 }
7152 last = in;
7153 /*
7154 * skip the trailing blanks
7155 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007156 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007157 while ((in < end) && (*in != limit) &&
7158 ((*in == 0x20) || (*in == 0x9) ||
7159 (*in == 0xA) || (*in == 0xD))) {
7160 in++;
7161 if (in >= end) {
7162 const xmlChar *oldbase = ctxt->input->base;
7163 GROW;
7164 if (oldbase != ctxt->input->base) {
7165 long delta = ctxt->input->base - oldbase;
7166 start = start + delta;
7167 in = in + delta;
7168 last = last + delta;
7169 }
7170 end = ctxt->input->end;
7171 }
7172 }
7173 if (*in != limit) goto need_complex;
7174 } else {
7175 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7176 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7177 in++;
7178 if (in >= end) {
7179 const xmlChar *oldbase = ctxt->input->base;
7180 GROW;
7181 if (oldbase != ctxt->input->base) {
7182 long delta = ctxt->input->base - oldbase;
7183 start = start + delta;
7184 in = in + delta;
7185 }
7186 end = ctxt->input->end;
7187 }
7188 }
7189 last = in;
7190 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007191 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007192 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007193 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007194 *len = last - start;
7195 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007196 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007197 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007198 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007199 }
7200 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007201 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007202 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007203need_complex:
7204 if (alloc) *alloc = 1;
7205 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007206}
7207
7208/**
7209 * xmlParseAttribute2:
7210 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007211 * @pref: the element prefix
7212 * @elem: the element name
7213 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007214 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007215 * @len: an int * to save the length of the attribute
7216 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007217 *
7218 * parse an attribute in the new SAX2 framework.
7219 *
7220 * Returns the attribute name, and the value in *value, .
7221 */
7222
7223static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007224xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7225 const xmlChar *pref, const xmlChar *elem,
7226 const xmlChar **prefix, xmlChar **value,
7227 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007228 const xmlChar *name;
7229 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007230 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007231
7232 *value = NULL;
7233 GROW;
7234 name = xmlParseQName(ctxt, prefix);
7235 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007236 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7237 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007238 return(NULL);
7239 }
7240
7241 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007242 * get the type if needed
7243 */
7244 if (ctxt->attsSpecial != NULL) {
7245 int type;
7246
7247 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7248 pref, elem, *prefix, name);
7249 if (type != 0) normalize = 1;
7250 }
7251
7252 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007253 * read the value
7254 */
7255 SKIP_BLANKS;
7256 if (RAW == '=') {
7257 NEXT;
7258 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007259 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007260 ctxt->instate = XML_PARSER_CONTENT;
7261 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007262 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007263 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007264 return(NULL);
7265 }
7266
7267 /*
7268 * Check that xml:lang conforms to the specification
7269 * No more registered as an error, just generate a warning now
7270 * since this was deprecated in XML second edition
7271 */
7272 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7273 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007274 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7275 "Malformed value for xml:lang : %s\n",
7276 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007277 }
7278 }
7279
7280 /*
7281 * Check that xml:space conforms to the specification
7282 */
7283 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7284 if (xmlStrEqual(val, BAD_CAST "default"))
7285 *(ctxt->space) = 0;
7286 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7287 *(ctxt->space) = 1;
7288 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007289 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007290"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7291 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007292 }
7293 }
7294
7295 *value = val;
7296 return(name);
7297}
7298
7299/**
7300 * xmlParseStartTag2:
7301 * @ctxt: an XML parser context
7302 *
7303 * parse a start of tag either for rule element or
7304 * EmptyElement. In both case we don't parse the tag closing chars.
7305 * This routine is called when running SAX2 parsing
7306 *
7307 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7308 *
7309 * [ WFC: Unique Att Spec ]
7310 * No attribute name may appear more than once in the same start-tag or
7311 * empty-element tag.
7312 *
7313 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7314 *
7315 * [ WFC: Unique Att Spec ]
7316 * No attribute name may appear more than once in the same start-tag or
7317 * empty-element tag.
7318 *
7319 * With namespace:
7320 *
7321 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7322 *
7323 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7324 *
7325 * Returns the element name parsed
7326 */
7327
7328static const xmlChar *
7329xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007330 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007331 const xmlChar *localname;
7332 const xmlChar *prefix;
7333 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007334 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007335 const xmlChar *nsname;
7336 xmlChar *attvalue;
7337 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007338 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007339 int nratts, nbatts, nbdef;
7340 int i, j, nbNs, attval;
7341 const xmlChar *base;
7342 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007343
7344 if (RAW != '<') return(NULL);
7345 NEXT1;
7346
7347 /*
7348 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7349 * point since the attribute values may be stored as pointers to
7350 * the buffer and calling SHRINK would destroy them !
7351 * The Shrinking is only possible once the full set of attribute
7352 * callbacks have been done.
7353 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007354reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007355 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007356 base = ctxt->input->base;
7357 cur = ctxt->input->cur - ctxt->input->base;
7358 nbatts = 0;
7359 nratts = 0;
7360 nbdef = 0;
7361 nbNs = 0;
7362 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007363
7364 localname = xmlParseQName(ctxt, &prefix);
7365 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007366 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7367 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007368 return(NULL);
7369 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007370 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007371
7372 /*
7373 * Now parse the attributes, it ends up with the ending
7374 *
7375 * (S Attribute)* S?
7376 */
7377 SKIP_BLANKS;
7378 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007379 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007380
7381 while ((RAW != '>') &&
7382 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007383 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007384 const xmlChar *q = CUR_PTR;
7385 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007386 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007388 attname = xmlParseAttribute2(ctxt, prefix, localname,
7389 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007390 if ((attname != NULL) && (attvalue != NULL)) {
7391 if (len < 0) len = xmlStrlen(attvalue);
7392 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007393 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7394 xmlURIPtr uri;
7395
7396 if (*URL != 0) {
7397 uri = xmlParseURI((const char *) URL);
7398 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007399 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7400 "xmlns: %s not a valid URI\n",
7401 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007402 } else {
7403 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007404 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7405 "xmlns: URI %s is not absolute\n",
7406 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007407 }
7408 xmlFreeURI(uri);
7409 }
7410 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007411 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007412 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007413 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007414 for (j = 1;j <= nbNs;j++)
7415 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7416 break;
7417 if (j <= nbNs)
7418 xmlErrAttributeDup(ctxt, NULL, attname);
7419 else
7420 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007421 if (alloc != 0) xmlFree(attvalue);
7422 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007423 continue;
7424 }
7425 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007426 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7427 xmlURIPtr uri;
7428
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007429 if (attname == ctxt->str_xml) {
7430 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007431 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7432 "xml namespace prefix mapped to wrong URI\n",
7433 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007434 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007435 /*
7436 * Do not keep a namespace definition node
7437 */
7438 if (alloc != 0) xmlFree(attvalue);
7439 SKIP_BLANKS;
7440 continue;
7441 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007442 uri = xmlParseURI((const char *) URL);
7443 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007444 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7445 "xmlns:%s: '%s' is not a valid URI\n",
7446 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007447 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007448 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007449 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7450 "xmlns:%s: URI %s is not absolute\n",
7451 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007452 }
7453 xmlFreeURI(uri);
7454 }
7455
Daniel Veillard0fb18932003-09-07 09:14:37 +00007456 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007457 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007458 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007459 for (j = 1;j <= nbNs;j++)
7460 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7461 break;
7462 if (j <= nbNs)
7463 xmlErrAttributeDup(ctxt, aprefix, attname);
7464 else
7465 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007466 if (alloc != 0) xmlFree(attvalue);
7467 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007468 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007469 continue;
7470 }
7471
7472 /*
7473 * Add the pair to atts
7474 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007475 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7476 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477 if (attvalue[len] == 0)
7478 xmlFree(attvalue);
7479 goto failed;
7480 }
7481 maxatts = ctxt->maxatts;
7482 atts = ctxt->atts;
7483 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007484 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007485 atts[nbatts++] = attname;
7486 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007487 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007488 atts[nbatts++] = attvalue;
7489 attvalue += len;
7490 atts[nbatts++] = attvalue;
7491 /*
7492 * tag if some deallocation is needed
7493 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007494 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007495 } else {
7496 if ((attvalue != NULL) && (attvalue[len] == 0))
7497 xmlFree(attvalue);
7498 }
7499
7500failed:
7501
7502 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007503 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007504 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7505 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007506 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007507 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7508 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007509 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007510 }
7511 SKIP_BLANKS;
7512 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7513 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007514 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007515 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516 break;
7517 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007519 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007520 }
7521
Daniel Veillard0fb18932003-09-07 09:14:37 +00007522 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007523 * The attributes defaulting
7524 */
7525 if (ctxt->attsDefault != NULL) {
7526 xmlDefAttrsPtr defaults;
7527
7528 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7529 if (defaults != NULL) {
7530 for (i = 0;i < defaults->nbAttrs;i++) {
7531 attname = defaults->values[4 * i];
7532 aprefix = defaults->values[4 * i + 1];
7533
7534 /*
7535 * special work for namespaces defaulted defs
7536 */
7537 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7538 /*
7539 * check that it's not a defined namespace
7540 */
7541 for (j = 1;j <= nbNs;j++)
7542 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7543 break;
7544 if (j <= nbNs) continue;
7545
7546 nsname = xmlGetNamespace(ctxt, NULL);
7547 if (nsname != defaults->values[4 * i + 2]) {
7548 if (nsPush(ctxt, NULL,
7549 defaults->values[4 * i + 2]) > 0)
7550 nbNs++;
7551 }
7552 } else if (aprefix == ctxt->str_xmlns) {
7553 /*
7554 * check that it's not a defined namespace
7555 */
7556 for (j = 1;j <= nbNs;j++)
7557 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7558 break;
7559 if (j <= nbNs) continue;
7560
7561 nsname = xmlGetNamespace(ctxt, attname);
7562 if (nsname != defaults->values[2]) {
7563 if (nsPush(ctxt, attname,
7564 defaults->values[4 * i + 2]) > 0)
7565 nbNs++;
7566 }
7567 } else {
7568 /*
7569 * check that it's not a defined attribute
7570 */
7571 for (j = 0;j < nbatts;j+=5) {
7572 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7573 break;
7574 }
7575 if (j < nbatts) continue;
7576
7577 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7578 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007579 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007580 }
7581 maxatts = ctxt->maxatts;
7582 atts = ctxt->atts;
7583 }
7584 atts[nbatts++] = attname;
7585 atts[nbatts++] = aprefix;
7586 if (aprefix == NULL)
7587 atts[nbatts++] = NULL;
7588 else
7589 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7590 atts[nbatts++] = defaults->values[4 * i + 2];
7591 atts[nbatts++] = defaults->values[4 * i + 3];
7592 nbdef++;
7593 }
7594 }
7595 }
7596 }
7597
Daniel Veillarde70c8772003-11-25 07:21:18 +00007598 /*
7599 * The attributes checkings
7600 */
7601 for (i = 0; i < nbatts;i += 5) {
7602 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7603 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7604 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7605 "Namespace prefix %s for %s on %s is not defined\n",
7606 atts[i + 1], atts[i], localname);
7607 }
7608 atts[i + 2] = nsname;
7609 /*
7610 * [ WFC: Unique Att Spec ]
7611 * No attribute name may appear more than once in the same
7612 * start-tag or empty-element tag.
7613 * As extended by the Namespace in XML REC.
7614 */
7615 for (j = 0; j < i;j += 5) {
7616 if (atts[i] == atts[j]) {
7617 if (atts[i+1] == atts[j+1]) {
7618 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7619 break;
7620 }
7621 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7622 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7623 "Namespaced Attribute %s in '%s' redefined\n",
7624 atts[i], nsname, NULL);
7625 break;
7626 }
7627 }
7628 }
7629 }
7630
Daniel Veillarde57ec792003-09-10 10:50:59 +00007631 nsname = xmlGetNamespace(ctxt, prefix);
7632 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007633 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7634 "Namespace prefix %s on %s is not defined\n",
7635 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007636 }
7637 *pref = prefix;
7638 *URI = nsname;
7639
7640 /*
7641 * SAX: Start of Element !
7642 */
7643 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7644 (!ctxt->disableSAX)) {
7645 if (nbNs > 0)
7646 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7647 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7648 nbatts / 5, nbdef, atts);
7649 else
7650 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7651 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7652 }
7653
7654 /*
7655 * Free up attribute allocated strings if needed
7656 */
7657 if (attval != 0) {
7658 for (i = 3,j = 0; j < nratts;i += 5,j++)
7659 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7660 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007661 }
7662
7663 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007664
7665base_changed:
7666 /*
7667 * the attribute strings are valid iif the base didn't changed
7668 */
7669 if (attval != 0) {
7670 for (i = 3,j = 0; j < nratts;i += 5,j++)
7671 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7672 xmlFree((xmlChar *) atts[i]);
7673 }
7674 ctxt->input->cur = ctxt->input->base + cur;
7675 if (ctxt->wellFormed == 1) {
7676 goto reparse;
7677 }
7678 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007679}
7680
7681/**
7682 * xmlParseEndTag2:
7683 * @ctxt: an XML parser context
7684 * @line: line of the start tag
7685 * @nsNr: number of namespaces on the start tag
7686 *
7687 * parse an end of tag
7688 *
7689 * [42] ETag ::= '</' Name S? '>'
7690 *
7691 * With namespace
7692 *
7693 * [NS 9] ETag ::= '</' QName S? '>'
7694 */
7695
7696static void
7697xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007698 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007699 const xmlChar *name;
7700
7701 GROW;
7702 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007703 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704 return;
7705 }
7706 SKIP(2);
7707
William M. Brack13dfa872004-09-18 04:52:08 +00007708 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007709 if (ctxt->input->cur[tlen] == '>') {
7710 ctxt->input->cur += tlen + 1;
7711 goto done;
7712 }
7713 ctxt->input->cur += tlen;
7714 name = (xmlChar*)1;
7715 } else {
7716 if (prefix == NULL)
7717 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7718 else
7719 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7720 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721
7722 /*
7723 * We should definitely be at the ending "S? '>'" part
7724 */
7725 GROW;
7726 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007727 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007728 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729 } else
7730 NEXT1;
7731
7732 /*
7733 * [ WFC: Element Type Match ]
7734 * The Name in an element's end-tag must match the element type in the
7735 * start-tag.
7736 *
7737 */
7738 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007739 if (name == NULL) name = BAD_CAST "unparseable";
7740 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007741 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007742 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007743 }
7744
7745 /*
7746 * SAX: End of Tag
7747 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007748done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7750 (!ctxt->disableSAX))
7751 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7752
Daniel Veillard0fb18932003-09-07 09:14:37 +00007753 spacePop(ctxt);
7754 if (nsNr != 0)
7755 nsPop(ctxt, nsNr);
7756 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007757}
7758
7759/**
Owen Taylor3473f882001-02-23 17:55:21 +00007760 * xmlParseCDSect:
7761 * @ctxt: an XML parser context
7762 *
7763 * Parse escaped pure raw content.
7764 *
7765 * [18] CDSect ::= CDStart CData CDEnd
7766 *
7767 * [19] CDStart ::= '<![CDATA['
7768 *
7769 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7770 *
7771 * [21] CDEnd ::= ']]>'
7772 */
7773void
7774xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7775 xmlChar *buf = NULL;
7776 int len = 0;
7777 int size = XML_PARSER_BUFFER_SIZE;
7778 int r, rl;
7779 int s, sl;
7780 int cur, l;
7781 int count = 0;
7782
Daniel Veillard8f597c32003-10-06 08:19:27 +00007783 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007784 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007785 SKIP(9);
7786 } else
7787 return;
7788
7789 ctxt->instate = XML_PARSER_CDATA_SECTION;
7790 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007791 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007792 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007793 ctxt->instate = XML_PARSER_CONTENT;
7794 return;
7795 }
7796 NEXTL(rl);
7797 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007798 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007799 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007800 ctxt->instate = XML_PARSER_CONTENT;
7801 return;
7802 }
7803 NEXTL(sl);
7804 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007806 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007807 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007808 return;
7809 }
William M. Brack871611b2003-10-18 04:53:14 +00007810 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007811 ((r != ']') || (s != ']') || (cur != '>'))) {
7812 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00007813 xmlChar *tmp;
7814
Owen Taylor3473f882001-02-23 17:55:21 +00007815 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00007816 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7817 if (tmp == NULL) {
7818 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007819 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007820 return;
7821 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00007822 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00007823 }
7824 COPY_BUF(rl,buf,len,r);
7825 r = s;
7826 rl = sl;
7827 s = cur;
7828 sl = l;
7829 count++;
7830 if (count > 50) {
7831 GROW;
7832 count = 0;
7833 }
7834 NEXTL(l);
7835 cur = CUR_CHAR(l);
7836 }
7837 buf[len] = 0;
7838 ctxt->instate = XML_PARSER_CONTENT;
7839 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007840 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007841 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007842 xmlFree(buf);
7843 return;
7844 }
7845 NEXTL(l);
7846
7847 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007848 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007849 */
7850 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7851 if (ctxt->sax->cdataBlock != NULL)
7852 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007853 else if (ctxt->sax->characters != NULL)
7854 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007855 }
7856 xmlFree(buf);
7857}
7858
7859/**
7860 * xmlParseContent:
7861 * @ctxt: an XML parser context
7862 *
7863 * Parse a content:
7864 *
7865 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7866 */
7867
7868void
7869xmlParseContent(xmlParserCtxtPtr ctxt) {
7870 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007871 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007872 ((RAW != '<') || (NXT(1) != '/'))) {
7873 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007874 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007875 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007876
7877 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007878 * First case : a Processing Instruction.
7879 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007880 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007881 xmlParsePI(ctxt);
7882 }
7883
7884 /*
7885 * Second case : a CDSection
7886 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007887 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007888 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007889 xmlParseCDSect(ctxt);
7890 }
7891
7892 /*
7893 * Third case : a comment
7894 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007895 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007896 (NXT(2) == '-') && (NXT(3) == '-')) {
7897 xmlParseComment(ctxt);
7898 ctxt->instate = XML_PARSER_CONTENT;
7899 }
7900
7901 /*
7902 * Fourth case : a sub-element.
7903 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007904 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007905 xmlParseElement(ctxt);
7906 }
7907
7908 /*
7909 * Fifth case : a reference. If if has not been resolved,
7910 * parsing returns it's Name, create the node
7911 */
7912
Daniel Veillard21a0f912001-02-25 19:54:14 +00007913 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007914 xmlParseReference(ctxt);
7915 }
7916
7917 /*
7918 * Last case, text. Note that References are handled directly.
7919 */
7920 else {
7921 xmlParseCharData(ctxt, 0);
7922 }
7923
7924 GROW;
7925 /*
7926 * Pop-up of finished entities.
7927 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007928 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007929 xmlPopInput(ctxt);
7930 SHRINK;
7931
Daniel Veillardfdc91562002-07-01 21:52:03 +00007932 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007933 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7934 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007935 ctxt->instate = XML_PARSER_EOF;
7936 break;
7937 }
7938 }
7939}
7940
7941/**
7942 * xmlParseElement:
7943 * @ctxt: an XML parser context
7944 *
7945 * parse an XML element, this is highly recursive
7946 *
7947 * [39] element ::= EmptyElemTag | STag content ETag
7948 *
7949 * [ WFC: Element Type Match ]
7950 * The Name in an element's end-tag must match the element type in the
7951 * start-tag.
7952 *
Owen Taylor3473f882001-02-23 17:55:21 +00007953 */
7954
7955void
7956xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007957 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007958 const xmlChar *prefix;
7959 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007960 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007961 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007962 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007963 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007964
7965 /* Capture start position */
7966 if (ctxt->record_info) {
7967 node_info.begin_pos = ctxt->input->consumed +
7968 (CUR_PTR - ctxt->input->base);
7969 node_info.begin_line = ctxt->input->line;
7970 }
7971
7972 if (ctxt->spaceNr == 0)
7973 spacePush(ctxt, -1);
7974 else
7975 spacePush(ctxt, *ctxt->space);
7976
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007977 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007978#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007979 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007980#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007981 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007982#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983 else
7984 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007985#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007986 if (name == NULL) {
7987 spacePop(ctxt);
7988 return;
7989 }
7990 namePush(ctxt, name);
7991 ret = ctxt->node;
7992
Daniel Veillard4432df22003-09-28 18:58:27 +00007993#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007994 /*
7995 * [ VC: Root Element Type ]
7996 * The Name in the document type declaration must match the element
7997 * type of the root element.
7998 */
7999 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8000 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8001 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008002#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008003
8004 /*
8005 * Check for an Empty Element.
8006 */
8007 if ((RAW == '/') && (NXT(1) == '>')) {
8008 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008009 if (ctxt->sax2) {
8010 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8011 (!ctxt->disableSAX))
8012 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008013#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008014 } else {
8015 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8016 (!ctxt->disableSAX))
8017 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008018#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008019 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008020 namePop(ctxt);
8021 spacePop(ctxt);
8022 if (nsNr != ctxt->nsNr)
8023 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008024 if ( ret != NULL && ctxt->record_info ) {
8025 node_info.end_pos = ctxt->input->consumed +
8026 (CUR_PTR - ctxt->input->base);
8027 node_info.end_line = ctxt->input->line;
8028 node_info.node = ret;
8029 xmlParserAddNodeInfo(ctxt, &node_info);
8030 }
8031 return;
8032 }
8033 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008034 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008035 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008036 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8037 "Couldn't find end of Start Tag %s line %d\n",
8038 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008039
8040 /*
8041 * end of parsing of this node.
8042 */
8043 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008044 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008045 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008046 if (nsNr != ctxt->nsNr)
8047 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008048
8049 /*
8050 * Capture end position and add node
8051 */
8052 if ( ret != NULL && ctxt->record_info ) {
8053 node_info.end_pos = ctxt->input->consumed +
8054 (CUR_PTR - ctxt->input->base);
8055 node_info.end_line = ctxt->input->line;
8056 node_info.node = ret;
8057 xmlParserAddNodeInfo(ctxt, &node_info);
8058 }
8059 return;
8060 }
8061
8062 /*
8063 * Parse the content of the element:
8064 */
8065 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008066 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008067 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008068 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008069 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008070
8071 /*
8072 * end of parsing of this node.
8073 */
8074 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008075 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008076 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008077 if (nsNr != ctxt->nsNr)
8078 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008079 return;
8080 }
8081
8082 /*
8083 * parse the end of tag: '</' should be here.
8084 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008085 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008086 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008087 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008088 }
8089#ifdef LIBXML_SAX1_ENABLED
8090 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008091 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008092#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008093
8094 /*
8095 * Capture end position and add node
8096 */
8097 if ( ret != NULL && ctxt->record_info ) {
8098 node_info.end_pos = ctxt->input->consumed +
8099 (CUR_PTR - ctxt->input->base);
8100 node_info.end_line = ctxt->input->line;
8101 node_info.node = ret;
8102 xmlParserAddNodeInfo(ctxt, &node_info);
8103 }
8104}
8105
8106/**
8107 * xmlParseVersionNum:
8108 * @ctxt: an XML parser context
8109 *
8110 * parse the XML version value.
8111 *
8112 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8113 *
8114 * Returns the string giving the XML version number, or NULL
8115 */
8116xmlChar *
8117xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8118 xmlChar *buf = NULL;
8119 int len = 0;
8120 int size = 10;
8121 xmlChar cur;
8122
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008123 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008124 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008125 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008126 return(NULL);
8127 }
8128 cur = CUR;
8129 while (((cur >= 'a') && (cur <= 'z')) ||
8130 ((cur >= 'A') && (cur <= 'Z')) ||
8131 ((cur >= '0') && (cur <= '9')) ||
8132 (cur == '_') || (cur == '.') ||
8133 (cur == ':') || (cur == '-')) {
8134 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008135 xmlChar *tmp;
8136
Owen Taylor3473f882001-02-23 17:55:21 +00008137 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008138 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8139 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008140 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008141 return(NULL);
8142 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008143 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008144 }
8145 buf[len++] = cur;
8146 NEXT;
8147 cur=CUR;
8148 }
8149 buf[len] = 0;
8150 return(buf);
8151}
8152
8153/**
8154 * xmlParseVersionInfo:
8155 * @ctxt: an XML parser context
8156 *
8157 * parse the XML version.
8158 *
8159 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8160 *
8161 * [25] Eq ::= S? '=' S?
8162 *
8163 * Returns the version string, e.g. "1.0"
8164 */
8165
8166xmlChar *
8167xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8168 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008169
Daniel Veillarda07050d2003-10-19 14:46:32 +00008170 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008171 SKIP(7);
8172 SKIP_BLANKS;
8173 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008174 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008175 return(NULL);
8176 }
8177 NEXT;
8178 SKIP_BLANKS;
8179 if (RAW == '"') {
8180 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008181 version = xmlParseVersionNum(ctxt);
8182 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008183 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008184 } else
8185 NEXT;
8186 } else if (RAW == '\''){
8187 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008188 version = xmlParseVersionNum(ctxt);
8189 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008190 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008191 } else
8192 NEXT;
8193 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008194 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008195 }
8196 }
8197 return(version);
8198}
8199
8200/**
8201 * xmlParseEncName:
8202 * @ctxt: an XML parser context
8203 *
8204 * parse the XML encoding name
8205 *
8206 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8207 *
8208 * Returns the encoding name value or NULL
8209 */
8210xmlChar *
8211xmlParseEncName(xmlParserCtxtPtr ctxt) {
8212 xmlChar *buf = NULL;
8213 int len = 0;
8214 int size = 10;
8215 xmlChar cur;
8216
8217 cur = CUR;
8218 if (((cur >= 'a') && (cur <= 'z')) ||
8219 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008220 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008221 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008222 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008223 return(NULL);
8224 }
8225
8226 buf[len++] = cur;
8227 NEXT;
8228 cur = CUR;
8229 while (((cur >= 'a') && (cur <= 'z')) ||
8230 ((cur >= 'A') && (cur <= 'Z')) ||
8231 ((cur >= '0') && (cur <= '9')) ||
8232 (cur == '.') || (cur == '_') ||
8233 (cur == '-')) {
8234 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008235 xmlChar *tmp;
8236
Owen Taylor3473f882001-02-23 17:55:21 +00008237 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008238 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8239 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008240 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008241 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008242 return(NULL);
8243 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008244 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008245 }
8246 buf[len++] = cur;
8247 NEXT;
8248 cur = CUR;
8249 if (cur == 0) {
8250 SHRINK;
8251 GROW;
8252 cur = CUR;
8253 }
8254 }
8255 buf[len] = 0;
8256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008257 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008258 }
8259 return(buf);
8260}
8261
8262/**
8263 * xmlParseEncodingDecl:
8264 * @ctxt: an XML parser context
8265 *
8266 * parse the XML encoding declaration
8267 *
8268 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8269 *
8270 * this setups the conversion filters.
8271 *
8272 * Returns the encoding value or NULL
8273 */
8274
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008275const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008276xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8277 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008278
8279 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008280 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008281 SKIP(8);
8282 SKIP_BLANKS;
8283 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008284 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008285 return(NULL);
8286 }
8287 NEXT;
8288 SKIP_BLANKS;
8289 if (RAW == '"') {
8290 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008291 encoding = xmlParseEncName(ctxt);
8292 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008293 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008294 } else
8295 NEXT;
8296 } else if (RAW == '\''){
8297 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008298 encoding = xmlParseEncName(ctxt);
8299 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008300 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008301 } else
8302 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008303 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008304 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008305 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008306 /*
8307 * UTF-16 encoding stwich has already taken place at this stage,
8308 * more over the little-endian/big-endian selection is already done
8309 */
8310 if ((encoding != NULL) &&
8311 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8312 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008313 if (ctxt->encoding != NULL)
8314 xmlFree((xmlChar *) ctxt->encoding);
8315 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008316 }
8317 /*
8318 * UTF-8 encoding is handled natively
8319 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008320 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008321 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8322 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008323 if (ctxt->encoding != NULL)
8324 xmlFree((xmlChar *) ctxt->encoding);
8325 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008326 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008327 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008328 xmlCharEncodingHandlerPtr handler;
8329
8330 if (ctxt->input->encoding != NULL)
8331 xmlFree((xmlChar *) ctxt->input->encoding);
8332 ctxt->input->encoding = encoding;
8333
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008334 handler = xmlFindCharEncodingHandler((const char *) encoding);
8335 if (handler != NULL) {
8336 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008337 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008338 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008339 "Unsupported encoding %s\n", encoding);
8340 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008341 }
8342 }
8343 }
8344 return(encoding);
8345}
8346
8347/**
8348 * xmlParseSDDecl:
8349 * @ctxt: an XML parser context
8350 *
8351 * parse the XML standalone declaration
8352 *
8353 * [32] SDDecl ::= S 'standalone' Eq
8354 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8355 *
8356 * [ VC: Standalone Document Declaration ]
8357 * TODO The standalone document declaration must have the value "no"
8358 * if any external markup declarations contain declarations of:
8359 * - attributes with default values, if elements to which these
8360 * attributes apply appear in the document without specifications
8361 * of values for these attributes, or
8362 * - entities (other than amp, lt, gt, apos, quot), if references
8363 * to those entities appear in the document, or
8364 * - attributes with values subject to normalization, where the
8365 * attribute appears in the document with a value which will change
8366 * as a result of normalization, or
8367 * - element types with element content, if white space occurs directly
8368 * within any instance of those types.
8369 *
8370 * Returns 1 if standalone, 0 otherwise
8371 */
8372
8373int
8374xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8375 int standalone = -1;
8376
8377 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008378 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008379 SKIP(10);
8380 SKIP_BLANKS;
8381 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008382 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008383 return(standalone);
8384 }
8385 NEXT;
8386 SKIP_BLANKS;
8387 if (RAW == '\''){
8388 NEXT;
8389 if ((RAW == 'n') && (NXT(1) == 'o')) {
8390 standalone = 0;
8391 SKIP(2);
8392 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8393 (NXT(2) == 's')) {
8394 standalone = 1;
8395 SKIP(3);
8396 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008397 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008398 }
8399 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008401 } else
8402 NEXT;
8403 } else if (RAW == '"'){
8404 NEXT;
8405 if ((RAW == 'n') && (NXT(1) == 'o')) {
8406 standalone = 0;
8407 SKIP(2);
8408 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8409 (NXT(2) == 's')) {
8410 standalone = 1;
8411 SKIP(3);
8412 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008413 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008414 }
8415 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008416 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008417 } else
8418 NEXT;
8419 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008420 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008421 }
8422 }
8423 return(standalone);
8424}
8425
8426/**
8427 * xmlParseXMLDecl:
8428 * @ctxt: an XML parser context
8429 *
8430 * parse an XML declaration header
8431 *
8432 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8433 */
8434
8435void
8436xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8437 xmlChar *version;
8438
8439 /*
8440 * We know that '<?xml' is here.
8441 */
8442 SKIP(5);
8443
William M. Brack76e95df2003-10-18 16:20:14 +00008444 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008445 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8446 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008447 }
8448 SKIP_BLANKS;
8449
8450 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008451 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008452 */
8453 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008454 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008455 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008456 } else {
8457 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8458 /*
8459 * TODO: Blueberry should be detected here
8460 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008461 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8462 "Unsupported version '%s'\n",
8463 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008464 }
8465 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008466 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008467 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008468 }
Owen Taylor3473f882001-02-23 17:55:21 +00008469
8470 /*
8471 * We may have the encoding declaration
8472 */
William M. Brack76e95df2003-10-18 16:20:14 +00008473 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008474 if ((RAW == '?') && (NXT(1) == '>')) {
8475 SKIP(2);
8476 return;
8477 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008478 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008479 }
8480 xmlParseEncodingDecl(ctxt);
8481 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8482 /*
8483 * The XML REC instructs us to stop parsing right here
8484 */
8485 return;
8486 }
8487
8488 /*
8489 * We may have the standalone status.
8490 */
William M. Brack76e95df2003-10-18 16:20:14 +00008491 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008492 if ((RAW == '?') && (NXT(1) == '>')) {
8493 SKIP(2);
8494 return;
8495 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008497 }
8498 SKIP_BLANKS;
8499 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8500
8501 SKIP_BLANKS;
8502 if ((RAW == '?') && (NXT(1) == '>')) {
8503 SKIP(2);
8504 } else if (RAW == '>') {
8505 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008506 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008507 NEXT;
8508 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008509 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008510 MOVETO_ENDTAG(CUR_PTR);
8511 NEXT;
8512 }
8513}
8514
8515/**
8516 * xmlParseMisc:
8517 * @ctxt: an XML parser context
8518 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008519 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008520 *
8521 * [27] Misc ::= Comment | PI | S
8522 */
8523
8524void
8525xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008526 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008527 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008528 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008529 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008530 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008531 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008532 NEXT;
8533 } else
8534 xmlParseComment(ctxt);
8535 }
8536}
8537
8538/**
8539 * xmlParseDocument:
8540 * @ctxt: an XML parser context
8541 *
8542 * parse an XML document (and build a tree if using the standard SAX
8543 * interface).
8544 *
8545 * [1] document ::= prolog element Misc*
8546 *
8547 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8548 *
8549 * Returns 0, -1 in case of error. the parser context is augmented
8550 * as a result of the parsing.
8551 */
8552
8553int
8554xmlParseDocument(xmlParserCtxtPtr ctxt) {
8555 xmlChar start[4];
8556 xmlCharEncoding enc;
8557
8558 xmlInitParser();
8559
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008560 if ((ctxt == NULL) || (ctxt->input == NULL))
8561 return(-1);
8562
Owen Taylor3473f882001-02-23 17:55:21 +00008563 GROW;
8564
8565 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 * SAX: detecting the level.
8567 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008568 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008569
8570 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008571 * SAX: beginning of the document processing.
8572 */
8573 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8574 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8575
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008576 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8577 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008578 /*
8579 * Get the 4 first bytes and decode the charset
8580 * if enc != XML_CHAR_ENCODING_NONE
8581 * plug some encoding conversion routines.
8582 */
8583 start[0] = RAW;
8584 start[1] = NXT(1);
8585 start[2] = NXT(2);
8586 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008587 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008588 if (enc != XML_CHAR_ENCODING_NONE) {
8589 xmlSwitchEncoding(ctxt, enc);
8590 }
Owen Taylor3473f882001-02-23 17:55:21 +00008591 }
8592
8593
8594 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008595 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008596 }
8597
8598 /*
8599 * Check for the XMLDecl in the Prolog.
8600 */
8601 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008602 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008603
8604 /*
8605 * Note that we will switch encoding on the fly.
8606 */
8607 xmlParseXMLDecl(ctxt);
8608 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8609 /*
8610 * The XML REC instructs us to stop parsing right here
8611 */
8612 return(-1);
8613 }
8614 ctxt->standalone = ctxt->input->standalone;
8615 SKIP_BLANKS;
8616 } else {
8617 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8618 }
8619 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8620 ctxt->sax->startDocument(ctxt->userData);
8621
8622 /*
8623 * The Misc part of the Prolog
8624 */
8625 GROW;
8626 xmlParseMisc(ctxt);
8627
8628 /*
8629 * Then possibly doc type declaration(s) and more Misc
8630 * (doctypedecl Misc*)?
8631 */
8632 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008633 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008634
8635 ctxt->inSubset = 1;
8636 xmlParseDocTypeDecl(ctxt);
8637 if (RAW == '[') {
8638 ctxt->instate = XML_PARSER_DTD;
8639 xmlParseInternalSubset(ctxt);
8640 }
8641
8642 /*
8643 * Create and update the external subset.
8644 */
8645 ctxt->inSubset = 2;
8646 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8647 (!ctxt->disableSAX))
8648 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8649 ctxt->extSubSystem, ctxt->extSubURI);
8650 ctxt->inSubset = 0;
8651
8652
8653 ctxt->instate = XML_PARSER_PROLOG;
8654 xmlParseMisc(ctxt);
8655 }
8656
8657 /*
8658 * Time to start parsing the tree itself
8659 */
8660 GROW;
8661 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008662 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8663 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008664 } else {
8665 ctxt->instate = XML_PARSER_CONTENT;
8666 xmlParseElement(ctxt);
8667 ctxt->instate = XML_PARSER_EPILOG;
8668
8669
8670 /*
8671 * The Misc part at the end
8672 */
8673 xmlParseMisc(ctxt);
8674
Daniel Veillard561b7f82002-03-20 21:55:57 +00008675 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008676 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008677 }
8678 ctxt->instate = XML_PARSER_EOF;
8679 }
8680
8681 /*
8682 * SAX: end of the document processing.
8683 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008684 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008685 ctxt->sax->endDocument(ctxt->userData);
8686
Daniel Veillard5997aca2002-03-18 18:36:20 +00008687 /*
8688 * Remove locally kept entity definitions if the tree was not built
8689 */
8690 if ((ctxt->myDoc != NULL) &&
8691 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8692 xmlFreeDoc(ctxt->myDoc);
8693 ctxt->myDoc = NULL;
8694 }
8695
Daniel Veillardc7612992002-02-17 22:47:37 +00008696 if (! ctxt->wellFormed) {
8697 ctxt->valid = 0;
8698 return(-1);
8699 }
Owen Taylor3473f882001-02-23 17:55:21 +00008700 return(0);
8701}
8702
8703/**
8704 * xmlParseExtParsedEnt:
8705 * @ctxt: an XML parser context
8706 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008707 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008708 * An external general parsed entity is well-formed if it matches the
8709 * production labeled extParsedEnt.
8710 *
8711 * [78] extParsedEnt ::= TextDecl? content
8712 *
8713 * Returns 0, -1 in case of error. the parser context is augmented
8714 * as a result of the parsing.
8715 */
8716
8717int
8718xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8719 xmlChar start[4];
8720 xmlCharEncoding enc;
8721
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008722 if ((ctxt == NULL) || (ctxt->input == NULL))
8723 return(-1);
8724
Owen Taylor3473f882001-02-23 17:55:21 +00008725 xmlDefaultSAXHandlerInit();
8726
Daniel Veillard309f81d2003-09-23 09:02:53 +00008727 xmlDetectSAX2(ctxt);
8728
Owen Taylor3473f882001-02-23 17:55:21 +00008729 GROW;
8730
8731 /*
8732 * SAX: beginning of the document processing.
8733 */
8734 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8735 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8736
8737 /*
8738 * Get the 4 first bytes and decode the charset
8739 * if enc != XML_CHAR_ENCODING_NONE
8740 * plug some encoding conversion routines.
8741 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008742 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8743 start[0] = RAW;
8744 start[1] = NXT(1);
8745 start[2] = NXT(2);
8746 start[3] = NXT(3);
8747 enc = xmlDetectCharEncoding(start, 4);
8748 if (enc != XML_CHAR_ENCODING_NONE) {
8749 xmlSwitchEncoding(ctxt, enc);
8750 }
Owen Taylor3473f882001-02-23 17:55:21 +00008751 }
8752
8753
8754 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008755 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008756 }
8757
8758 /*
8759 * Check for the XMLDecl in the Prolog.
8760 */
8761 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008762 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008763
8764 /*
8765 * Note that we will switch encoding on the fly.
8766 */
8767 xmlParseXMLDecl(ctxt);
8768 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8769 /*
8770 * The XML REC instructs us to stop parsing right here
8771 */
8772 return(-1);
8773 }
8774 SKIP_BLANKS;
8775 } else {
8776 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8777 }
8778 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8779 ctxt->sax->startDocument(ctxt->userData);
8780
8781 /*
8782 * Doing validity checking on chunk doesn't make sense
8783 */
8784 ctxt->instate = XML_PARSER_CONTENT;
8785 ctxt->validate = 0;
8786 ctxt->loadsubset = 0;
8787 ctxt->depth = 0;
8788
8789 xmlParseContent(ctxt);
8790
8791 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008792 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008793 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008794 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008795 }
8796
8797 /*
8798 * SAX: end of the document processing.
8799 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008800 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008801 ctxt->sax->endDocument(ctxt->userData);
8802
8803 if (! ctxt->wellFormed) return(-1);
8804 return(0);
8805}
8806
Daniel Veillard73b013f2003-09-30 12:36:01 +00008807#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008808/************************************************************************
8809 * *
8810 * Progressive parsing interfaces *
8811 * *
8812 ************************************************************************/
8813
8814/**
8815 * xmlParseLookupSequence:
8816 * @ctxt: an XML parser context
8817 * @first: the first char to lookup
8818 * @next: the next char to lookup or zero
8819 * @third: the next char to lookup or zero
8820 *
8821 * Try to find if a sequence (first, next, third) or just (first next) or
8822 * (first) is available in the input stream.
8823 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8824 * to avoid rescanning sequences of bytes, it DOES change the state of the
8825 * parser, do not use liberally.
8826 *
8827 * Returns the index to the current parsing point if the full sequence
8828 * is available, -1 otherwise.
8829 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008830static int
Owen Taylor3473f882001-02-23 17:55:21 +00008831xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8832 xmlChar next, xmlChar third) {
8833 int base, len;
8834 xmlParserInputPtr in;
8835 const xmlChar *buf;
8836
8837 in = ctxt->input;
8838 if (in == NULL) return(-1);
8839 base = in->cur - in->base;
8840 if (base < 0) return(-1);
8841 if (ctxt->checkIndex > base)
8842 base = ctxt->checkIndex;
8843 if (in->buf == NULL) {
8844 buf = in->base;
8845 len = in->length;
8846 } else {
8847 buf = in->buf->buffer->content;
8848 len = in->buf->buffer->use;
8849 }
8850 /* take into account the sequence length */
8851 if (third) len -= 2;
8852 else if (next) len --;
8853 for (;base < len;base++) {
8854 if (buf[base] == first) {
8855 if (third != 0) {
8856 if ((buf[base + 1] != next) ||
8857 (buf[base + 2] != third)) continue;
8858 } else if (next != 0) {
8859 if (buf[base + 1] != next) continue;
8860 }
8861 ctxt->checkIndex = 0;
8862#ifdef DEBUG_PUSH
8863 if (next == 0)
8864 xmlGenericError(xmlGenericErrorContext,
8865 "PP: lookup '%c' found at %d\n",
8866 first, base);
8867 else if (third == 0)
8868 xmlGenericError(xmlGenericErrorContext,
8869 "PP: lookup '%c%c' found at %d\n",
8870 first, next, base);
8871 else
8872 xmlGenericError(xmlGenericErrorContext,
8873 "PP: lookup '%c%c%c' found at %d\n",
8874 first, next, third, base);
8875#endif
8876 return(base - (in->cur - in->base));
8877 }
8878 }
8879 ctxt->checkIndex = base;
8880#ifdef DEBUG_PUSH
8881 if (next == 0)
8882 xmlGenericError(xmlGenericErrorContext,
8883 "PP: lookup '%c' failed\n", first);
8884 else if (third == 0)
8885 xmlGenericError(xmlGenericErrorContext,
8886 "PP: lookup '%c%c' failed\n", first, next);
8887 else
8888 xmlGenericError(xmlGenericErrorContext,
8889 "PP: lookup '%c%c%c' failed\n", first, next, third);
8890#endif
8891 return(-1);
8892}
8893
8894/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008895 * xmlParseGetLasts:
8896 * @ctxt: an XML parser context
8897 * @lastlt: pointer to store the last '<' from the input
8898 * @lastgt: pointer to store the last '>' from the input
8899 *
8900 * Lookup the last < and > in the current chunk
8901 */
8902static void
8903xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8904 const xmlChar **lastgt) {
8905 const xmlChar *tmp;
8906
8907 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8908 xmlGenericError(xmlGenericErrorContext,
8909 "Internal error: xmlParseGetLasts\n");
8910 return;
8911 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008912 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008913 tmp = ctxt->input->end;
8914 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008915 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008916 if (tmp < ctxt->input->base) {
8917 *lastlt = NULL;
8918 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008919 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008920 *lastlt = tmp;
8921 tmp++;
8922 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8923 if (*tmp == '\'') {
8924 tmp++;
8925 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8926 if (tmp < ctxt->input->end) tmp++;
8927 } else if (*tmp == '"') {
8928 tmp++;
8929 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8930 if (tmp < ctxt->input->end) tmp++;
8931 } else
8932 tmp++;
8933 }
8934 if (tmp < ctxt->input->end)
8935 *lastgt = tmp;
8936 else {
8937 tmp = *lastlt;
8938 tmp--;
8939 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8940 if (tmp >= ctxt->input->base)
8941 *lastgt = tmp;
8942 else
8943 *lastgt = NULL;
8944 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008945 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008946 } else {
8947 *lastlt = NULL;
8948 *lastgt = NULL;
8949 }
8950}
8951/**
Owen Taylor3473f882001-02-23 17:55:21 +00008952 * xmlParseTryOrFinish:
8953 * @ctxt: an XML parser context
8954 * @terminate: last chunk indicator
8955 *
8956 * Try to progress on parsing
8957 *
8958 * Returns zero if no parsing was possible
8959 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008960static int
Owen Taylor3473f882001-02-23 17:55:21 +00008961xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8962 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008963 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008964 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008965 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008966
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008967 if (ctxt->input == NULL)
8968 return(0);
8969
Owen Taylor3473f882001-02-23 17:55:21 +00008970#ifdef DEBUG_PUSH
8971 switch (ctxt->instate) {
8972 case XML_PARSER_EOF:
8973 xmlGenericError(xmlGenericErrorContext,
8974 "PP: try EOF\n"); break;
8975 case XML_PARSER_START:
8976 xmlGenericError(xmlGenericErrorContext,
8977 "PP: try START\n"); break;
8978 case XML_PARSER_MISC:
8979 xmlGenericError(xmlGenericErrorContext,
8980 "PP: try MISC\n");break;
8981 case XML_PARSER_COMMENT:
8982 xmlGenericError(xmlGenericErrorContext,
8983 "PP: try COMMENT\n");break;
8984 case XML_PARSER_PROLOG:
8985 xmlGenericError(xmlGenericErrorContext,
8986 "PP: try PROLOG\n");break;
8987 case XML_PARSER_START_TAG:
8988 xmlGenericError(xmlGenericErrorContext,
8989 "PP: try START_TAG\n");break;
8990 case XML_PARSER_CONTENT:
8991 xmlGenericError(xmlGenericErrorContext,
8992 "PP: try CONTENT\n");break;
8993 case XML_PARSER_CDATA_SECTION:
8994 xmlGenericError(xmlGenericErrorContext,
8995 "PP: try CDATA_SECTION\n");break;
8996 case XML_PARSER_END_TAG:
8997 xmlGenericError(xmlGenericErrorContext,
8998 "PP: try END_TAG\n");break;
8999 case XML_PARSER_ENTITY_DECL:
9000 xmlGenericError(xmlGenericErrorContext,
9001 "PP: try ENTITY_DECL\n");break;
9002 case XML_PARSER_ENTITY_VALUE:
9003 xmlGenericError(xmlGenericErrorContext,
9004 "PP: try ENTITY_VALUE\n");break;
9005 case XML_PARSER_ATTRIBUTE_VALUE:
9006 xmlGenericError(xmlGenericErrorContext,
9007 "PP: try ATTRIBUTE_VALUE\n");break;
9008 case XML_PARSER_DTD:
9009 xmlGenericError(xmlGenericErrorContext,
9010 "PP: try DTD\n");break;
9011 case XML_PARSER_EPILOG:
9012 xmlGenericError(xmlGenericErrorContext,
9013 "PP: try EPILOG\n");break;
9014 case XML_PARSER_PI:
9015 xmlGenericError(xmlGenericErrorContext,
9016 "PP: try PI\n");break;
9017 case XML_PARSER_IGNORE:
9018 xmlGenericError(xmlGenericErrorContext,
9019 "PP: try IGNORE\n");break;
9020 }
9021#endif
9022
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009023 if ((ctxt->input != NULL) &&
9024 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009025 xmlSHRINK(ctxt);
9026 ctxt->checkIndex = 0;
9027 }
9028 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009029
Daniel Veillarda880b122003-04-21 21:36:41 +00009030 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009031 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9032 return(0);
9033
9034
Owen Taylor3473f882001-02-23 17:55:21 +00009035 /*
9036 * Pop-up of finished entities.
9037 */
9038 while ((RAW == 0) && (ctxt->inputNr > 1))
9039 xmlPopInput(ctxt);
9040
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009041 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009042 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009043 avail = ctxt->input->length -
9044 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009045 else {
9046 /*
9047 * If we are operating on converted input, try to flush
9048 * remainng chars to avoid them stalling in the non-converted
9049 * buffer.
9050 */
9051 if ((ctxt->input->buf->raw != NULL) &&
9052 (ctxt->input->buf->raw->use > 0)) {
9053 int base = ctxt->input->base -
9054 ctxt->input->buf->buffer->content;
9055 int current = ctxt->input->cur - ctxt->input->base;
9056
9057 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9058 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9059 ctxt->input->cur = ctxt->input->base + current;
9060 ctxt->input->end =
9061 &ctxt->input->buf->buffer->content[
9062 ctxt->input->buf->buffer->use];
9063 }
9064 avail = ctxt->input->buf->buffer->use -
9065 (ctxt->input->cur - ctxt->input->base);
9066 }
Owen Taylor3473f882001-02-23 17:55:21 +00009067 if (avail < 1)
9068 goto done;
9069 switch (ctxt->instate) {
9070 case XML_PARSER_EOF:
9071 /*
9072 * Document parsing is done !
9073 */
9074 goto done;
9075 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009076 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9077 xmlChar start[4];
9078 xmlCharEncoding enc;
9079
9080 /*
9081 * Very first chars read from the document flow.
9082 */
9083 if (avail < 4)
9084 goto done;
9085
9086 /*
9087 * Get the 4 first bytes and decode the charset
9088 * if enc != XML_CHAR_ENCODING_NONE
9089 * plug some encoding conversion routines.
9090 */
9091 start[0] = RAW;
9092 start[1] = NXT(1);
9093 start[2] = NXT(2);
9094 start[3] = NXT(3);
9095 enc = xmlDetectCharEncoding(start, 4);
9096 if (enc != XML_CHAR_ENCODING_NONE) {
9097 xmlSwitchEncoding(ctxt, enc);
9098 }
9099 break;
9100 }
Owen Taylor3473f882001-02-23 17:55:21 +00009101
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009102 if (avail < 2)
9103 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009104 cur = ctxt->input->cur[0];
9105 next = ctxt->input->cur[1];
9106 if (cur == 0) {
9107 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9108 ctxt->sax->setDocumentLocator(ctxt->userData,
9109 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009110 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009111 ctxt->instate = XML_PARSER_EOF;
9112#ifdef DEBUG_PUSH
9113 xmlGenericError(xmlGenericErrorContext,
9114 "PP: entering EOF\n");
9115#endif
9116 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9117 ctxt->sax->endDocument(ctxt->userData);
9118 goto done;
9119 }
9120 if ((cur == '<') && (next == '?')) {
9121 /* PI or XML decl */
9122 if (avail < 5) return(ret);
9123 if ((!terminate) &&
9124 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9125 return(ret);
9126 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9127 ctxt->sax->setDocumentLocator(ctxt->userData,
9128 &xmlDefaultSAXLocator);
9129 if ((ctxt->input->cur[2] == 'x') &&
9130 (ctxt->input->cur[3] == 'm') &&
9131 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009132 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009133 ret += 5;
9134#ifdef DEBUG_PUSH
9135 xmlGenericError(xmlGenericErrorContext,
9136 "PP: Parsing XML Decl\n");
9137#endif
9138 xmlParseXMLDecl(ctxt);
9139 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9140 /*
9141 * The XML REC instructs us to stop parsing right
9142 * here
9143 */
9144 ctxt->instate = XML_PARSER_EOF;
9145 return(0);
9146 }
9147 ctxt->standalone = ctxt->input->standalone;
9148 if ((ctxt->encoding == NULL) &&
9149 (ctxt->input->encoding != NULL))
9150 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9151 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9152 (!ctxt->disableSAX))
9153 ctxt->sax->startDocument(ctxt->userData);
9154 ctxt->instate = XML_PARSER_MISC;
9155#ifdef DEBUG_PUSH
9156 xmlGenericError(xmlGenericErrorContext,
9157 "PP: entering MISC\n");
9158#endif
9159 } else {
9160 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9161 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9162 (!ctxt->disableSAX))
9163 ctxt->sax->startDocument(ctxt->userData);
9164 ctxt->instate = XML_PARSER_MISC;
9165#ifdef DEBUG_PUSH
9166 xmlGenericError(xmlGenericErrorContext,
9167 "PP: entering MISC\n");
9168#endif
9169 }
9170 } else {
9171 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9172 ctxt->sax->setDocumentLocator(ctxt->userData,
9173 &xmlDefaultSAXLocator);
9174 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009175 if (ctxt->version == NULL) {
9176 xmlErrMemory(ctxt, NULL);
9177 break;
9178 }
Owen Taylor3473f882001-02-23 17:55:21 +00009179 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9180 (!ctxt->disableSAX))
9181 ctxt->sax->startDocument(ctxt->userData);
9182 ctxt->instate = XML_PARSER_MISC;
9183#ifdef DEBUG_PUSH
9184 xmlGenericError(xmlGenericErrorContext,
9185 "PP: entering MISC\n");
9186#endif
9187 }
9188 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009189 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009190 const xmlChar *name;
9191 const xmlChar *prefix;
9192 const xmlChar *URI;
9193 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009194
9195 if ((avail < 2) && (ctxt->inputNr == 1))
9196 goto done;
9197 cur = ctxt->input->cur[0];
9198 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009199 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009200 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009201 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9202 ctxt->sax->endDocument(ctxt->userData);
9203 goto done;
9204 }
9205 if (!terminate) {
9206 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009207 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009208 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009209 goto done;
9210 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9211 goto done;
9212 }
9213 }
9214 if (ctxt->spaceNr == 0)
9215 spacePush(ctxt, -1);
9216 else
9217 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009218#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009219 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009220#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009221 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009222#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009223 else
9224 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009225#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009226 if (name == NULL) {
9227 spacePop(ctxt);
9228 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9230 ctxt->sax->endDocument(ctxt->userData);
9231 goto done;
9232 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009233#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009234 /*
9235 * [ VC: Root Element Type ]
9236 * The Name in the document type declaration must match
9237 * the element type of the root element.
9238 */
9239 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9240 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9241 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009242#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009243
9244 /*
9245 * Check for an Empty Element.
9246 */
9247 if ((RAW == '/') && (NXT(1) == '>')) {
9248 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009249
9250 if (ctxt->sax2) {
9251 if ((ctxt->sax != NULL) &&
9252 (ctxt->sax->endElementNs != NULL) &&
9253 (!ctxt->disableSAX))
9254 ctxt->sax->endElementNs(ctxt->userData, name,
9255 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009256#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009257 } else {
9258 if ((ctxt->sax != NULL) &&
9259 (ctxt->sax->endElement != NULL) &&
9260 (!ctxt->disableSAX))
9261 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009262#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009263 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009264 spacePop(ctxt);
9265 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009266 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009267 } else {
9268 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009269 }
9270 break;
9271 }
9272 if (RAW == '>') {
9273 NEXT;
9274 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009275 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009276 "Couldn't find end of Start Tag %s\n",
9277 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009278 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009279 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009280 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009281 if (ctxt->sax2)
9282 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009283#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009284 else
9285 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009286#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009287
Daniel Veillarda880b122003-04-21 21:36:41 +00009288 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009289 break;
9290 }
9291 case XML_PARSER_CONTENT: {
9292 const xmlChar *test;
9293 unsigned int cons;
9294 if ((avail < 2) && (ctxt->inputNr == 1))
9295 goto done;
9296 cur = ctxt->input->cur[0];
9297 next = ctxt->input->cur[1];
9298
9299 test = CUR_PTR;
9300 cons = ctxt->input->consumed;
9301 if ((cur == '<') && (next == '/')) {
9302 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009303 break;
9304 } else if ((cur == '<') && (next == '?')) {
9305 if ((!terminate) &&
9306 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9307 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009308 xmlParsePI(ctxt);
9309 } else if ((cur == '<') && (next != '!')) {
9310 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009311 break;
9312 } else if ((cur == '<') && (next == '!') &&
9313 (ctxt->input->cur[2] == '-') &&
9314 (ctxt->input->cur[3] == '-')) {
9315 if ((!terminate) &&
9316 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9317 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009318 xmlParseComment(ctxt);
9319 ctxt->instate = XML_PARSER_CONTENT;
9320 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9321 (ctxt->input->cur[2] == '[') &&
9322 (ctxt->input->cur[3] == 'C') &&
9323 (ctxt->input->cur[4] == 'D') &&
9324 (ctxt->input->cur[5] == 'A') &&
9325 (ctxt->input->cur[6] == 'T') &&
9326 (ctxt->input->cur[7] == 'A') &&
9327 (ctxt->input->cur[8] == '[')) {
9328 SKIP(9);
9329 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009330 break;
9331 } else if ((cur == '<') && (next == '!') &&
9332 (avail < 9)) {
9333 goto done;
9334 } else if (cur == '&') {
9335 if ((!terminate) &&
9336 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9337 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009338 xmlParseReference(ctxt);
9339 } else {
9340 /* TODO Avoid the extra copy, handle directly !!! */
9341 /*
9342 * Goal of the following test is:
9343 * - minimize calls to the SAX 'character' callback
9344 * when they are mergeable
9345 * - handle an problem for isBlank when we only parse
9346 * a sequence of blank chars and the next one is
9347 * not available to check against '<' presence.
9348 * - tries to homogenize the differences in SAX
9349 * callbacks between the push and pull versions
9350 * of the parser.
9351 */
9352 if ((ctxt->inputNr == 1) &&
9353 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9354 if (!terminate) {
9355 if (ctxt->progressive) {
9356 if ((lastlt == NULL) ||
9357 (ctxt->input->cur > lastlt))
9358 goto done;
9359 } else if (xmlParseLookupSequence(ctxt,
9360 '<', 0, 0) < 0) {
9361 goto done;
9362 }
9363 }
9364 }
9365 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009366 xmlParseCharData(ctxt, 0);
9367 }
9368 /*
9369 * Pop-up of finished entities.
9370 */
9371 while ((RAW == 0) && (ctxt->inputNr > 1))
9372 xmlPopInput(ctxt);
9373 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009374 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9375 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009376 ctxt->instate = XML_PARSER_EOF;
9377 break;
9378 }
9379 break;
9380 }
9381 case XML_PARSER_END_TAG:
9382 if (avail < 2)
9383 goto done;
9384 if (!terminate) {
9385 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009386 /* > can be found unescaped in attribute values */
9387 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009388 goto done;
9389 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9390 goto done;
9391 }
9392 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009393 if (ctxt->sax2) {
9394 xmlParseEndTag2(ctxt,
9395 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9396 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009397 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009398 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009399 }
9400#ifdef LIBXML_SAX1_ENABLED
9401 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009402 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009403#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009404 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009405 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009406 } else {
9407 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009408 }
9409 break;
9410 case XML_PARSER_CDATA_SECTION: {
9411 /*
9412 * The Push mode need to have the SAX callback for
9413 * cdataBlock merge back contiguous callbacks.
9414 */
9415 int base;
9416
9417 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9418 if (base < 0) {
9419 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9420 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9421 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009422 ctxt->sax->cdataBlock(ctxt->userData,
9423 ctxt->input->cur,
9424 XML_PARSER_BIG_BUFFER_SIZE);
9425 else if (ctxt->sax->characters != NULL)
9426 ctxt->sax->characters(ctxt->userData,
9427 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009428 XML_PARSER_BIG_BUFFER_SIZE);
9429 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009430 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009431 ctxt->checkIndex = 0;
9432 }
9433 goto done;
9434 } else {
9435 if ((ctxt->sax != NULL) && (base > 0) &&
9436 (!ctxt->disableSAX)) {
9437 if (ctxt->sax->cdataBlock != NULL)
9438 ctxt->sax->cdataBlock(ctxt->userData,
9439 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009440 else if (ctxt->sax->characters != NULL)
9441 ctxt->sax->characters(ctxt->userData,
9442 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009443 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009444 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009445 ctxt->checkIndex = 0;
9446 ctxt->instate = XML_PARSER_CONTENT;
9447#ifdef DEBUG_PUSH
9448 xmlGenericError(xmlGenericErrorContext,
9449 "PP: entering CONTENT\n");
9450#endif
9451 }
9452 break;
9453 }
Owen Taylor3473f882001-02-23 17:55:21 +00009454 case XML_PARSER_MISC:
9455 SKIP_BLANKS;
9456 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009457 avail = ctxt->input->length -
9458 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009459 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009460 avail = ctxt->input->buf->buffer->use -
9461 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009462 if (avail < 2)
9463 goto done;
9464 cur = ctxt->input->cur[0];
9465 next = ctxt->input->cur[1];
9466 if ((cur == '<') && (next == '?')) {
9467 if ((!terminate) &&
9468 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9469 goto done;
9470#ifdef DEBUG_PUSH
9471 xmlGenericError(xmlGenericErrorContext,
9472 "PP: Parsing PI\n");
9473#endif
9474 xmlParsePI(ctxt);
9475 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009476 (ctxt->input->cur[2] == '-') &&
9477 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009478 if ((!terminate) &&
9479 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9480 goto done;
9481#ifdef DEBUG_PUSH
9482 xmlGenericError(xmlGenericErrorContext,
9483 "PP: Parsing Comment\n");
9484#endif
9485 xmlParseComment(ctxt);
9486 ctxt->instate = XML_PARSER_MISC;
9487 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009488 (ctxt->input->cur[2] == 'D') &&
9489 (ctxt->input->cur[3] == 'O') &&
9490 (ctxt->input->cur[4] == 'C') &&
9491 (ctxt->input->cur[5] == 'T') &&
9492 (ctxt->input->cur[6] == 'Y') &&
9493 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009494 (ctxt->input->cur[8] == 'E')) {
9495 if ((!terminate) &&
9496 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9497 goto done;
9498#ifdef DEBUG_PUSH
9499 xmlGenericError(xmlGenericErrorContext,
9500 "PP: Parsing internal subset\n");
9501#endif
9502 ctxt->inSubset = 1;
9503 xmlParseDocTypeDecl(ctxt);
9504 if (RAW == '[') {
9505 ctxt->instate = XML_PARSER_DTD;
9506#ifdef DEBUG_PUSH
9507 xmlGenericError(xmlGenericErrorContext,
9508 "PP: entering DTD\n");
9509#endif
9510 } else {
9511 /*
9512 * Create and update the external subset.
9513 */
9514 ctxt->inSubset = 2;
9515 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9516 (ctxt->sax->externalSubset != NULL))
9517 ctxt->sax->externalSubset(ctxt->userData,
9518 ctxt->intSubName, ctxt->extSubSystem,
9519 ctxt->extSubURI);
9520 ctxt->inSubset = 0;
9521 ctxt->instate = XML_PARSER_PROLOG;
9522#ifdef DEBUG_PUSH
9523 xmlGenericError(xmlGenericErrorContext,
9524 "PP: entering PROLOG\n");
9525#endif
9526 }
9527 } else if ((cur == '<') && (next == '!') &&
9528 (avail < 9)) {
9529 goto done;
9530 } else {
9531 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009532 ctxt->progressive = 1;
9533 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009534#ifdef DEBUG_PUSH
9535 xmlGenericError(xmlGenericErrorContext,
9536 "PP: entering START_TAG\n");
9537#endif
9538 }
9539 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009540 case XML_PARSER_PROLOG:
9541 SKIP_BLANKS;
9542 if (ctxt->input->buf == NULL)
9543 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9544 else
9545 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9546 if (avail < 2)
9547 goto done;
9548 cur = ctxt->input->cur[0];
9549 next = ctxt->input->cur[1];
9550 if ((cur == '<') && (next == '?')) {
9551 if ((!terminate) &&
9552 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9553 goto done;
9554#ifdef DEBUG_PUSH
9555 xmlGenericError(xmlGenericErrorContext,
9556 "PP: Parsing PI\n");
9557#endif
9558 xmlParsePI(ctxt);
9559 } else if ((cur == '<') && (next == '!') &&
9560 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9561 if ((!terminate) &&
9562 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9563 goto done;
9564#ifdef DEBUG_PUSH
9565 xmlGenericError(xmlGenericErrorContext,
9566 "PP: Parsing Comment\n");
9567#endif
9568 xmlParseComment(ctxt);
9569 ctxt->instate = XML_PARSER_PROLOG;
9570 } else if ((cur == '<') && (next == '!') &&
9571 (avail < 4)) {
9572 goto done;
9573 } else {
9574 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009575 if (ctxt->progressive == 0)
9576 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009577 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009578#ifdef DEBUG_PUSH
9579 xmlGenericError(xmlGenericErrorContext,
9580 "PP: entering START_TAG\n");
9581#endif
9582 }
9583 break;
9584 case XML_PARSER_EPILOG:
9585 SKIP_BLANKS;
9586 if (ctxt->input->buf == NULL)
9587 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9588 else
9589 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9590 if (avail < 2)
9591 goto done;
9592 cur = ctxt->input->cur[0];
9593 next = ctxt->input->cur[1];
9594 if ((cur == '<') && (next == '?')) {
9595 if ((!terminate) &&
9596 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9597 goto done;
9598#ifdef DEBUG_PUSH
9599 xmlGenericError(xmlGenericErrorContext,
9600 "PP: Parsing PI\n");
9601#endif
9602 xmlParsePI(ctxt);
9603 ctxt->instate = XML_PARSER_EPILOG;
9604 } else if ((cur == '<') && (next == '!') &&
9605 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9606 if ((!terminate) &&
9607 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9608 goto done;
9609#ifdef DEBUG_PUSH
9610 xmlGenericError(xmlGenericErrorContext,
9611 "PP: Parsing Comment\n");
9612#endif
9613 xmlParseComment(ctxt);
9614 ctxt->instate = XML_PARSER_EPILOG;
9615 } else if ((cur == '<') && (next == '!') &&
9616 (avail < 4)) {
9617 goto done;
9618 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009619 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009620 ctxt->instate = XML_PARSER_EOF;
9621#ifdef DEBUG_PUSH
9622 xmlGenericError(xmlGenericErrorContext,
9623 "PP: entering EOF\n");
9624#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009625 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009626 ctxt->sax->endDocument(ctxt->userData);
9627 goto done;
9628 }
9629 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009630 case XML_PARSER_DTD: {
9631 /*
9632 * Sorry but progressive parsing of the internal subset
9633 * is not expected to be supported. We first check that
9634 * the full content of the internal subset is available and
9635 * the parsing is launched only at that point.
9636 * Internal subset ends up with "']' S? '>'" in an unescaped
9637 * section and not in a ']]>' sequence which are conditional
9638 * sections (whoever argued to keep that crap in XML deserve
9639 * a place in hell !).
9640 */
9641 int base, i;
9642 xmlChar *buf;
9643 xmlChar quote = 0;
9644
9645 base = ctxt->input->cur - ctxt->input->base;
9646 if (base < 0) return(0);
9647 if (ctxt->checkIndex > base)
9648 base = ctxt->checkIndex;
9649 buf = ctxt->input->buf->buffer->content;
9650 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9651 base++) {
9652 if (quote != 0) {
9653 if (buf[base] == quote)
9654 quote = 0;
9655 continue;
9656 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009657 if ((quote == 0) && (buf[base] == '<')) {
9658 int found = 0;
9659 /* special handling of comments */
9660 if (((unsigned int) base + 4 <
9661 ctxt->input->buf->buffer->use) &&
9662 (buf[base + 1] == '!') &&
9663 (buf[base + 2] == '-') &&
9664 (buf[base + 3] == '-')) {
9665 for (;(unsigned int) base + 3 <
9666 ctxt->input->buf->buffer->use; base++) {
9667 if ((buf[base] == '-') &&
9668 (buf[base + 1] == '-') &&
9669 (buf[base + 2] == '>')) {
9670 found = 1;
9671 base += 2;
9672 break;
9673 }
9674 }
9675 if (!found)
9676 break;
9677 continue;
9678 }
9679 }
Owen Taylor3473f882001-02-23 17:55:21 +00009680 if (buf[base] == '"') {
9681 quote = '"';
9682 continue;
9683 }
9684 if (buf[base] == '\'') {
9685 quote = '\'';
9686 continue;
9687 }
9688 if (buf[base] == ']') {
9689 if ((unsigned int) base +1 >=
9690 ctxt->input->buf->buffer->use)
9691 break;
9692 if (buf[base + 1] == ']') {
9693 /* conditional crap, skip both ']' ! */
9694 base++;
9695 continue;
9696 }
9697 for (i = 0;
9698 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9699 i++) {
9700 if (buf[base + i] == '>')
9701 goto found_end_int_subset;
9702 }
9703 break;
9704 }
9705 }
9706 /*
9707 * We didn't found the end of the Internal subset
9708 */
9709 if (quote == 0)
9710 ctxt->checkIndex = base;
9711#ifdef DEBUG_PUSH
9712 if (next == 0)
9713 xmlGenericError(xmlGenericErrorContext,
9714 "PP: lookup of int subset end filed\n");
9715#endif
9716 goto done;
9717
9718found_end_int_subset:
9719 xmlParseInternalSubset(ctxt);
9720 ctxt->inSubset = 2;
9721 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9722 (ctxt->sax->externalSubset != NULL))
9723 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9724 ctxt->extSubSystem, ctxt->extSubURI);
9725 ctxt->inSubset = 0;
9726 ctxt->instate = XML_PARSER_PROLOG;
9727 ctxt->checkIndex = 0;
9728#ifdef DEBUG_PUSH
9729 xmlGenericError(xmlGenericErrorContext,
9730 "PP: entering PROLOG\n");
9731#endif
9732 break;
9733 }
9734 case XML_PARSER_COMMENT:
9735 xmlGenericError(xmlGenericErrorContext,
9736 "PP: internal error, state == COMMENT\n");
9737 ctxt->instate = XML_PARSER_CONTENT;
9738#ifdef DEBUG_PUSH
9739 xmlGenericError(xmlGenericErrorContext,
9740 "PP: entering CONTENT\n");
9741#endif
9742 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009743 case XML_PARSER_IGNORE:
9744 xmlGenericError(xmlGenericErrorContext,
9745 "PP: internal error, state == IGNORE");
9746 ctxt->instate = XML_PARSER_DTD;
9747#ifdef DEBUG_PUSH
9748 xmlGenericError(xmlGenericErrorContext,
9749 "PP: entering DTD\n");
9750#endif
9751 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009752 case XML_PARSER_PI:
9753 xmlGenericError(xmlGenericErrorContext,
9754 "PP: internal error, state == PI\n");
9755 ctxt->instate = XML_PARSER_CONTENT;
9756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: entering CONTENT\n");
9759#endif
9760 break;
9761 case XML_PARSER_ENTITY_DECL:
9762 xmlGenericError(xmlGenericErrorContext,
9763 "PP: internal error, state == ENTITY_DECL\n");
9764 ctxt->instate = XML_PARSER_DTD;
9765#ifdef DEBUG_PUSH
9766 xmlGenericError(xmlGenericErrorContext,
9767 "PP: entering DTD\n");
9768#endif
9769 break;
9770 case XML_PARSER_ENTITY_VALUE:
9771 xmlGenericError(xmlGenericErrorContext,
9772 "PP: internal error, state == ENTITY_VALUE\n");
9773 ctxt->instate = XML_PARSER_CONTENT;
9774#ifdef DEBUG_PUSH
9775 xmlGenericError(xmlGenericErrorContext,
9776 "PP: entering DTD\n");
9777#endif
9778 break;
9779 case XML_PARSER_ATTRIBUTE_VALUE:
9780 xmlGenericError(xmlGenericErrorContext,
9781 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9782 ctxt->instate = XML_PARSER_START_TAG;
9783#ifdef DEBUG_PUSH
9784 xmlGenericError(xmlGenericErrorContext,
9785 "PP: entering START_TAG\n");
9786#endif
9787 break;
9788 case XML_PARSER_SYSTEM_LITERAL:
9789 xmlGenericError(xmlGenericErrorContext,
9790 "PP: internal error, state == SYSTEM_LITERAL\n");
9791 ctxt->instate = XML_PARSER_START_TAG;
9792#ifdef DEBUG_PUSH
9793 xmlGenericError(xmlGenericErrorContext,
9794 "PP: entering START_TAG\n");
9795#endif
9796 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009797 case XML_PARSER_PUBLIC_LITERAL:
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: internal error, state == PUBLIC_LITERAL\n");
9800 ctxt->instate = XML_PARSER_START_TAG;
9801#ifdef DEBUG_PUSH
9802 xmlGenericError(xmlGenericErrorContext,
9803 "PP: entering START_TAG\n");
9804#endif
9805 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009806 }
9807 }
9808done:
9809#ifdef DEBUG_PUSH
9810 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9811#endif
9812 return(ret);
9813}
9814
9815/**
Owen Taylor3473f882001-02-23 17:55:21 +00009816 * xmlParseChunk:
9817 * @ctxt: an XML parser context
9818 * @chunk: an char array
9819 * @size: the size in byte of the chunk
9820 * @terminate: last chunk indicator
9821 *
9822 * Parse a Chunk of memory
9823 *
9824 * Returns zero if no error, the xmlParserErrors otherwise.
9825 */
9826int
9827xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9828 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009829 if (ctxt == NULL)
9830 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009831 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9832 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009833 if (ctxt->instate == XML_PARSER_START)
9834 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009835 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9836 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9837 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9838 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +00009839 int res;
Owen Taylor3473f882001-02-23 17:55:21 +00009840
William M. Bracka3215c72004-07-31 16:24:01 +00009841 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9842 if (res < 0) {
9843 ctxt->errNo = XML_PARSER_EOF;
9844 ctxt->disableSAX = 1;
9845 return (XML_PARSER_EOF);
9846 }
Owen Taylor3473f882001-02-23 17:55:21 +00009847 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9848 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009849 ctxt->input->end =
9850 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009851#ifdef DEBUG_PUSH
9852 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9853#endif
9854
Owen Taylor3473f882001-02-23 17:55:21 +00009855 } else if (ctxt->instate != XML_PARSER_EOF) {
9856 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9857 xmlParserInputBufferPtr in = ctxt->input->buf;
9858 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9859 (in->raw != NULL)) {
9860 int nbchars;
9861
9862 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9863 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009864 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009865 xmlGenericError(xmlGenericErrorContext,
9866 "xmlParseChunk: encoder error\n");
9867 return(XML_ERR_INVALID_ENCODING);
9868 }
9869 }
9870 }
9871 }
9872 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009873 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9874 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009875 if (terminate) {
9876 /*
9877 * Check for termination
9878 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009879 int avail = 0;
9880
9881 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009882 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009883 avail = ctxt->input->length -
9884 (ctxt->input->cur - ctxt->input->base);
9885 else
9886 avail = ctxt->input->buf->buffer->use -
9887 (ctxt->input->cur - ctxt->input->base);
9888 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009889
Owen Taylor3473f882001-02-23 17:55:21 +00009890 if ((ctxt->instate != XML_PARSER_EOF) &&
9891 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009892 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009893 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009894 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009895 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009896 }
Owen Taylor3473f882001-02-23 17:55:21 +00009897 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009898 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009899 ctxt->sax->endDocument(ctxt->userData);
9900 }
9901 ctxt->instate = XML_PARSER_EOF;
9902 }
9903 return((xmlParserErrors) ctxt->errNo);
9904}
9905
9906/************************************************************************
9907 * *
9908 * I/O front end functions to the parser *
9909 * *
9910 ************************************************************************/
9911
9912/**
9913 * xmlStopParser:
9914 * @ctxt: an XML parser context
9915 *
9916 * Blocks further parser processing
9917 */
9918void
9919xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009920 if (ctxt == NULL)
9921 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009922 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009923 ctxt->disableSAX = 1;
William M. Brack230c5502004-12-20 16:18:49 +00009924 if (ctxt->input != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009925 ctxt->input->cur = BAD_CAST"";
William M. Brack230c5502004-12-20 16:18:49 +00009926 ctxt->input->base = ctxt->input->cur;
9927 }
Owen Taylor3473f882001-02-23 17:55:21 +00009928}
9929
9930/**
9931 * xmlCreatePushParserCtxt:
9932 * @sax: a SAX handler
9933 * @user_data: The user data returned on SAX callbacks
9934 * @chunk: a pointer to an array of chars
9935 * @size: number of chars in the array
9936 * @filename: an optional file name or URI
9937 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009938 * Create a parser context for using the XML parser in push mode.
9939 * If @buffer and @size are non-NULL, the data is used to detect
9940 * the encoding. The remaining characters will be parsed so they
9941 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009942 * To allow content encoding detection, @size should be >= 4
9943 * The value of @filename is used for fetching external entities
9944 * and error/warning reports.
9945 *
9946 * Returns the new parser context or NULL
9947 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009948
Owen Taylor3473f882001-02-23 17:55:21 +00009949xmlParserCtxtPtr
9950xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9951 const char *chunk, int size, const char *filename) {
9952 xmlParserCtxtPtr ctxt;
9953 xmlParserInputPtr inputStream;
9954 xmlParserInputBufferPtr buf;
9955 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9956
9957 /*
9958 * plug some encoding conversion routines
9959 */
9960 if ((chunk != NULL) && (size >= 4))
9961 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9962
9963 buf = xmlAllocParserInputBuffer(enc);
9964 if (buf == NULL) return(NULL);
9965
9966 ctxt = xmlNewParserCtxt();
9967 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009968 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009969 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009970 return(NULL);
9971 }
Daniel Veillard03a53c32004-10-26 16:06:51 +00009972 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009973 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9974 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009975 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009976 xmlFreeParserInputBuffer(buf);
9977 xmlFreeParserCtxt(ctxt);
9978 return(NULL);
9979 }
Owen Taylor3473f882001-02-23 17:55:21 +00009980 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009981#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009982 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009983#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009984 xmlFree(ctxt->sax);
9985 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9986 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009987 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009988 xmlFreeParserInputBuffer(buf);
9989 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009990 return(NULL);
9991 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +00009992 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
9993 if (sax->initialized == XML_SAX2_MAGIC)
9994 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9995 else
9996 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +00009997 if (user_data != NULL)
9998 ctxt->userData = user_data;
9999 }
10000 if (filename == NULL) {
10001 ctxt->directory = NULL;
10002 } else {
10003 ctxt->directory = xmlParserGetDirectory(filename);
10004 }
10005
10006 inputStream = xmlNewInputStream(ctxt);
10007 if (inputStream == NULL) {
10008 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010009 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010010 return(NULL);
10011 }
10012
10013 if (filename == NULL)
10014 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010015 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010016 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010017 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010018 if (inputStream->filename == NULL) {
10019 xmlFreeParserCtxt(ctxt);
10020 xmlFreeParserInputBuffer(buf);
10021 return(NULL);
10022 }
10023 }
Owen Taylor3473f882001-02-23 17:55:21 +000010024 inputStream->buf = buf;
10025 inputStream->base = inputStream->buf->buffer->content;
10026 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010027 inputStream->end =
10028 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010029
10030 inputPush(ctxt, inputStream);
10031
10032 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10033 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010034 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10035 int cur = ctxt->input->cur - ctxt->input->base;
10036
Owen Taylor3473f882001-02-23 17:55:21 +000010037 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010038
10039 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10040 ctxt->input->cur = ctxt->input->base + cur;
10041 ctxt->input->end =
10042 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010043#ifdef DEBUG_PUSH
10044 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10045#endif
10046 }
10047
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010048 if (enc != XML_CHAR_ENCODING_NONE) {
10049 xmlSwitchEncoding(ctxt, enc);
10050 }
10051
Owen Taylor3473f882001-02-23 17:55:21 +000010052 return(ctxt);
10053}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010054#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010055
10056/**
10057 * xmlCreateIOParserCtxt:
10058 * @sax: a SAX handler
10059 * @user_data: The user data returned on SAX callbacks
10060 * @ioread: an I/O read function
10061 * @ioclose: an I/O close function
10062 * @ioctx: an I/O handler
10063 * @enc: the charset encoding if known
10064 *
10065 * Create a parser context for using the XML parser with an existing
10066 * I/O stream
10067 *
10068 * Returns the new parser context or NULL
10069 */
10070xmlParserCtxtPtr
10071xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10072 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10073 void *ioctx, xmlCharEncoding enc) {
10074 xmlParserCtxtPtr ctxt;
10075 xmlParserInputPtr inputStream;
10076 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010077
10078 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010079
10080 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10081 if (buf == NULL) return(NULL);
10082
10083 ctxt = xmlNewParserCtxt();
10084 if (ctxt == NULL) {
10085 xmlFree(buf);
10086 return(NULL);
10087 }
10088 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010089#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010090 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010091#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010092 xmlFree(ctxt->sax);
10093 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10094 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010095 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010096 xmlFree(ctxt);
10097 return(NULL);
10098 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010099 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10100 if (sax->initialized == XML_SAX2_MAGIC)
10101 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10102 else
10103 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010104 if (user_data != NULL)
10105 ctxt->userData = user_data;
10106 }
10107
10108 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10109 if (inputStream == NULL) {
10110 xmlFreeParserCtxt(ctxt);
10111 return(NULL);
10112 }
10113 inputPush(ctxt, inputStream);
10114
10115 return(ctxt);
10116}
10117
Daniel Veillard4432df22003-09-28 18:58:27 +000010118#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010119/************************************************************************
10120 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010121 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010122 * *
10123 ************************************************************************/
10124
10125/**
10126 * xmlIOParseDTD:
10127 * @sax: the SAX handler block or NULL
10128 * @input: an Input Buffer
10129 * @enc: the charset encoding if known
10130 *
10131 * Load and parse a DTD
10132 *
10133 * Returns the resulting xmlDtdPtr or NULL in case of error.
10134 * @input will be freed at parsing end.
10135 */
10136
10137xmlDtdPtr
10138xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10139 xmlCharEncoding enc) {
10140 xmlDtdPtr ret = NULL;
10141 xmlParserCtxtPtr ctxt;
10142 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010143 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010144
10145 if (input == NULL)
10146 return(NULL);
10147
10148 ctxt = xmlNewParserCtxt();
10149 if (ctxt == NULL) {
10150 return(NULL);
10151 }
10152
10153 /*
10154 * Set-up the SAX context
10155 */
10156 if (sax != NULL) {
10157 if (ctxt->sax != NULL)
10158 xmlFree(ctxt->sax);
10159 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010160 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010162 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010163
10164 /*
10165 * generate a parser input from the I/O handler
10166 */
10167
Daniel Veillard43caefb2003-12-07 19:32:22 +000010168 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010169 if (pinput == NULL) {
10170 if (sax != NULL) ctxt->sax = NULL;
10171 xmlFreeParserCtxt(ctxt);
10172 return(NULL);
10173 }
10174
10175 /*
10176 * plug some encoding conversion routines here.
10177 */
10178 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010179 if (enc != XML_CHAR_ENCODING_NONE) {
10180 xmlSwitchEncoding(ctxt, enc);
10181 }
Owen Taylor3473f882001-02-23 17:55:21 +000010182
10183 pinput->filename = NULL;
10184 pinput->line = 1;
10185 pinput->col = 1;
10186 pinput->base = ctxt->input->cur;
10187 pinput->cur = ctxt->input->cur;
10188 pinput->free = NULL;
10189
10190 /*
10191 * let's parse that entity knowing it's an external subset.
10192 */
10193 ctxt->inSubset = 2;
10194 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10195 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10196 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010197
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010198 if ((enc == XML_CHAR_ENCODING_NONE) &&
10199 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010200 /*
10201 * Get the 4 first bytes and decode the charset
10202 * if enc != XML_CHAR_ENCODING_NONE
10203 * plug some encoding conversion routines.
10204 */
10205 start[0] = RAW;
10206 start[1] = NXT(1);
10207 start[2] = NXT(2);
10208 start[3] = NXT(3);
10209 enc = xmlDetectCharEncoding(start, 4);
10210 if (enc != XML_CHAR_ENCODING_NONE) {
10211 xmlSwitchEncoding(ctxt, enc);
10212 }
10213 }
10214
Owen Taylor3473f882001-02-23 17:55:21 +000010215 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10216
10217 if (ctxt->myDoc != NULL) {
10218 if (ctxt->wellFormed) {
10219 ret = ctxt->myDoc->extSubset;
10220 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010221 if (ret != NULL) {
10222 xmlNodePtr tmp;
10223
10224 ret->doc = NULL;
10225 tmp = ret->children;
10226 while (tmp != NULL) {
10227 tmp->doc = NULL;
10228 tmp = tmp->next;
10229 }
10230 }
Owen Taylor3473f882001-02-23 17:55:21 +000010231 } else {
10232 ret = NULL;
10233 }
10234 xmlFreeDoc(ctxt->myDoc);
10235 ctxt->myDoc = NULL;
10236 }
10237 if (sax != NULL) ctxt->sax = NULL;
10238 xmlFreeParserCtxt(ctxt);
10239
10240 return(ret);
10241}
10242
10243/**
10244 * xmlSAXParseDTD:
10245 * @sax: the SAX handler block
10246 * @ExternalID: a NAME* containing the External ID of the DTD
10247 * @SystemID: a NAME* containing the URL to the DTD
10248 *
10249 * Load and parse an external subset.
10250 *
10251 * Returns the resulting xmlDtdPtr or NULL in case of error.
10252 */
10253
10254xmlDtdPtr
10255xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10256 const xmlChar *SystemID) {
10257 xmlDtdPtr ret = NULL;
10258 xmlParserCtxtPtr ctxt;
10259 xmlParserInputPtr input = NULL;
10260 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010261 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010262
10263 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10264
10265 ctxt = xmlNewParserCtxt();
10266 if (ctxt == NULL) {
10267 return(NULL);
10268 }
10269
10270 /*
10271 * Set-up the SAX context
10272 */
10273 if (sax != NULL) {
10274 if (ctxt->sax != NULL)
10275 xmlFree(ctxt->sax);
10276 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010277 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010278 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010279
10280 /*
10281 * Canonicalise the system ID
10282 */
10283 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010284 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010285 xmlFreeParserCtxt(ctxt);
10286 return(NULL);
10287 }
Owen Taylor3473f882001-02-23 17:55:21 +000010288
10289 /*
10290 * Ask the Entity resolver to load the damn thing
10291 */
10292
10293 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010294 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010295 if (input == NULL) {
10296 if (sax != NULL) ctxt->sax = NULL;
10297 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010298 if (systemIdCanonic != NULL)
10299 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010300 return(NULL);
10301 }
10302
10303 /*
10304 * plug some encoding conversion routines here.
10305 */
10306 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010307 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10308 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10309 xmlSwitchEncoding(ctxt, enc);
10310 }
Owen Taylor3473f882001-02-23 17:55:21 +000010311
10312 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010313 input->filename = (char *) systemIdCanonic;
10314 else
10315 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010316 input->line = 1;
10317 input->col = 1;
10318 input->base = ctxt->input->cur;
10319 input->cur = ctxt->input->cur;
10320 input->free = NULL;
10321
10322 /*
10323 * let's parse that entity knowing it's an external subset.
10324 */
10325 ctxt->inSubset = 2;
10326 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10327 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10328 ExternalID, SystemID);
10329 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10330
10331 if (ctxt->myDoc != NULL) {
10332 if (ctxt->wellFormed) {
10333 ret = ctxt->myDoc->extSubset;
10334 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010335 if (ret != NULL) {
10336 xmlNodePtr tmp;
10337
10338 ret->doc = NULL;
10339 tmp = ret->children;
10340 while (tmp != NULL) {
10341 tmp->doc = NULL;
10342 tmp = tmp->next;
10343 }
10344 }
Owen Taylor3473f882001-02-23 17:55:21 +000010345 } else {
10346 ret = NULL;
10347 }
10348 xmlFreeDoc(ctxt->myDoc);
10349 ctxt->myDoc = NULL;
10350 }
10351 if (sax != NULL) ctxt->sax = NULL;
10352 xmlFreeParserCtxt(ctxt);
10353
10354 return(ret);
10355}
10356
Daniel Veillard4432df22003-09-28 18:58:27 +000010357
Owen Taylor3473f882001-02-23 17:55:21 +000010358/**
10359 * xmlParseDTD:
10360 * @ExternalID: a NAME* containing the External ID of the DTD
10361 * @SystemID: a NAME* containing the URL to the DTD
10362 *
10363 * Load and parse an external subset.
10364 *
10365 * Returns the resulting xmlDtdPtr or NULL in case of error.
10366 */
10367
10368xmlDtdPtr
10369xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10370 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10371}
Daniel Veillard4432df22003-09-28 18:58:27 +000010372#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010373
10374/************************************************************************
10375 * *
10376 * Front ends when parsing an Entity *
10377 * *
10378 ************************************************************************/
10379
10380/**
Owen Taylor3473f882001-02-23 17:55:21 +000010381 * xmlParseCtxtExternalEntity:
10382 * @ctx: the existing parsing context
10383 * @URL: the URL for the entity to load
10384 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010385 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010386 *
10387 * Parse an external general entity within an existing parsing context
10388 * An external general parsed entity is well-formed if it matches the
10389 * production labeled extParsedEnt.
10390 *
10391 * [78] extParsedEnt ::= TextDecl? content
10392 *
10393 * Returns 0 if the entity is well formed, -1 in case of args problem and
10394 * the parser error code otherwise
10395 */
10396
10397int
10398xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010399 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010400 xmlParserCtxtPtr ctxt;
10401 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010402 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010403 xmlSAXHandlerPtr oldsax = NULL;
10404 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010405 xmlChar start[4];
10406 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010407
Daniel Veillardce682bc2004-11-05 17:22:25 +000010408 if (ctx == NULL) return(-1);
10409
Owen Taylor3473f882001-02-23 17:55:21 +000010410 if (ctx->depth > 40) {
10411 return(XML_ERR_ENTITY_LOOP);
10412 }
10413
Daniel Veillardcda96922001-08-21 10:56:31 +000010414 if (lst != NULL)
10415 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010416 if ((URL == NULL) && (ID == NULL))
10417 return(-1);
10418 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10419 return(-1);
10420
10421
10422 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10423 if (ctxt == NULL) return(-1);
10424 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010425 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010426 oldsax = ctxt->sax;
10427 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010428 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010429 newDoc = xmlNewDoc(BAD_CAST "1.0");
10430 if (newDoc == NULL) {
10431 xmlFreeParserCtxt(ctxt);
10432 return(-1);
10433 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010434 if (ctx->myDoc->dict) {
10435 newDoc->dict = ctx->myDoc->dict;
10436 xmlDictReference(newDoc->dict);
10437 }
Owen Taylor3473f882001-02-23 17:55:21 +000010438 if (ctx->myDoc != NULL) {
10439 newDoc->intSubset = ctx->myDoc->intSubset;
10440 newDoc->extSubset = ctx->myDoc->extSubset;
10441 }
10442 if (ctx->myDoc->URL != NULL) {
10443 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10444 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010445 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10446 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010447 ctxt->sax = oldsax;
10448 xmlFreeParserCtxt(ctxt);
10449 newDoc->intSubset = NULL;
10450 newDoc->extSubset = NULL;
10451 xmlFreeDoc(newDoc);
10452 return(-1);
10453 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010454 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010455 nodePush(ctxt, newDoc->children);
10456 if (ctx->myDoc == NULL) {
10457 ctxt->myDoc = newDoc;
10458 } else {
10459 ctxt->myDoc = ctx->myDoc;
10460 newDoc->children->doc = ctx->myDoc;
10461 }
10462
Daniel Veillard87a764e2001-06-20 17:41:10 +000010463 /*
10464 * Get the 4 first bytes and decode the charset
10465 * if enc != XML_CHAR_ENCODING_NONE
10466 * plug some encoding conversion routines.
10467 */
10468 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010469 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10470 start[0] = RAW;
10471 start[1] = NXT(1);
10472 start[2] = NXT(2);
10473 start[3] = NXT(3);
10474 enc = xmlDetectCharEncoding(start, 4);
10475 if (enc != XML_CHAR_ENCODING_NONE) {
10476 xmlSwitchEncoding(ctxt, enc);
10477 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010478 }
10479
Owen Taylor3473f882001-02-23 17:55:21 +000010480 /*
10481 * Parse a possible text declaration first
10482 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010483 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010484 xmlParseTextDecl(ctxt);
10485 }
10486
10487 /*
10488 * Doing validity checking on chunk doesn't make sense
10489 */
10490 ctxt->instate = XML_PARSER_CONTENT;
10491 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010492 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010493 ctxt->loadsubset = ctx->loadsubset;
10494 ctxt->depth = ctx->depth + 1;
10495 ctxt->replaceEntities = ctx->replaceEntities;
10496 if (ctxt->validate) {
10497 ctxt->vctxt.error = ctx->vctxt.error;
10498 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010499 } else {
10500 ctxt->vctxt.error = NULL;
10501 ctxt->vctxt.warning = NULL;
10502 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010503 ctxt->vctxt.nodeTab = NULL;
10504 ctxt->vctxt.nodeNr = 0;
10505 ctxt->vctxt.nodeMax = 0;
10506 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010507 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10508 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010509 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10510 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10511 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010512 ctxt->dictNames = ctx->dictNames;
10513 ctxt->attsDefault = ctx->attsDefault;
10514 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010515 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010516
10517 xmlParseContent(ctxt);
10518
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010519 ctx->validate = ctxt->validate;
10520 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010521 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010522 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010523 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010524 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010525 }
10526 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010527 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010528 }
10529
10530 if (!ctxt->wellFormed) {
10531 if (ctxt->errNo == 0)
10532 ret = 1;
10533 else
10534 ret = ctxt->errNo;
10535 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010536 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010537 xmlNodePtr cur;
10538
10539 /*
10540 * Return the newly created nodeset after unlinking it from
10541 * they pseudo parent.
10542 */
10543 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010544 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010545 while (cur != NULL) {
10546 cur->parent = NULL;
10547 cur = cur->next;
10548 }
10549 newDoc->children->children = NULL;
10550 }
10551 ret = 0;
10552 }
10553 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010554 ctxt->dict = NULL;
10555 ctxt->attsDefault = NULL;
10556 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010557 xmlFreeParserCtxt(ctxt);
10558 newDoc->intSubset = NULL;
10559 newDoc->extSubset = NULL;
10560 xmlFreeDoc(newDoc);
10561
10562 return(ret);
10563}
10564
10565/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010566 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010567 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010568 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010569 * @sax: the SAX handler bloc (possibly NULL)
10570 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10571 * @depth: Used for loop detection, use 0
10572 * @URL: the URL for the entity to load
10573 * @ID: the System ID for the entity to load
10574 * @list: the return value for the set of parsed nodes
10575 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010576 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010577 *
10578 * Returns 0 if the entity is well formed, -1 in case of args problem and
10579 * the parser error code otherwise
10580 */
10581
Daniel Veillard7d515752003-09-26 19:12:37 +000010582static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010583xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10584 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010585 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010586 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010587 xmlParserCtxtPtr ctxt;
10588 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010589 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010590 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010591 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010592 xmlChar start[4];
10593 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010594
10595 if (depth > 40) {
10596 return(XML_ERR_ENTITY_LOOP);
10597 }
10598
10599
10600
10601 if (list != NULL)
10602 *list = NULL;
10603 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010604 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010605 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010606 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010607
10608
10609 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010610 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010611 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010612 if (oldctxt != NULL) {
10613 ctxt->_private = oldctxt->_private;
10614 ctxt->loadsubset = oldctxt->loadsubset;
10615 ctxt->validate = oldctxt->validate;
10616 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010617 ctxt->record_info = oldctxt->record_info;
10618 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10619 ctxt->node_seq.length = oldctxt->node_seq.length;
10620 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010621 } else {
10622 /*
10623 * Doing validity checking on chunk without context
10624 * doesn't make sense
10625 */
10626 ctxt->_private = NULL;
10627 ctxt->validate = 0;
10628 ctxt->external = 2;
10629 ctxt->loadsubset = 0;
10630 }
Owen Taylor3473f882001-02-23 17:55:21 +000010631 if (sax != NULL) {
10632 oldsax = ctxt->sax;
10633 ctxt->sax = sax;
10634 if (user_data != NULL)
10635 ctxt->userData = user_data;
10636 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010637 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010638 newDoc = xmlNewDoc(BAD_CAST "1.0");
10639 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010640 ctxt->node_seq.maximum = 0;
10641 ctxt->node_seq.length = 0;
10642 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010643 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010644 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010645 }
10646 if (doc != NULL) {
10647 newDoc->intSubset = doc->intSubset;
10648 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010649 newDoc->dict = doc->dict;
10650 } else if (oldctxt != NULL) {
10651 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010652 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010653 xmlDictReference(newDoc->dict);
10654
Owen Taylor3473f882001-02-23 17:55:21 +000010655 if (doc->URL != NULL) {
10656 newDoc->URL = xmlStrdup(doc->URL);
10657 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010658 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10659 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010660 if (sax != NULL)
10661 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010662 ctxt->node_seq.maximum = 0;
10663 ctxt->node_seq.length = 0;
10664 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010665 xmlFreeParserCtxt(ctxt);
10666 newDoc->intSubset = NULL;
10667 newDoc->extSubset = NULL;
10668 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010669 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010670 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010671 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010672 nodePush(ctxt, newDoc->children);
10673 if (doc == NULL) {
10674 ctxt->myDoc = newDoc;
10675 } else {
10676 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010677 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010678 }
10679
Daniel Veillard87a764e2001-06-20 17:41:10 +000010680 /*
10681 * Get the 4 first bytes and decode the charset
10682 * if enc != XML_CHAR_ENCODING_NONE
10683 * plug some encoding conversion routines.
10684 */
10685 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010686 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10687 start[0] = RAW;
10688 start[1] = NXT(1);
10689 start[2] = NXT(2);
10690 start[3] = NXT(3);
10691 enc = xmlDetectCharEncoding(start, 4);
10692 if (enc != XML_CHAR_ENCODING_NONE) {
10693 xmlSwitchEncoding(ctxt, enc);
10694 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010695 }
10696
Owen Taylor3473f882001-02-23 17:55:21 +000010697 /*
10698 * Parse a possible text declaration first
10699 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010700 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010701 xmlParseTextDecl(ctxt);
10702 }
10703
Owen Taylor3473f882001-02-23 17:55:21 +000010704 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010705 ctxt->depth = depth;
10706
10707 xmlParseContent(ctxt);
10708
Daniel Veillard561b7f82002-03-20 21:55:57 +000010709 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010710 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010711 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010712 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010713 }
10714 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010715 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010716 }
10717
10718 if (!ctxt->wellFormed) {
10719 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010720 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010721 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010722 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010723 } else {
10724 if (list != NULL) {
10725 xmlNodePtr cur;
10726
10727 /*
10728 * Return the newly created nodeset after unlinking it from
10729 * they pseudo parent.
10730 */
10731 cur = newDoc->children->children;
10732 *list = cur;
10733 while (cur != NULL) {
10734 cur->parent = NULL;
10735 cur = cur->next;
10736 }
10737 newDoc->children->children = NULL;
10738 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010739 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010740 }
10741 if (sax != NULL)
10742 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010743 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10744 oldctxt->node_seq.length = ctxt->node_seq.length;
10745 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010746 ctxt->node_seq.maximum = 0;
10747 ctxt->node_seq.length = 0;
10748 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010749 xmlFreeParserCtxt(ctxt);
10750 newDoc->intSubset = NULL;
10751 newDoc->extSubset = NULL;
10752 xmlFreeDoc(newDoc);
10753
10754 return(ret);
10755}
10756
Daniel Veillard81273902003-09-30 00:43:48 +000010757#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010758/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010759 * xmlParseExternalEntity:
10760 * @doc: the document the chunk pertains to
10761 * @sax: the SAX handler bloc (possibly NULL)
10762 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10763 * @depth: Used for loop detection, use 0
10764 * @URL: the URL for the entity to load
10765 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010766 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010767 *
10768 * Parse an external general entity
10769 * An external general parsed entity is well-formed if it matches the
10770 * production labeled extParsedEnt.
10771 *
10772 * [78] extParsedEnt ::= TextDecl? content
10773 *
10774 * Returns 0 if the entity is well formed, -1 in case of args problem and
10775 * the parser error code otherwise
10776 */
10777
10778int
10779xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010780 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010781 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010782 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010783}
10784
10785/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010786 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010787 * @doc: the document the chunk pertains to
10788 * @sax: the SAX handler bloc (possibly NULL)
10789 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10790 * @depth: Used for loop detection, use 0
10791 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010792 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010793 *
10794 * Parse a well-balanced chunk of an XML document
10795 * called by the parser
10796 * The allowed sequence for the Well Balanced Chunk is the one defined by
10797 * the content production in the XML grammar:
10798 *
10799 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10800 *
10801 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10802 * the parser error code otherwise
10803 */
10804
10805int
10806xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010807 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010808 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10809 depth, string, lst, 0 );
10810}
Daniel Veillard81273902003-09-30 00:43:48 +000010811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010812
10813/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010814 * xmlParseBalancedChunkMemoryInternal:
10815 * @oldctxt: the existing parsing context
10816 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10817 * @user_data: the user data field for the parser context
10818 * @lst: the return value for the set of parsed nodes
10819 *
10820 *
10821 * Parse a well-balanced chunk of an XML document
10822 * called by the parser
10823 * The allowed sequence for the Well Balanced Chunk is the one defined by
10824 * the content production in the XML grammar:
10825 *
10826 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10827 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010828 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10829 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010830 *
10831 * In case recover is set to 1, the nodelist will not be empty even if
10832 * the parsed chunk is not well balanced.
10833 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010834static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010835xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10836 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10837 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010838 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010839 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010840 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010841 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010842 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010843 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010844 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010845
10846 if (oldctxt->depth > 40) {
10847 return(XML_ERR_ENTITY_LOOP);
10848 }
10849
10850
10851 if (lst != NULL)
10852 *lst = NULL;
10853 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010854 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010855
10856 size = xmlStrlen(string);
10857
10858 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010859 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010860 if (user_data != NULL)
10861 ctxt->userData = user_data;
10862 else
10863 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010864 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10865 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010866 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10867 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10868 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010869
10870 oldsax = ctxt->sax;
10871 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010872 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010873 ctxt->replaceEntities = oldctxt->replaceEntities;
10874 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010875
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010876 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010877 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010878 newDoc = xmlNewDoc(BAD_CAST "1.0");
10879 if (newDoc == NULL) {
10880 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010881 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010882 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010883 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010884 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010885 newDoc->dict = ctxt->dict;
10886 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010887 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010888 } else {
10889 ctxt->myDoc = oldctxt->myDoc;
10890 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010891 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010892 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010893 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
10894 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010895 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010896 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010897 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010898 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010899 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010900 }
William M. Brack7b9154b2003-09-27 19:23:50 +000010901 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010902 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010903 ctxt->myDoc->children = NULL;
10904 ctxt->myDoc->last = NULL;
10905 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010906 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010907 ctxt->instate = XML_PARSER_CONTENT;
10908 ctxt->depth = oldctxt->depth + 1;
10909
Daniel Veillard328f48c2002-11-15 15:24:34 +000010910 ctxt->validate = 0;
10911 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010912 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10913 /*
10914 * ID/IDREF registration will be done in xmlValidateElement below
10915 */
10916 ctxt->loadsubset |= XML_SKIP_IDS;
10917 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010918 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010919 ctxt->attsDefault = oldctxt->attsDefault;
10920 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010921
Daniel Veillard68e9e742002-11-16 15:35:11 +000010922 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010923 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010924 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010925 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010926 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010927 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010928 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010929 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010930 }
10931
10932 if (!ctxt->wellFormed) {
10933 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010934 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010935 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010936 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010937 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010938 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010939 }
10940
William M. Brack7b9154b2003-09-27 19:23:50 +000010941 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010942 xmlNodePtr cur;
10943
10944 /*
10945 * Return the newly created nodeset after unlinking it from
10946 * they pseudo parent.
10947 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010948 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010949 *lst = cur;
10950 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010951#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010952 if (oldctxt->validate && oldctxt->wellFormed &&
10953 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10954 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10955 oldctxt->myDoc, cur);
10956 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010957#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010958 cur->parent = NULL;
10959 cur = cur->next;
10960 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010961 ctxt->myDoc->children->children = NULL;
10962 }
10963 if (ctxt->myDoc != NULL) {
10964 xmlFreeNode(ctxt->myDoc->children);
10965 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010966 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010967 }
10968
10969 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010970 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010971 ctxt->attsDefault = NULL;
10972 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010973 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010974 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010975 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010976 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010977
10978 return(ret);
10979}
10980
Daniel Veillard29b17482004-08-16 00:39:03 +000010981/**
10982 * xmlParseInNodeContext:
10983 * @node: the context node
10984 * @data: the input string
10985 * @datalen: the input string length in bytes
10986 * @options: a combination of xmlParserOption
10987 * @lst: the return value for the set of parsed nodes
10988 *
10989 * Parse a well-balanced chunk of an XML document
10990 * within the context (DTD, namespaces, etc ...) of the given node.
10991 *
10992 * The allowed sequence for the data is a Well Balanced Chunk defined by
10993 * the content production in the XML grammar:
10994 *
10995 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10996 *
10997 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10998 * error code otherwise
10999 */
11000xmlParserErrors
11001xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11002 int options, xmlNodePtr *lst) {
11003#ifdef SAX2
11004 xmlParserCtxtPtr ctxt;
11005 xmlDocPtr doc = NULL;
11006 xmlNodePtr fake, cur;
11007 int nsnr = 0;
11008
11009 xmlParserErrors ret = XML_ERR_OK;
11010
11011 /*
11012 * check all input parameters, grab the document
11013 */
11014 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11015 return(XML_ERR_INTERNAL_ERROR);
11016 switch (node->type) {
11017 case XML_ELEMENT_NODE:
11018 case XML_ATTRIBUTE_NODE:
11019 case XML_TEXT_NODE:
11020 case XML_CDATA_SECTION_NODE:
11021 case XML_ENTITY_REF_NODE:
11022 case XML_PI_NODE:
11023 case XML_COMMENT_NODE:
11024 case XML_DOCUMENT_NODE:
11025 case XML_HTML_DOCUMENT_NODE:
11026 break;
11027 default:
11028 return(XML_ERR_INTERNAL_ERROR);
11029
11030 }
11031 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11032 (node->type != XML_DOCUMENT_NODE) &&
11033 (node->type != XML_HTML_DOCUMENT_NODE))
11034 node = node->parent;
11035 if (node == NULL)
11036 return(XML_ERR_INTERNAL_ERROR);
11037 if (node->type == XML_ELEMENT_NODE)
11038 doc = node->doc;
11039 else
11040 doc = (xmlDocPtr) node;
11041 if (doc == NULL)
11042 return(XML_ERR_INTERNAL_ERROR);
11043
11044 /*
11045 * allocate a context and set-up everything not related to the
11046 * node position in the tree
11047 */
11048 if (doc->type == XML_DOCUMENT_NODE)
11049 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11050#ifdef LIBXML_HTML_ENABLED
11051 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11052 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11053#endif
11054 else
11055 return(XML_ERR_INTERNAL_ERROR);
11056
11057 if (ctxt == NULL)
11058 return(XML_ERR_NO_MEMORY);
11059 fake = xmlNewComment(NULL);
11060 if (fake == NULL) {
11061 xmlFreeParserCtxt(ctxt);
11062 return(XML_ERR_NO_MEMORY);
11063 }
11064 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011065
11066 /*
11067 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11068 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11069 * we must wait until the last moment to free the original one.
11070 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011071 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011072 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011073 xmlDictFree(ctxt->dict);
11074 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011075 } else
11076 options |= XML_PARSE_NODICT;
11077
11078 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011079 xmlDetectSAX2(ctxt);
11080 ctxt->myDoc = doc;
11081
11082 if (node->type == XML_ELEMENT_NODE) {
11083 nodePush(ctxt, node);
11084 /*
11085 * initialize the SAX2 namespaces stack
11086 */
11087 cur = node;
11088 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11089 xmlNsPtr ns = cur->nsDef;
11090 const xmlChar *iprefix, *ihref;
11091
11092 while (ns != NULL) {
11093 if (ctxt->dict) {
11094 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11095 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11096 } else {
11097 iprefix = ns->prefix;
11098 ihref = ns->href;
11099 }
11100
11101 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11102 nsPush(ctxt, iprefix, ihref);
11103 nsnr++;
11104 }
11105 ns = ns->next;
11106 }
11107 cur = cur->parent;
11108 }
11109 ctxt->instate = XML_PARSER_CONTENT;
11110 }
11111
11112 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11113 /*
11114 * ID/IDREF registration will be done in xmlValidateElement below
11115 */
11116 ctxt->loadsubset |= XML_SKIP_IDS;
11117 }
11118
11119 xmlParseContent(ctxt);
11120 nsPop(ctxt, nsnr);
11121 if ((RAW == '<') && (NXT(1) == '/')) {
11122 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11123 } else if (RAW != 0) {
11124 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11125 }
11126 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11127 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11128 ctxt->wellFormed = 0;
11129 }
11130
11131 if (!ctxt->wellFormed) {
11132 if (ctxt->errNo == 0)
11133 ret = XML_ERR_INTERNAL_ERROR;
11134 else
11135 ret = (xmlParserErrors)ctxt->errNo;
11136 } else {
11137 ret = XML_ERR_OK;
11138 }
11139
11140 /*
11141 * Return the newly created nodeset after unlinking it from
11142 * the pseudo sibling.
11143 */
11144
11145 cur = fake->next;
11146 fake->next = NULL;
11147 node->last = fake;
11148
11149 if (cur != NULL) {
11150 cur->prev = NULL;
11151 }
11152
11153 *lst = cur;
11154
11155 while (cur != NULL) {
11156 cur->parent = NULL;
11157 cur = cur->next;
11158 }
11159
11160 xmlUnlinkNode(fake);
11161 xmlFreeNode(fake);
11162
11163
11164 if (ret != XML_ERR_OK) {
11165 xmlFreeNodeList(*lst);
11166 *lst = NULL;
11167 }
William M. Brackc3f81342004-10-03 01:22:44 +000011168
William M. Brackb7b54de2004-10-06 16:38:01 +000011169 if (doc->dict != NULL)
11170 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011171 xmlFreeParserCtxt(ctxt);
11172
11173 return(ret);
11174#else /* !SAX2 */
11175 return(XML_ERR_INTERNAL_ERROR);
11176#endif
11177}
11178
Daniel Veillard81273902003-09-30 00:43:48 +000011179#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011180/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011181 * xmlParseBalancedChunkMemoryRecover:
11182 * @doc: the document the chunk pertains to
11183 * @sax: the SAX handler bloc (possibly NULL)
11184 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11185 * @depth: Used for loop detection, use 0
11186 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11187 * @lst: the return value for the set of parsed nodes
11188 * @recover: return nodes even if the data is broken (use 0)
11189 *
11190 *
11191 * Parse a well-balanced chunk of an XML document
11192 * called by the parser
11193 * The allowed sequence for the Well Balanced Chunk is the one defined by
11194 * the content production in the XML grammar:
11195 *
11196 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11197 *
11198 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11199 * the parser error code otherwise
11200 *
11201 * In case recover is set to 1, the nodelist will not be empty even if
11202 * the parsed chunk is not well balanced.
11203 */
11204int
11205xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11206 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11207 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011208 xmlParserCtxtPtr ctxt;
11209 xmlDocPtr newDoc;
11210 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011211 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011212 int size;
11213 int ret = 0;
11214
11215 if (depth > 40) {
11216 return(XML_ERR_ENTITY_LOOP);
11217 }
11218
11219
Daniel Veillardcda96922001-08-21 10:56:31 +000011220 if (lst != NULL)
11221 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011222 if (string == NULL)
11223 return(-1);
11224
11225 size = xmlStrlen(string);
11226
11227 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11228 if (ctxt == NULL) return(-1);
11229 ctxt->userData = ctxt;
11230 if (sax != NULL) {
11231 oldsax = ctxt->sax;
11232 ctxt->sax = sax;
11233 if (user_data != NULL)
11234 ctxt->userData = user_data;
11235 }
11236 newDoc = xmlNewDoc(BAD_CAST "1.0");
11237 if (newDoc == NULL) {
11238 xmlFreeParserCtxt(ctxt);
11239 return(-1);
11240 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011241 if ((doc != NULL) && (doc->dict != NULL)) {
11242 xmlDictFree(ctxt->dict);
11243 ctxt->dict = doc->dict;
11244 xmlDictReference(ctxt->dict);
11245 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11246 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11247 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11248 ctxt->dictNames = 1;
11249 } else {
11250 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11251 }
Owen Taylor3473f882001-02-23 17:55:21 +000011252 if (doc != NULL) {
11253 newDoc->intSubset = doc->intSubset;
11254 newDoc->extSubset = doc->extSubset;
11255 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011256 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11257 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011258 if (sax != NULL)
11259 ctxt->sax = oldsax;
11260 xmlFreeParserCtxt(ctxt);
11261 newDoc->intSubset = NULL;
11262 newDoc->extSubset = NULL;
11263 xmlFreeDoc(newDoc);
11264 return(-1);
11265 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011266 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11267 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011268 if (doc == NULL) {
11269 ctxt->myDoc = newDoc;
11270 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011271 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011272 newDoc->children->doc = doc;
11273 }
11274 ctxt->instate = XML_PARSER_CONTENT;
11275 ctxt->depth = depth;
11276
11277 /*
11278 * Doing validity checking on chunk doesn't make sense
11279 */
11280 ctxt->validate = 0;
11281 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011282 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011283
Daniel Veillardb39bc392002-10-26 19:29:51 +000011284 if ( doc != NULL ){
11285 content = doc->children;
11286 doc->children = NULL;
11287 xmlParseContent(ctxt);
11288 doc->children = content;
11289 }
11290 else {
11291 xmlParseContent(ctxt);
11292 }
Owen Taylor3473f882001-02-23 17:55:21 +000011293 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011294 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011295 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011296 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011297 }
11298 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011299 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011300 }
11301
11302 if (!ctxt->wellFormed) {
11303 if (ctxt->errNo == 0)
11304 ret = 1;
11305 else
11306 ret = ctxt->errNo;
11307 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011308 ret = 0;
11309 }
11310
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011311 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11312 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011313
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011314 /*
11315 * Return the newly created nodeset after unlinking it from
11316 * they pseudo parent.
11317 */
11318 cur = newDoc->children->children;
11319 *lst = cur;
11320 while (cur != NULL) {
11321 xmlSetTreeDoc(cur, doc);
11322 cur->parent = NULL;
11323 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011324 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011325 newDoc->children->children = NULL;
11326 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011327
Owen Taylor3473f882001-02-23 17:55:21 +000011328 if (sax != NULL)
11329 ctxt->sax = oldsax;
11330 xmlFreeParserCtxt(ctxt);
11331 newDoc->intSubset = NULL;
11332 newDoc->extSubset = NULL;
11333 xmlFreeDoc(newDoc);
11334
11335 return(ret);
11336}
11337
11338/**
11339 * xmlSAXParseEntity:
11340 * @sax: the SAX handler block
11341 * @filename: the filename
11342 *
11343 * parse an XML external entity out of context and build a tree.
11344 * It use the given SAX function block to handle the parsing callback.
11345 * If sax is NULL, fallback to the default DOM tree building routines.
11346 *
11347 * [78] extParsedEnt ::= TextDecl? content
11348 *
11349 * This correspond to a "Well Balanced" chunk
11350 *
11351 * Returns the resulting document tree
11352 */
11353
11354xmlDocPtr
11355xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11356 xmlDocPtr ret;
11357 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011358
11359 ctxt = xmlCreateFileParserCtxt(filename);
11360 if (ctxt == NULL) {
11361 return(NULL);
11362 }
11363 if (sax != NULL) {
11364 if (ctxt->sax != NULL)
11365 xmlFree(ctxt->sax);
11366 ctxt->sax = sax;
11367 ctxt->userData = NULL;
11368 }
11369
Owen Taylor3473f882001-02-23 17:55:21 +000011370 xmlParseExtParsedEnt(ctxt);
11371
11372 if (ctxt->wellFormed)
11373 ret = ctxt->myDoc;
11374 else {
11375 ret = NULL;
11376 xmlFreeDoc(ctxt->myDoc);
11377 ctxt->myDoc = NULL;
11378 }
11379 if (sax != NULL)
11380 ctxt->sax = NULL;
11381 xmlFreeParserCtxt(ctxt);
11382
11383 return(ret);
11384}
11385
11386/**
11387 * xmlParseEntity:
11388 * @filename: the filename
11389 *
11390 * parse an XML external entity out of context and build a tree.
11391 *
11392 * [78] extParsedEnt ::= TextDecl? content
11393 *
11394 * This correspond to a "Well Balanced" chunk
11395 *
11396 * Returns the resulting document tree
11397 */
11398
11399xmlDocPtr
11400xmlParseEntity(const char *filename) {
11401 return(xmlSAXParseEntity(NULL, filename));
11402}
Daniel Veillard81273902003-09-30 00:43:48 +000011403#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011404
11405/**
11406 * xmlCreateEntityParserCtxt:
11407 * @URL: the entity URL
11408 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011409 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011410 *
11411 * Create a parser context for an external entity
11412 * Automatic support for ZLIB/Compress compressed document is provided
11413 * by default if found at compile-time.
11414 *
11415 * Returns the new parser context or NULL
11416 */
11417xmlParserCtxtPtr
11418xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11419 const xmlChar *base) {
11420 xmlParserCtxtPtr ctxt;
11421 xmlParserInputPtr inputStream;
11422 char *directory = NULL;
11423 xmlChar *uri;
11424
11425 ctxt = xmlNewParserCtxt();
11426 if (ctxt == NULL) {
11427 return(NULL);
11428 }
11429
11430 uri = xmlBuildURI(URL, base);
11431
11432 if (uri == NULL) {
11433 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11434 if (inputStream == NULL) {
11435 xmlFreeParserCtxt(ctxt);
11436 return(NULL);
11437 }
11438
11439 inputPush(ctxt, inputStream);
11440
11441 if ((ctxt->directory == NULL) && (directory == NULL))
11442 directory = xmlParserGetDirectory((char *)URL);
11443 if ((ctxt->directory == NULL) && (directory != NULL))
11444 ctxt->directory = directory;
11445 } else {
11446 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11447 if (inputStream == NULL) {
11448 xmlFree(uri);
11449 xmlFreeParserCtxt(ctxt);
11450 return(NULL);
11451 }
11452
11453 inputPush(ctxt, inputStream);
11454
11455 if ((ctxt->directory == NULL) && (directory == NULL))
11456 directory = xmlParserGetDirectory((char *)uri);
11457 if ((ctxt->directory == NULL) && (directory != NULL))
11458 ctxt->directory = directory;
11459 xmlFree(uri);
11460 }
Owen Taylor3473f882001-02-23 17:55:21 +000011461 return(ctxt);
11462}
11463
11464/************************************************************************
11465 * *
11466 * Front ends when parsing from a file *
11467 * *
11468 ************************************************************************/
11469
11470/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011471 * xmlCreateURLParserCtxt:
11472 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011473 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011474 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011475 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011476 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011477 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011478 *
11479 * Returns the new parser context or NULL
11480 */
11481xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011482xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011483{
11484 xmlParserCtxtPtr ctxt;
11485 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011486 char *directory = NULL;
11487
Owen Taylor3473f882001-02-23 17:55:21 +000011488 ctxt = xmlNewParserCtxt();
11489 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011490 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011491 return(NULL);
11492 }
11493
Daniel Veillard61b93382003-11-03 14:28:31 +000011494 if (options != 0)
11495 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011496
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011497 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011498 if (inputStream == NULL) {
11499 xmlFreeParserCtxt(ctxt);
11500 return(NULL);
11501 }
11502
Owen Taylor3473f882001-02-23 17:55:21 +000011503 inputPush(ctxt, inputStream);
11504 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011505 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011506 if ((ctxt->directory == NULL) && (directory != NULL))
11507 ctxt->directory = directory;
11508
11509 return(ctxt);
11510}
11511
Daniel Veillard61b93382003-11-03 14:28:31 +000011512/**
11513 * xmlCreateFileParserCtxt:
11514 * @filename: the filename
11515 *
11516 * Create a parser context for a file content.
11517 * Automatic support for ZLIB/Compress compressed document is provided
11518 * by default if found at compile-time.
11519 *
11520 * Returns the new parser context or NULL
11521 */
11522xmlParserCtxtPtr
11523xmlCreateFileParserCtxt(const char *filename)
11524{
11525 return(xmlCreateURLParserCtxt(filename, 0));
11526}
11527
Daniel Veillard81273902003-09-30 00:43:48 +000011528#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011529/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011530 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011531 * @sax: the SAX handler block
11532 * @filename: the filename
11533 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11534 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011535 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011536 *
11537 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11538 * compressed document is provided by default if found at compile-time.
11539 * It use the given SAX function block to handle the parsing callback.
11540 * If sax is NULL, fallback to the default DOM tree building routines.
11541 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011542 * User data (void *) is stored within the parser context in the
11543 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011544 *
Owen Taylor3473f882001-02-23 17:55:21 +000011545 * Returns the resulting document tree
11546 */
11547
11548xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011549xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11550 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011551 xmlDocPtr ret;
11552 xmlParserCtxtPtr ctxt;
11553 char *directory = NULL;
11554
Daniel Veillard635ef722001-10-29 11:48:19 +000011555 xmlInitParser();
11556
Owen Taylor3473f882001-02-23 17:55:21 +000011557 ctxt = xmlCreateFileParserCtxt(filename);
11558 if (ctxt == NULL) {
11559 return(NULL);
11560 }
11561 if (sax != NULL) {
11562 if (ctxt->sax != NULL)
11563 xmlFree(ctxt->sax);
11564 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011565 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011566 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011567 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011568 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011569 }
Owen Taylor3473f882001-02-23 17:55:21 +000011570
11571 if ((ctxt->directory == NULL) && (directory == NULL))
11572 directory = xmlParserGetDirectory(filename);
11573 if ((ctxt->directory == NULL) && (directory != NULL))
11574 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11575
Daniel Veillarddad3f682002-11-17 16:47:27 +000011576 ctxt->recovery = recovery;
11577
Owen Taylor3473f882001-02-23 17:55:21 +000011578 xmlParseDocument(ctxt);
11579
William M. Brackc07329e2003-09-08 01:57:30 +000011580 if ((ctxt->wellFormed) || recovery) {
11581 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011582 if (ret != NULL) {
11583 if (ctxt->input->buf->compressed > 0)
11584 ret->compression = 9;
11585 else
11586 ret->compression = ctxt->input->buf->compressed;
11587 }
William M. Brackc07329e2003-09-08 01:57:30 +000011588 }
Owen Taylor3473f882001-02-23 17:55:21 +000011589 else {
11590 ret = NULL;
11591 xmlFreeDoc(ctxt->myDoc);
11592 ctxt->myDoc = NULL;
11593 }
11594 if (sax != NULL)
11595 ctxt->sax = NULL;
11596 xmlFreeParserCtxt(ctxt);
11597
11598 return(ret);
11599}
11600
11601/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011602 * xmlSAXParseFile:
11603 * @sax: the SAX handler block
11604 * @filename: the filename
11605 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11606 * documents
11607 *
11608 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11609 * compressed document is provided by default if found at compile-time.
11610 * It use the given SAX function block to handle the parsing callback.
11611 * If sax is NULL, fallback to the default DOM tree building routines.
11612 *
11613 * Returns the resulting document tree
11614 */
11615
11616xmlDocPtr
11617xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11618 int recovery) {
11619 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11620}
11621
11622/**
Owen Taylor3473f882001-02-23 17:55:21 +000011623 * xmlRecoverDoc:
11624 * @cur: a pointer to an array of xmlChar
11625 *
11626 * parse an XML in-memory document and build a tree.
11627 * In the case the document is not Well Formed, a tree is built anyway
11628 *
11629 * Returns the resulting document tree
11630 */
11631
11632xmlDocPtr
11633xmlRecoverDoc(xmlChar *cur) {
11634 return(xmlSAXParseDoc(NULL, cur, 1));
11635}
11636
11637/**
11638 * xmlParseFile:
11639 * @filename: the filename
11640 *
11641 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11642 * compressed document is provided by default if found at compile-time.
11643 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011644 * Returns the resulting document tree if the file was wellformed,
11645 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011646 */
11647
11648xmlDocPtr
11649xmlParseFile(const char *filename) {
11650 return(xmlSAXParseFile(NULL, filename, 0));
11651}
11652
11653/**
11654 * xmlRecoverFile:
11655 * @filename: the filename
11656 *
11657 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11658 * compressed document is provided by default if found at compile-time.
11659 * In the case the document is not Well Formed, a tree is built anyway
11660 *
11661 * Returns the resulting document tree
11662 */
11663
11664xmlDocPtr
11665xmlRecoverFile(const char *filename) {
11666 return(xmlSAXParseFile(NULL, filename, 1));
11667}
11668
11669
11670/**
11671 * xmlSetupParserForBuffer:
11672 * @ctxt: an XML parser context
11673 * @buffer: a xmlChar * buffer
11674 * @filename: a file name
11675 *
11676 * Setup the parser context to parse a new buffer; Clears any prior
11677 * contents from the parser context. The buffer parameter must not be
11678 * NULL, but the filename parameter can be
11679 */
11680void
11681xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11682 const char* filename)
11683{
11684 xmlParserInputPtr input;
11685
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011686 if ((ctxt == NULL) || (buffer == NULL))
11687 return;
11688
Owen Taylor3473f882001-02-23 17:55:21 +000011689 input = xmlNewInputStream(ctxt);
11690 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011691 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011692 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011693 return;
11694 }
11695
11696 xmlClearParserCtxt(ctxt);
11697 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011698 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011699 input->base = buffer;
11700 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011701 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011702 inputPush(ctxt, input);
11703}
11704
11705/**
11706 * xmlSAXUserParseFile:
11707 * @sax: a SAX handler
11708 * @user_data: The user data returned on SAX callbacks
11709 * @filename: a file name
11710 *
11711 * parse an XML file and call the given SAX handler routines.
11712 * Automatic support for ZLIB/Compress compressed document is provided
11713 *
11714 * Returns 0 in case of success or a error number otherwise
11715 */
11716int
11717xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11718 const char *filename) {
11719 int ret = 0;
11720 xmlParserCtxtPtr ctxt;
11721
11722 ctxt = xmlCreateFileParserCtxt(filename);
11723 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011724#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011725 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011726#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011727 xmlFree(ctxt->sax);
11728 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011729 xmlDetectSAX2(ctxt);
11730
Owen Taylor3473f882001-02-23 17:55:21 +000011731 if (user_data != NULL)
11732 ctxt->userData = user_data;
11733
11734 xmlParseDocument(ctxt);
11735
11736 if (ctxt->wellFormed)
11737 ret = 0;
11738 else {
11739 if (ctxt->errNo != 0)
11740 ret = ctxt->errNo;
11741 else
11742 ret = -1;
11743 }
11744 if (sax != NULL)
11745 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000011746 if (ctxt->myDoc != NULL) {
11747 xmlFreeDoc(ctxt->myDoc);
11748 ctxt->myDoc = NULL;
11749 }
Owen Taylor3473f882001-02-23 17:55:21 +000011750 xmlFreeParserCtxt(ctxt);
11751
11752 return ret;
11753}
Daniel Veillard81273902003-09-30 00:43:48 +000011754#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011755
11756/************************************************************************
11757 * *
11758 * Front ends when parsing from memory *
11759 * *
11760 ************************************************************************/
11761
11762/**
11763 * xmlCreateMemoryParserCtxt:
11764 * @buffer: a pointer to a char array
11765 * @size: the size of the array
11766 *
11767 * Create a parser context for an XML in-memory document.
11768 *
11769 * Returns the new parser context or NULL
11770 */
11771xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011772xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011773 xmlParserCtxtPtr ctxt;
11774 xmlParserInputPtr input;
11775 xmlParserInputBufferPtr buf;
11776
11777 if (buffer == NULL)
11778 return(NULL);
11779 if (size <= 0)
11780 return(NULL);
11781
11782 ctxt = xmlNewParserCtxt();
11783 if (ctxt == NULL)
11784 return(NULL);
11785
Daniel Veillard53350552003-09-18 13:35:51 +000011786 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011787 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011788 if (buf == NULL) {
11789 xmlFreeParserCtxt(ctxt);
11790 return(NULL);
11791 }
Owen Taylor3473f882001-02-23 17:55:21 +000011792
11793 input = xmlNewInputStream(ctxt);
11794 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011795 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011796 xmlFreeParserCtxt(ctxt);
11797 return(NULL);
11798 }
11799
11800 input->filename = NULL;
11801 input->buf = buf;
11802 input->base = input->buf->buffer->content;
11803 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011804 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011805
11806 inputPush(ctxt, input);
11807 return(ctxt);
11808}
11809
Daniel Veillard81273902003-09-30 00:43:48 +000011810#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011811/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011812 * xmlSAXParseMemoryWithData:
11813 * @sax: the SAX handler block
11814 * @buffer: an pointer to a char array
11815 * @size: the size of the array
11816 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11817 * documents
11818 * @data: the userdata
11819 *
11820 * parse an XML in-memory block and use the given SAX function block
11821 * to handle the parsing callback. If sax is NULL, fallback to the default
11822 * DOM tree building routines.
11823 *
11824 * User data (void *) is stored within the parser context in the
11825 * context's _private member, so it is available nearly everywhere in libxml
11826 *
11827 * Returns the resulting document tree
11828 */
11829
11830xmlDocPtr
11831xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11832 int size, int recovery, void *data) {
11833 xmlDocPtr ret;
11834 xmlParserCtxtPtr ctxt;
11835
11836 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11837 if (ctxt == NULL) return(NULL);
11838 if (sax != NULL) {
11839 if (ctxt->sax != NULL)
11840 xmlFree(ctxt->sax);
11841 ctxt->sax = sax;
11842 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011843 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011844 if (data!=NULL) {
11845 ctxt->_private=data;
11846 }
11847
Daniel Veillardadba5f12003-04-04 16:09:01 +000011848 ctxt->recovery = recovery;
11849
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011850 xmlParseDocument(ctxt);
11851
11852 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11853 else {
11854 ret = NULL;
11855 xmlFreeDoc(ctxt->myDoc);
11856 ctxt->myDoc = NULL;
11857 }
11858 if (sax != NULL)
11859 ctxt->sax = NULL;
11860 xmlFreeParserCtxt(ctxt);
11861
11862 return(ret);
11863}
11864
11865/**
Owen Taylor3473f882001-02-23 17:55:21 +000011866 * xmlSAXParseMemory:
11867 * @sax: the SAX handler block
11868 * @buffer: an pointer to a char array
11869 * @size: the size of the array
11870 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11871 * documents
11872 *
11873 * parse an XML in-memory block and use the given SAX function block
11874 * to handle the parsing callback. If sax is NULL, fallback to the default
11875 * DOM tree building routines.
11876 *
11877 * Returns the resulting document tree
11878 */
11879xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011880xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11881 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011882 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011883}
11884
11885/**
11886 * xmlParseMemory:
11887 * @buffer: an pointer to a char array
11888 * @size: the size of the array
11889 *
11890 * parse an XML in-memory block and build a tree.
11891 *
11892 * Returns the resulting document tree
11893 */
11894
Daniel Veillard50822cb2001-07-26 20:05:51 +000011895xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011896 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11897}
11898
11899/**
11900 * xmlRecoverMemory:
11901 * @buffer: an pointer to a char array
11902 * @size: the size of the array
11903 *
11904 * parse an XML in-memory block and build a tree.
11905 * In the case the document is not Well Formed, a tree is built anyway
11906 *
11907 * Returns the resulting document tree
11908 */
11909
Daniel Veillard50822cb2001-07-26 20:05:51 +000011910xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011911 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11912}
11913
11914/**
11915 * xmlSAXUserParseMemory:
11916 * @sax: a SAX handler
11917 * @user_data: The user data returned on SAX callbacks
11918 * @buffer: an in-memory XML document input
11919 * @size: the length of the XML document in bytes
11920 *
11921 * A better SAX parsing routine.
11922 * parse an XML in-memory buffer and call the given SAX handler routines.
11923 *
11924 * Returns 0 in case of success or a error number otherwise
11925 */
11926int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011927 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011928 int ret = 0;
11929 xmlParserCtxtPtr ctxt;
11930 xmlSAXHandlerPtr oldsax = NULL;
11931
Daniel Veillard9e923512002-08-14 08:48:52 +000011932 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011933 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11934 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011935 oldsax = ctxt->sax;
11936 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011937 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011938 if (user_data != NULL)
11939 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011940
11941 xmlParseDocument(ctxt);
11942
11943 if (ctxt->wellFormed)
11944 ret = 0;
11945 else {
11946 if (ctxt->errNo != 0)
11947 ret = ctxt->errNo;
11948 else
11949 ret = -1;
11950 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011951 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000011952 if (ctxt->myDoc != NULL) {
11953 xmlFreeDoc(ctxt->myDoc);
11954 ctxt->myDoc = NULL;
11955 }
Owen Taylor3473f882001-02-23 17:55:21 +000011956 xmlFreeParserCtxt(ctxt);
11957
11958 return ret;
11959}
Daniel Veillard81273902003-09-30 00:43:48 +000011960#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011961
11962/**
11963 * xmlCreateDocParserCtxt:
11964 * @cur: a pointer to an array of xmlChar
11965 *
11966 * Creates a parser context for an XML in-memory document.
11967 *
11968 * Returns the new parser context or NULL
11969 */
11970xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011971xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011972 int len;
11973
11974 if (cur == NULL)
11975 return(NULL);
11976 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011977 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011978}
11979
Daniel Veillard81273902003-09-30 00:43:48 +000011980#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011981/**
11982 * xmlSAXParseDoc:
11983 * @sax: the SAX handler block
11984 * @cur: a pointer to an array of xmlChar
11985 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11986 * documents
11987 *
11988 * parse an XML in-memory document and build a tree.
11989 * It use the given SAX function block to handle the parsing callback.
11990 * If sax is NULL, fallback to the default DOM tree building routines.
11991 *
11992 * Returns the resulting document tree
11993 */
11994
11995xmlDocPtr
11996xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11997 xmlDocPtr ret;
11998 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000011999 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012000
Daniel Veillard38936062004-11-04 17:45:11 +000012001 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012002
12003
12004 ctxt = xmlCreateDocParserCtxt(cur);
12005 if (ctxt == NULL) return(NULL);
12006 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012007 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012008 ctxt->sax = sax;
12009 ctxt->userData = NULL;
12010 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012011 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012012
12013 xmlParseDocument(ctxt);
12014 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12015 else {
12016 ret = NULL;
12017 xmlFreeDoc(ctxt->myDoc);
12018 ctxt->myDoc = NULL;
12019 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012020 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012021 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012022 xmlFreeParserCtxt(ctxt);
12023
12024 return(ret);
12025}
12026
12027/**
12028 * xmlParseDoc:
12029 * @cur: a pointer to an array of xmlChar
12030 *
12031 * parse an XML in-memory document and build a tree.
12032 *
12033 * Returns the resulting document tree
12034 */
12035
12036xmlDocPtr
12037xmlParseDoc(xmlChar *cur) {
12038 return(xmlSAXParseDoc(NULL, cur, 0));
12039}
Daniel Veillard81273902003-09-30 00:43:48 +000012040#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012041
Daniel Veillard81273902003-09-30 00:43:48 +000012042#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012043/************************************************************************
12044 * *
12045 * Specific function to keep track of entities references *
12046 * and used by the XSLT debugger *
12047 * *
12048 ************************************************************************/
12049
12050static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12051
12052/**
12053 * xmlAddEntityReference:
12054 * @ent : A valid entity
12055 * @firstNode : A valid first node for children of entity
12056 * @lastNode : A valid last node of children entity
12057 *
12058 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12059 */
12060static void
12061xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12062 xmlNodePtr lastNode)
12063{
12064 if (xmlEntityRefFunc != NULL) {
12065 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12066 }
12067}
12068
12069
12070/**
12071 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012072 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012073 *
12074 * Set the function to call call back when a xml reference has been made
12075 */
12076void
12077xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12078{
12079 xmlEntityRefFunc = func;
12080}
Daniel Veillard81273902003-09-30 00:43:48 +000012081#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012082
12083/************************************************************************
12084 * *
12085 * Miscellaneous *
12086 * *
12087 ************************************************************************/
12088
12089#ifdef LIBXML_XPATH_ENABLED
12090#include <libxml/xpath.h>
12091#endif
12092
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012093extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012094static int xmlParserInitialized = 0;
12095
12096/**
12097 * xmlInitParser:
12098 *
12099 * Initialization function for the XML parser.
12100 * This is not reentrant. Call once before processing in case of
12101 * use in multithreaded programs.
12102 */
12103
12104void
12105xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012106 if (xmlParserInitialized != 0)
12107 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012108
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012109 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12110 (xmlGenericError == NULL))
12111 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012112 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012113 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012114 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012115 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012116 xmlDefaultSAXHandlerInit();
12117 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012118#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012119 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012120#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012121#ifdef LIBXML_HTML_ENABLED
12122 htmlInitAutoClose();
12123 htmlDefaultSAXHandlerInit();
12124#endif
12125#ifdef LIBXML_XPATH_ENABLED
12126 xmlXPathInit();
12127#endif
12128 xmlParserInitialized = 1;
12129}
12130
12131/**
12132 * xmlCleanupParser:
12133 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012134 * Cleanup function for the XML library. It tries to reclaim all
12135 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012136 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012137 * function should not prevent reusing the library but one should
12138 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012139 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012140 */
12141
12142void
12143xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012144 if (!xmlParserInitialized)
12145 return;
12146
Owen Taylor3473f882001-02-23 17:55:21 +000012147 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012148#ifdef LIBXML_CATALOG_ENABLED
12149 xmlCatalogCleanup();
12150#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012151 xmlCleanupInputCallbacks();
12152#ifdef LIBXML_OUTPUT_ENABLED
12153 xmlCleanupOutputCallbacks();
12154#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012155#ifdef LIBXML_SCHEMAS_ENABLED
12156 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012157 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012158#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012159 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012160 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012161 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012162 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012163 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012164}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012165
12166/************************************************************************
12167 * *
12168 * New set (2.6.0) of simpler and more flexible APIs *
12169 * *
12170 ************************************************************************/
12171
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The macro expects a variable named "dict" to be visible at the point
 * of use. It is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside unbraced if/else); callers supply the
 * terminating semicolon. @str is evaluated more than once, so it must
 * not have side effects.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
		(xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
		xmlFree((char *)(str));				\
	} while (0)
12183
12184/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012185 * xmlCtxtReset:
12186 * @ctxt: an XML parser context
12187 *
12188 * Reset a parser context
12189 */
12190void
12191xmlCtxtReset(xmlParserCtxtPtr ctxt)
12192{
12193 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012194 xmlDictPtr dict;
12195
12196 if (ctxt == NULL)
12197 return;
12198
12199 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012200
12201 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12202 xmlFreeInputStream(input);
12203 }
12204 ctxt->inputNr = 0;
12205 ctxt->input = NULL;
12206
12207 ctxt->spaceNr = 0;
12208 ctxt->spaceTab[0] = -1;
12209 ctxt->space = &ctxt->spaceTab[0];
12210
12211
12212 ctxt->nodeNr = 0;
12213 ctxt->node = NULL;
12214
12215 ctxt->nameNr = 0;
12216 ctxt->name = NULL;
12217
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012218 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012219 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012220 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012221 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012222 DICT_FREE(ctxt->directory);
12223 ctxt->directory = NULL;
12224 DICT_FREE(ctxt->extSubURI);
12225 ctxt->extSubURI = NULL;
12226 DICT_FREE(ctxt->extSubSystem);
12227 ctxt->extSubSystem = NULL;
12228 if (ctxt->myDoc != NULL)
12229 xmlFreeDoc(ctxt->myDoc);
12230 ctxt->myDoc = NULL;
12231
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012232 ctxt->standalone = -1;
12233 ctxt->hasExternalSubset = 0;
12234 ctxt->hasPErefs = 0;
12235 ctxt->html = 0;
12236 ctxt->external = 0;
12237 ctxt->instate = XML_PARSER_START;
12238 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012239
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012240 ctxt->wellFormed = 1;
12241 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012242 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012243 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012244#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012245 ctxt->vctxt.userData = ctxt;
12246 ctxt->vctxt.error = xmlParserValidityError;
12247 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012248#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012249 ctxt->record_info = 0;
12250 ctxt->nbChars = 0;
12251 ctxt->checkIndex = 0;
12252 ctxt->inSubset = 0;
12253 ctxt->errNo = XML_ERR_OK;
12254 ctxt->depth = 0;
12255 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12256 ctxt->catalogs = NULL;
12257 xmlInitNodeInfoSeq(&ctxt->node_seq);
12258
12259 if (ctxt->attsDefault != NULL) {
12260 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12261 ctxt->attsDefault = NULL;
12262 }
12263 if (ctxt->attsSpecial != NULL) {
12264 xmlHashFree(ctxt->attsSpecial, NULL);
12265 ctxt->attsSpecial = NULL;
12266 }
12267
Daniel Veillard4432df22003-09-28 18:58:27 +000012268#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012269 if (ctxt->catalogs != NULL)
12270 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012271#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012272 if (ctxt->lastError.code != XML_ERR_OK)
12273 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012274}
12275
12276/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012277 * xmlCtxtResetPush:
12278 * @ctxt: an XML parser context
12279 * @chunk: a pointer to an array of chars
12280 * @size: number of chars in the array
12281 * @filename: an optional file name or URI
12282 * @encoding: the document encoding, or NULL
12283 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012284 * Reset a push parser context
12285 *
12286 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012287 */
12288int
12289xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12290 int size, const char *filename, const char *encoding)
12291{
12292 xmlParserInputPtr inputStream;
12293 xmlParserInputBufferPtr buf;
12294 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12295
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012296 if (ctxt == NULL)
12297 return(1);
12298
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012299 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12300 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12301
12302 buf = xmlAllocParserInputBuffer(enc);
12303 if (buf == NULL)
12304 return(1);
12305
12306 if (ctxt == NULL) {
12307 xmlFreeParserInputBuffer(buf);
12308 return(1);
12309 }
12310
12311 xmlCtxtReset(ctxt);
12312
12313 if (ctxt->pushTab == NULL) {
12314 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12315 sizeof(xmlChar *));
12316 if (ctxt->pushTab == NULL) {
12317 xmlErrMemory(ctxt, NULL);
12318 xmlFreeParserInputBuffer(buf);
12319 return(1);
12320 }
12321 }
12322
12323 if (filename == NULL) {
12324 ctxt->directory = NULL;
12325 } else {
12326 ctxt->directory = xmlParserGetDirectory(filename);
12327 }
12328
12329 inputStream = xmlNewInputStream(ctxt);
12330 if (inputStream == NULL) {
12331 xmlFreeParserInputBuffer(buf);
12332 return(1);
12333 }
12334
12335 if (filename == NULL)
12336 inputStream->filename = NULL;
12337 else
12338 inputStream->filename = (char *)
12339 xmlCanonicPath((const xmlChar *) filename);
12340 inputStream->buf = buf;
12341 inputStream->base = inputStream->buf->buffer->content;
12342 inputStream->cur = inputStream->buf->buffer->content;
12343 inputStream->end =
12344 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12345
12346 inputPush(ctxt, inputStream);
12347
12348 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12349 (ctxt->input->buf != NULL)) {
12350 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12351 int cur = ctxt->input->cur - ctxt->input->base;
12352
12353 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12354
12355 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12356 ctxt->input->cur = ctxt->input->base + cur;
12357 ctxt->input->end =
12358 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12359 use];
12360#ifdef DEBUG_PUSH
12361 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12362#endif
12363 }
12364
12365 if (encoding != NULL) {
12366 xmlCharEncodingHandlerPtr hdlr;
12367
12368 hdlr = xmlFindCharEncodingHandler(encoding);
12369 if (hdlr != NULL) {
12370 xmlSwitchToEncoding(ctxt, hdlr);
12371 } else {
12372 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12373 "Unsupported encoding %s\n", BAD_CAST encoding);
12374 }
12375 } else if (enc != XML_CHAR_ENCODING_NONE) {
12376 xmlSwitchEncoding(ctxt, enc);
12377 }
12378
12379 return(0);
12380}
12381
12382/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012383 * xmlCtxtUseOptions:
12384 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012385 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012386 *
12387 * Applies the options to the parser context
12388 *
12389 * Returns 0 in case of success, the set of unknown or unimplemented options
12390 * in case of error.
12391 */
12392int
12393xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12394{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012395 if (ctxt == NULL)
12396 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012397 if (options & XML_PARSE_RECOVER) {
12398 ctxt->recovery = 1;
12399 options -= XML_PARSE_RECOVER;
12400 } else
12401 ctxt->recovery = 0;
12402 if (options & XML_PARSE_DTDLOAD) {
12403 ctxt->loadsubset = XML_DETECT_IDS;
12404 options -= XML_PARSE_DTDLOAD;
12405 } else
12406 ctxt->loadsubset = 0;
12407 if (options & XML_PARSE_DTDATTR) {
12408 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12409 options -= XML_PARSE_DTDATTR;
12410 }
12411 if (options & XML_PARSE_NOENT) {
12412 ctxt->replaceEntities = 1;
12413 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12414 options -= XML_PARSE_NOENT;
12415 } else
12416 ctxt->replaceEntities = 0;
12417 if (options & XML_PARSE_NOWARNING) {
12418 ctxt->sax->warning = NULL;
12419 options -= XML_PARSE_NOWARNING;
12420 }
12421 if (options & XML_PARSE_NOERROR) {
12422 ctxt->sax->error = NULL;
12423 ctxt->sax->fatalError = NULL;
12424 options -= XML_PARSE_NOERROR;
12425 }
12426 if (options & XML_PARSE_PEDANTIC) {
12427 ctxt->pedantic = 1;
12428 options -= XML_PARSE_PEDANTIC;
12429 } else
12430 ctxt->pedantic = 0;
12431 if (options & XML_PARSE_NOBLANKS) {
12432 ctxt->keepBlanks = 0;
12433 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12434 options -= XML_PARSE_NOBLANKS;
12435 } else
12436 ctxt->keepBlanks = 1;
12437 if (options & XML_PARSE_DTDVALID) {
12438 ctxt->validate = 1;
12439 if (options & XML_PARSE_NOWARNING)
12440 ctxt->vctxt.warning = NULL;
12441 if (options & XML_PARSE_NOERROR)
12442 ctxt->vctxt.error = NULL;
12443 options -= XML_PARSE_DTDVALID;
12444 } else
12445 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012446#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012447 if (options & XML_PARSE_SAX1) {
12448 ctxt->sax->startElement = xmlSAX2StartElement;
12449 ctxt->sax->endElement = xmlSAX2EndElement;
12450 ctxt->sax->startElementNs = NULL;
12451 ctxt->sax->endElementNs = NULL;
12452 ctxt->sax->initialized = 1;
12453 options -= XML_PARSE_SAX1;
12454 }
Daniel Veillard81273902003-09-30 00:43:48 +000012455#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012456 if (options & XML_PARSE_NODICT) {
12457 ctxt->dictNames = 0;
12458 options -= XML_PARSE_NODICT;
12459 } else {
12460 ctxt->dictNames = 1;
12461 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012462 if (options & XML_PARSE_NOCDATA) {
12463 ctxt->sax->cdataBlock = NULL;
12464 options -= XML_PARSE_NOCDATA;
12465 }
12466 if (options & XML_PARSE_NSCLEAN) {
12467 ctxt->options |= XML_PARSE_NSCLEAN;
12468 options -= XML_PARSE_NSCLEAN;
12469 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012470 if (options & XML_PARSE_NONET) {
12471 ctxt->options |= XML_PARSE_NONET;
12472 options -= XML_PARSE_NONET;
12473 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012474 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012475 return (options);
12476}
12477
12478/**
12479 * xmlDoRead:
12480 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012481 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012482 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012483 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012484 * @reuse: keep the context for reuse
12485 *
12486 * Common front-end for the xmlRead functions
12487 *
12488 * Returns the resulting document tree or NULL
12489 */
12490static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012491xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12492 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012493{
12494 xmlDocPtr ret;
12495
12496 xmlCtxtUseOptions(ctxt, options);
12497 if (encoding != NULL) {
12498 xmlCharEncodingHandlerPtr hdlr;
12499
12500 hdlr = xmlFindCharEncodingHandler(encoding);
12501 if (hdlr != NULL)
12502 xmlSwitchToEncoding(ctxt, hdlr);
12503 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012504 if ((URL != NULL) && (ctxt->input != NULL) &&
12505 (ctxt->input->filename == NULL))
12506 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012507 xmlParseDocument(ctxt);
12508 if ((ctxt->wellFormed) || ctxt->recovery)
12509 ret = ctxt->myDoc;
12510 else {
12511 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012512 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012513 xmlFreeDoc(ctxt->myDoc);
12514 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012515 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012516 ctxt->myDoc = NULL;
12517 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012518 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012519 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012520
12521 return (ret);
12522}
12523
12524/**
12525 * xmlReadDoc:
12526 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012527 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012528 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012529 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012530 *
12531 * parse an XML in-memory document and build a tree.
12532 *
12533 * Returns the resulting document tree
12534 */
12535xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012536xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012537{
12538 xmlParserCtxtPtr ctxt;
12539
12540 if (cur == NULL)
12541 return (NULL);
12542
12543 ctxt = xmlCreateDocParserCtxt(cur);
12544 if (ctxt == NULL)
12545 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012546 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012547}
12548
12549/**
12550 * xmlReadFile:
12551 * @filename: a file or URL
12552 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012553 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012554 *
12555 * parse an XML file from the filesystem or the network.
12556 *
12557 * Returns the resulting document tree
12558 */
12559xmlDocPtr
12560xmlReadFile(const char *filename, const char *encoding, int options)
12561{
12562 xmlParserCtxtPtr ctxt;
12563
Daniel Veillard61b93382003-11-03 14:28:31 +000012564 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012565 if (ctxt == NULL)
12566 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012567 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012568}
12569
12570/**
12571 * xmlReadMemory:
12572 * @buffer: a pointer to a char array
12573 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012574 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012575 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012576 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012577 *
12578 * parse an XML in-memory document and build a tree.
12579 *
12580 * Returns the resulting document tree
12581 */
12582xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012583xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012584{
12585 xmlParserCtxtPtr ctxt;
12586
12587 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12588 if (ctxt == NULL)
12589 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012590 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012591}
12592
12593/**
12594 * xmlReadFd:
12595 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012596 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012597 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012598 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012599 *
12600 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012601 * NOTE that the file descriptor will not be closed when the
12602 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012603 *
12604 * Returns the resulting document tree
12605 */
12606xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012607xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012608{
12609 xmlParserCtxtPtr ctxt;
12610 xmlParserInputBufferPtr input;
12611 xmlParserInputPtr stream;
12612
12613 if (fd < 0)
12614 return (NULL);
12615
12616 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12617 if (input == NULL)
12618 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012619 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012620 ctxt = xmlNewParserCtxt();
12621 if (ctxt == NULL) {
12622 xmlFreeParserInputBuffer(input);
12623 return (NULL);
12624 }
12625 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12626 if (stream == NULL) {
12627 xmlFreeParserInputBuffer(input);
12628 xmlFreeParserCtxt(ctxt);
12629 return (NULL);
12630 }
12631 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012632 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012633}
12634
12635/**
12636 * xmlReadIO:
12637 * @ioread: an I/O read function
12638 * @ioclose: an I/O close function
12639 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012640 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012641 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012642 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012643 *
12644 * parse an XML document from I/O functions and source and build a tree.
12645 *
12646 * Returns the resulting document tree
12647 */
12648xmlDocPtr
12649xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012650 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012651{
12652 xmlParserCtxtPtr ctxt;
12653 xmlParserInputBufferPtr input;
12654 xmlParserInputPtr stream;
12655
12656 if (ioread == NULL)
12657 return (NULL);
12658
12659 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12660 XML_CHAR_ENCODING_NONE);
12661 if (input == NULL)
12662 return (NULL);
12663 ctxt = xmlNewParserCtxt();
12664 if (ctxt == NULL) {
12665 xmlFreeParserInputBuffer(input);
12666 return (NULL);
12667 }
12668 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12669 if (stream == NULL) {
12670 xmlFreeParserInputBuffer(input);
12671 xmlFreeParserCtxt(ctxt);
12672 return (NULL);
12673 }
12674 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012675 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012676}
12677
12678/**
12679 * xmlCtxtReadDoc:
12680 * @ctxt: an XML parser context
12681 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012682 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012683 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012684 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012685 *
12686 * parse an XML in-memory document and build a tree.
12687 * This reuses the existing @ctxt parser context
12688 *
12689 * Returns the resulting document tree
12690 */
12691xmlDocPtr
12692xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012693 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012694{
12695 xmlParserInputPtr stream;
12696
12697 if (cur == NULL)
12698 return (NULL);
12699 if (ctxt == NULL)
12700 return (NULL);
12701
12702 xmlCtxtReset(ctxt);
12703
12704 stream = xmlNewStringInputStream(ctxt, cur);
12705 if (stream == NULL) {
12706 return (NULL);
12707 }
12708 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012709 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012710}
12711
12712/**
12713 * xmlCtxtReadFile:
12714 * @ctxt: an XML parser context
12715 * @filename: a file or URL
12716 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012717 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012718 *
12719 * parse an XML file from the filesystem or the network.
12720 * This reuses the existing @ctxt parser context
12721 *
12722 * Returns the resulting document tree
12723 */
12724xmlDocPtr
12725xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12726 const char *encoding, int options)
12727{
12728 xmlParserInputPtr stream;
12729
12730 if (filename == NULL)
12731 return (NULL);
12732 if (ctxt == NULL)
12733 return (NULL);
12734
12735 xmlCtxtReset(ctxt);
12736
Daniel Veillard29614c72004-11-26 10:47:26 +000012737 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012738 if (stream == NULL) {
12739 return (NULL);
12740 }
12741 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012742 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012743}
12744
12745/**
12746 * xmlCtxtReadMemory:
12747 * @ctxt: an XML parser context
12748 * @buffer: a pointer to a char array
12749 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012750 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012751 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012752 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012753 *
12754 * parse an XML in-memory document and build a tree.
12755 * This reuses the existing @ctxt parser context
12756 *
12757 * Returns the resulting document tree
12758 */
12759xmlDocPtr
12760xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012761 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012762{
12763 xmlParserInputBufferPtr input;
12764 xmlParserInputPtr stream;
12765
12766 if (ctxt == NULL)
12767 return (NULL);
12768 if (buffer == NULL)
12769 return (NULL);
12770
12771 xmlCtxtReset(ctxt);
12772
12773 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12774 if (input == NULL) {
12775 return(NULL);
12776 }
12777
12778 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12779 if (stream == NULL) {
12780 xmlFreeParserInputBuffer(input);
12781 return(NULL);
12782 }
12783
12784 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012785 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012786}
12787
12788/**
12789 * xmlCtxtReadFd:
12790 * @ctxt: an XML parser context
12791 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012792 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012793 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012794 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012795 *
12796 * parse an XML from a file descriptor and build a tree.
12797 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012798 * NOTE that the file descriptor will not be closed when the
12799 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012800 *
12801 * Returns the resulting document tree
12802 */
12803xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012804xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12805 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012806{
12807 xmlParserInputBufferPtr input;
12808 xmlParserInputPtr stream;
12809
12810 if (fd < 0)
12811 return (NULL);
12812 if (ctxt == NULL)
12813 return (NULL);
12814
12815 xmlCtxtReset(ctxt);
12816
12817
12818 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12819 if (input == NULL)
12820 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012821 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012822 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12823 if (stream == NULL) {
12824 xmlFreeParserInputBuffer(input);
12825 return (NULL);
12826 }
12827 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012828 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012829}
12830
12831/**
12832 * xmlCtxtReadIO:
12833 * @ctxt: an XML parser context
12834 * @ioread: an I/O read function
12835 * @ioclose: an I/O close function
12836 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012837 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012838 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012839 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012840 *
12841 * parse an XML document from I/O functions and source and build a tree.
12842 * This reuses the existing @ctxt parser context
12843 *
12844 * Returns the resulting document tree
12845 */
12846xmlDocPtr
12847xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12848 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012849 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012850 const char *encoding, int options)
12851{
12852 xmlParserInputBufferPtr input;
12853 xmlParserInputPtr stream;
12854
12855 if (ioread == NULL)
12856 return (NULL);
12857 if (ctxt == NULL)
12858 return (NULL);
12859
12860 xmlCtxtReset(ctxt);
12861
12862 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12863 XML_CHAR_ENCODING_NONE);
12864 if (input == NULL)
12865 return (NULL);
12866 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12867 if (stream == NULL) {
12868 xmlFreeParserInputBuffer(input);
12869 return (NULL);
12870 }
12871 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012872 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012873}