blob: 5a970360079fa84b442ca11847841e85351795cc [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
unsigned int xmlParserMaxDepth = 1024;

/* Marker macro defined to 1 for this file. */
#define SAX2 1

/* Sizes, in characters, of the large and regular scratch buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for documents built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The list is NULL terminated and never modified, so both the strings
 * and the array holding them are declared const.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
429 * Handle a warning.
430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
857 xmlRealloc(ctxt->nsTab,
858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1231
/*
 * Raw accessors on the current input. They all expand to expressions on
 * a variable named ctxt which must be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1 ... cn): true when the first n bytes at s equal c1 ... cn.
 * Each macro extends the previous one by one more byte, the && chain
 * stops at the first mismatch. s is not parenthesized in the expansion,
 * pass a simple pointer expression.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && \
    ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && \
    ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance the input by val xmlChars, updating the character
 * and column counters, then handle a '%' found at the new position and
 * pop the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1262
Daniel Veillarda880b122003-04-21 21:36:41 +00001263#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001264 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1265 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001266 xmlSHRINK (ctxt);
1267
1268static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1269 xmlParserInputShrink(ctxt->input);
1270 if ((*ctxt->input->cur == 0) &&
1271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1272 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001273 }
Owen Taylor3473f882001-02-23 17:55:21 +00001274
Daniel Veillarda880b122003-04-21 21:36:41 +00001275#define GROW if ((ctxt->progressive == 0) && \
1276 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001277 xmlGROW (ctxt);
1278
1279static void xmlGROW (xmlParserCtxtPtr ctxt) {
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((*ctxt->input->cur == 0) &&
1282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1283 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001284}
Owen Taylor3473f882001-02-23 17:55:21 +00001285
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, updating the counters, and try
 * to grow the input when the new position holds a 0 byte.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
        ctxt->nbChars++;						\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character which is l xmlChars long,
 * keeping line and column up to date, then handle a '%' found at the
 * new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else {								\
        ctxt->input->col++;						\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)

/* Current character of the context, resp. of the string s; l receives
 * the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a single byte is stored directly, anything else goes
 * through xmlCopyCharMultiByte(). The expansion is a bare if/else
 * without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001312
1313/**
1314 * xmlSkipBlankChars:
1315 * @ctxt: the XML parser context
1316 *
1317 * skip all blanks character found at that point in the input streams.
1318 * It pops up finished entities in the process if allowable at that point.
1319 *
1320 * Returns the number of space chars skipped
1321 */
1322
1323int
1324xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001325 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001326
1327 /*
1328 * It's Okay to use CUR/NEXT here since all the blanks are on
1329 * the ASCII range.
1330 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001331 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1332 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001333 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001334 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001336 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001337 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001338 if (*cur == '\n') {
1339 ctxt->input->line++; ctxt->input->col = 1;
1340 }
1341 cur++;
1342 res++;
1343 if (*cur == 0) {
1344 ctxt->input->cur = cur;
1345 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1346 cur = ctxt->input->cur;
1347 }
1348 }
1349 ctxt->input->cur = cur;
1350 } else {
1351 int cur;
1352 do {
1353 cur = CUR;
1354 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1355 NEXT;
1356 cur = CUR;
1357 res++;
1358 }
1359 while ((cur == 0) && (ctxt->inputNr > 1) &&
1360 (ctxt->instate != XML_PARSER_COMMENT)) {
1361 xmlPopInput(ctxt);
1362 cur = CUR;
1363 }
1364 /*
1365 * Need to handle support of entities branching here
1366 */
1367 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1368 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1369 }
Owen Taylor3473f882001-02-23 17:55:21 +00001370 return(res);
1371}
1372
1373/************************************************************************
1374 * *
1375 * Commodity functions to handle entities *
1376 * *
1377 ************************************************************************/
1378
1379/**
1380 * xmlPopInput:
1381 * @ctxt: an XML parser context
1382 *
1383 * xmlPopInput: the current input pointed by ctxt->input came to an end
1384 * pop it and return the next char.
1385 *
1386 * Returns the current xmlChar in the parser context
1387 */
1388xmlChar
1389xmlPopInput(xmlParserCtxtPtr ctxt) {
1390 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1391 if (xmlParserDebugEntities)
1392 xmlGenericError(xmlGenericErrorContext,
1393 "Popping input %d\n", ctxt->inputNr);
1394 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001395 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1397 return(xmlPopInput(ctxt));
1398 return(CUR);
1399}
1400
1401/**
1402 * xmlPushInput:
1403 * @ctxt: an XML parser context
1404 * @input: an XML parser input fragment (entity, XML fragment ...).
1405 *
1406 * xmlPushInput: switch to a new input stream which is stacked on top
1407 * of the previous one(s).
1408 */
1409void
1410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1411 if (input == NULL) return;
1412
1413 if (xmlParserDebugEntities) {
1414 if ((ctxt->input != NULL) && (ctxt->input->filename))
1415 xmlGenericError(xmlGenericErrorContext,
1416 "%s(%d): ", ctxt->input->filename,
1417 ctxt->input->line);
1418 xmlGenericError(xmlGenericErrorContext,
1419 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1420 }
1421 inputPush(ctxt, input);
1422 GROW;
1423}
1424
1425/**
1426 * xmlParseCharRef:
1427 * @ctxt: an XML parser context
1428 *
1429 * parse Reference declarations
1430 *
1431 * [66] CharRef ::= '&#' [0-9]+ ';' |
1432 * '&#x' [0-9a-fA-F]+ ';'
1433 *
1434 * [ WFC: Legal Character ]
1435 * Characters referred to using character references must match the
1436 * production for Char.
1437 *
1438 * Returns the value parsed (as an int), 0 in case of error
1439 */
1440int
1441xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001442 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 int count = 0;
1444
Owen Taylor3473f882001-02-23 17:55:21 +00001445 /*
1446 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1447 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001448 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001449 (NXT(2) == 'x')) {
1450 SKIP(3);
1451 GROW;
1452 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001453 if (count++ > 20) {
1454 count = 0;
1455 GROW;
1456 }
1457 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = val * 16 + (CUR - '0');
1459 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1460 val = val * 16 + (CUR - 'a') + 10;
1461 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1462 val = val * 16 + (CUR - 'A') + 10;
1463 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001465 val = 0;
1466 break;
1467 }
1468 NEXT;
1469 count++;
1470 }
1471 if (RAW == ';') {
1472 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001473 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001474 ctxt->nbChars ++;
1475 ctxt->input->cur++;
1476 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001477 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001478 SKIP(2);
1479 GROW;
1480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001481 if (count++ > 20) {
1482 count = 0;
1483 GROW;
1484 }
1485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001486 val = val * 10 + (CUR - '0');
1487 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001489 val = 0;
1490 break;
1491 }
1492 NEXT;
1493 count++;
1494 }
1495 if (RAW == ';') {
1496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001497 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001498 ctxt->nbChars ++;
1499 ctxt->input->cur++;
1500 }
1501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 }
1504
1505 /*
1506 * [ WFC: Legal Character ]
1507 * Characters referred to using character references must match the
1508 * production for Char.
1509 */
William M. Brack871611b2003-10-18 04:53:14 +00001510 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 return(val);
1512 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1514 "xmlParseCharRef: invalid xmlChar value %d\n",
1515 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 }
1517 return(0);
1518}
1519
1520/**
1521 * xmlParseStringCharRef:
1522 * @ctxt: an XML parser context
1523 * @str: a pointer to an index in the string
1524 *
1525 * parse Reference declarations, variant parsing from a string rather
1526 * than an an input flow.
1527 *
1528 * [66] CharRef ::= '&#' [0-9]+ ';' |
1529 * '&#x' [0-9a-fA-F]+ ';'
1530 *
1531 * [ WFC: Legal Character ]
1532 * Characters referred to using character references must match the
1533 * production for Char.
1534 *
1535 * Returns the value parsed (as an int), 0 in case of error, str will be
1536 * updated to the current value of the index
1537 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538static int
Owen Taylor3473f882001-02-23 17:55:21 +00001539xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1540 const xmlChar *ptr;
1541 xmlChar cur;
1542 int val = 0;
1543
1544 if ((str == NULL) || (*str == NULL)) return(0);
1545 ptr = *str;
1546 cur = *ptr;
1547 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1548 ptr += 3;
1549 cur = *ptr;
1550 while (cur != ';') { /* Non input consuming loop */
1551 if ((cur >= '0') && (cur <= '9'))
1552 val = val * 16 + (cur - '0');
1553 else if ((cur >= 'a') && (cur <= 'f'))
1554 val = val * 16 + (cur - 'a') + 10;
1555 else if ((cur >= 'A') && (cur <= 'F'))
1556 val = val * 16 + (cur - 'A') + 10;
1557 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001558 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001559 val = 0;
1560 break;
1561 }
1562 ptr++;
1563 cur = *ptr;
1564 }
1565 if (cur == ';')
1566 ptr++;
1567 } else if ((cur == '&') && (ptr[1] == '#')){
1568 ptr += 2;
1569 cur = *ptr;
1570 while (cur != ';') { /* Non input consuming loops */
1571 if ((cur >= '0') && (cur <= '9'))
1572 val = val * 10 + (cur - '0');
1573 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001574 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001575 val = 0;
1576 break;
1577 }
1578 ptr++;
1579 cur = *ptr;
1580 }
1581 if (cur == ';')
1582 ptr++;
1583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001584 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001585 return(0);
1586 }
1587 *str = ptr;
1588
1589 /*
1590 * [ WFC: Legal Character ]
1591 * Characters referred to using character references must match the
1592 * production for Char.
1593 */
William M. Brack871611b2003-10-18 04:53:14 +00001594 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 return(val);
1596 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001597 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1598 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1599 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 return(0);
1602}
1603
1604/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 * xmlNewBlanksWrapperInputStream:
1606 * @ctxt: an XML parser context
1607 * @entity: an Entity pointer
1608 *
1609 * Create a new input stream for wrapping
1610 * blanks around a PEReference
1611 *
1612 * Returns the new input stream or NULL
1613 */
1614
1615static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1616
Daniel Veillardf4862f02002-09-10 11:13:43 +00001617static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001618xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1619 xmlParserInputPtr input;
1620 xmlChar *buffer;
1621 size_t length;
1622 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1624 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001625 return(NULL);
1626 }
1627 if (xmlParserDebugEntities)
1628 xmlGenericError(xmlGenericErrorContext,
1629 "new blanks wrapper for entity: %s\n", entity->name);
1630 input = xmlNewInputStream(ctxt);
1631 if (input == NULL) {
1632 return(NULL);
1633 }
1634 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001635 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001636 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001637 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001638 return(NULL);
1639 }
1640 buffer [0] = ' ';
1641 buffer [1] = '%';
1642 buffer [length-3] = ';';
1643 buffer [length-2] = ' ';
1644 buffer [length-1] = 0;
1645 memcpy(buffer + 2, entity->name, length - 5);
1646 input->free = deallocblankswrapper;
1647 input->base = buffer;
1648 input->cur = buffer;
1649 input->length = length;
1650 input->end = &buffer[length];
1651 return(input);
1652}
1653
1654/**
Owen Taylor3473f882001-02-23 17:55:21 +00001655 * xmlParserHandlePEReference:
1656 * @ctxt: the parser context
1657 *
1658 * [69] PEReference ::= '%' Name ';'
1659 *
1660 * [ WFC: No Recursion ]
1661 * A parsed entity must not contain a recursive
1662 * reference to itself, either directly or indirectly.
1663 *
1664 * [ WFC: Entity Declared ]
1665 * In a document without any DTD, a document with only an internal DTD
1666 * subset which contains no parameter entity references, or a document
1667 * with "standalone='yes'", ... ... The declaration of a parameter
1668 * entity must precede any reference to it...
1669 *
1670 * [ VC: Entity Declared ]
1671 * In a document with an external subset or external parameter entities
1672 * with "standalone='no'", ... ... The declaration of a parameter entity
1673 * must precede any reference to it...
1674 *
1675 * [ WFC: In DTD ]
1676 * Parameter-entity references may only appear in the DTD.
1677 * NOTE: misleading but this is handled.
1678 *
1679 * A PEReference may have been detected in the current input stream
1680 * the handling is done accordingly to
1681 * http://www.w3.org/TR/REC-xml#entproc
1682 * i.e.
1683 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001684 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001685 */
1686void
1687xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 xmlEntityPtr entity = NULL;
1690 xmlParserInputPtr input;
1691
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (RAW != '%') return;
1693 switch(ctxt->instate) {
1694 case XML_PARSER_CDATA_SECTION:
1695 return;
1696 case XML_PARSER_COMMENT:
1697 return;
1698 case XML_PARSER_START_TAG:
1699 return;
1700 case XML_PARSER_END_TAG:
1701 return;
1702 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 return;
1705 case XML_PARSER_PROLOG:
1706 case XML_PARSER_START:
1707 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 return;
1710 case XML_PARSER_ENTITY_DECL:
1711 case XML_PARSER_CONTENT:
1712 case XML_PARSER_ATTRIBUTE_VALUE:
1713 case XML_PARSER_PI:
1714 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001715 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001716 /* we just ignore it there */
1717 return;
1718 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001719 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001720 return;
1721 case XML_PARSER_ENTITY_VALUE:
1722 /*
1723 * NOTE: in the case of entity values, we don't do the
1724 * substitution here since we need the literal
1725 * entity value to be able to save the internal
1726 * subset of the document.
1727 * This will be handled by xmlStringDecodeEntities
1728 */
1729 return;
1730 case XML_PARSER_DTD:
1731 /*
1732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1733 * In the internal DTD subset, parameter-entity references
1734 * can occur only where markup declarations can occur, not
1735 * within markup declarations.
1736 * In that case this is handled in xmlParseMarkupDecl
1737 */
1738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1739 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001740 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001741 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001742 break;
1743 case XML_PARSER_IGNORE:
1744 return;
1745 }
1746
1747 NEXT;
1748 name = xmlParseName(ctxt);
1749 if (xmlParserDebugEntities)
1750 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001751 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001753 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001754 } else {
1755 if (RAW == ';') {
1756 NEXT;
1757 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1758 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1759 if (entity == NULL) {
1760
1761 /*
1762 * [ WFC: Entity Declared ]
1763 * In a document without any DTD, a document with only an
1764 * internal DTD subset which contains no parameter entity
1765 * references, or a document with "standalone='yes'", ...
1766 * ... The declaration of a parameter entity must precede
1767 * any reference to it...
1768 */
1769 if ((ctxt->standalone == 1) ||
1770 ((ctxt->hasExternalSubset == 0) &&
1771 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001773 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001774 } else {
1775 /*
1776 * [ VC: Entity Declared ]
1777 * In a document with an external subset or external
1778 * parameter entities with "standalone='no'", ...
1779 * ... The declaration of a parameter entity must precede
1780 * any reference to it...
1781 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001782 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1783 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1784 "PEReference: %%%s; not found\n",
1785 name);
1786 } else
1787 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1788 "PEReference: %%%s; not found\n",
1789 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 ctxt->valid = 0;
1791 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 } else if (ctxt->input->free != deallocblankswrapper) {
1793 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1794 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 } else {
1796 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1797 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001798 xmlChar start[4];
1799 xmlCharEncoding enc;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 /*
1802 * handle the extra spaces added before and after
1803 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 */
1806 input = xmlNewEntityInputStream(ctxt, entity);
1807 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001808
1809 /*
1810 * Get the 4 first bytes and decode the charset
1811 * if enc != XML_CHAR_ENCODING_NONE
1812 * plug some encoding conversion routines.
1813 */
1814 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001815 if (entity->length >= 4) {
1816 start[0] = RAW;
1817 start[1] = NXT(1);
1818 start[2] = NXT(2);
1819 start[3] = NXT(3);
1820 enc = xmlDetectCharEncoding(start, 4);
1821 if (enc != XML_CHAR_ENCODING_NONE) {
1822 xmlSwitchEncoding(ctxt, enc);
1823 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001824 }
1825
Owen Taylor3473f882001-02-23 17:55:21 +00001826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001827 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1828 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlParseTextDecl(ctxt);
1830 }
Owen Taylor3473f882001-02-23 17:55:21 +00001831 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001832 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1833 "PEReference: %s is not a parameter entity\n",
1834 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001835 }
1836 }
1837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001838 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 }
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841}
1842
/*
 * Macro used to grow the current buffer: doubles the capacity tracked in
 * buffer##_size. The result of xmlRealloc goes through a temporary so that
 * on failure the original block is not lost: buffer and buffer##_size are
 * left untouched (the caller still owns the old block) and control jumps
 * to the caller's mem_error label.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown = (xmlChar *)				\
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) goto mem_error;				\
    buffer = buffer##_grown;						\
    buffer##_size *= 2;							\
}
1852
1853/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001854 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001855 * @ctxt: the parser context
1856 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001857 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001858 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1859 * @end: an end marker xmlChar, 0 if none
1860 * @end2: an end marker xmlChar, 0 if none
1861 * @end3: an end marker xmlChar, 0 if none
1862 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001863 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001864 *
1865 * [67] Reference ::= EntityRef | CharRef
1866 *
1867 * [69] PEReference ::= '%' Name ';'
1868 *
1869 * Returns A newly allocated string with the substitution done. The caller
1870 * must deallocate it !
1871 */
1872xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1874 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001875 xmlChar *buffer = NULL;
1876 int buffer_size = 0;
1877
1878 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 xmlEntityPtr ent;
1881 int c,l;
1882 int nbchars = 0;
1883
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001885 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001886 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001887
1888 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001889 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 return(NULL);
1891 }
1892
1893 /*
1894 * allocate a translation buffer.
1895 */
1896 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001897 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001898 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001899
1900 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001901 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001902 * we are operating on already parsed values.
1903 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001904 if (str < last)
1905 c = CUR_SCHAR(str, l);
1906 else
1907 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001908 while ((c != 0) && (c != end) && /* non input consuming loop */
1909 (c != end2) && (c != end3)) {
1910
1911 if (c == 0) break;
1912 if ((c == '&') && (str[1] == '#')) {
1913 int val = xmlParseStringCharRef(ctxt, &str);
1914 if (val != 0) {
1915 COPY_BUF(0,buffer,nbchars,val);
1916 }
1917 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1918 if (xmlParserDebugEntities)
1919 xmlGenericError(xmlGenericErrorContext,
1920 "String decoding Entity Reference: %.30s\n",
1921 str);
1922 ent = xmlParseStringEntityRef(ctxt, &str);
1923 if ((ent != NULL) &&
1924 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1925 if (ent->content != NULL) {
1926 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1927 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001928 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1929 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001930 }
1931 } else if ((ent != NULL) && (ent->content != NULL)) {
1932 xmlChar *rep;
1933
1934 ctxt->depth++;
1935 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1936 0, 0, 0);
1937 ctxt->depth--;
1938 if (rep != NULL) {
1939 current = rep;
1940 while (*current != 0) { /* non input consuming loop */
1941 buffer[nbchars++] = *current++;
1942 if (nbchars >
1943 buffer_size - XML_PARSER_BUFFER_SIZE) {
1944 growBuffer(buffer);
1945 }
1946 }
1947 xmlFree(rep);
1948 }
1949 } else if (ent != NULL) {
1950 int i = xmlStrlen(ent->name);
1951 const xmlChar *cur = ent->name;
1952
1953 buffer[nbchars++] = '&';
1954 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1955 growBuffer(buffer);
1956 }
1957 for (;i > 0;i--)
1958 buffer[nbchars++] = *cur++;
1959 buffer[nbchars++] = ';';
1960 }
1961 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "String decoding PE Reference: %.30s\n", str);
1965 ent = xmlParseStringPEReference(ctxt, &str);
1966 if (ent != NULL) {
1967 xmlChar *rep;
1968
1969 ctxt->depth++;
1970 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1971 0, 0, 0);
1972 ctxt->depth--;
1973 if (rep != NULL) {
1974 current = rep;
1975 while (*current != 0) { /* non input consuming loop */
1976 buffer[nbchars++] = *current++;
1977 if (nbchars >
1978 buffer_size - XML_PARSER_BUFFER_SIZE) {
1979 growBuffer(buffer);
1980 }
1981 }
1982 xmlFree(rep);
1983 }
1984 }
1985 } else {
1986 COPY_BUF(l,buffer,nbchars,c);
1987 str += l;
1988 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1989 growBuffer(buffer);
1990 }
1991 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001992 if (str < last)
1993 c = CUR_SCHAR(str, l);
1994 else
1995 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001996 }
1997 buffer[nbchars++] = 0;
1998 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001999
2000mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002001 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002002 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003}
2004
Daniel Veillarde57ec792003-09-10 10:50:59 +00002005/**
2006 * xmlStringDecodeEntities:
2007 * @ctxt: the parser context
2008 * @str: the input string
2009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2010 * @end: an end marker xmlChar, 0 if none
2011 * @end2: an end marker xmlChar, 0 if none
2012 * @end3: an end marker xmlChar, 0 if none
2013 *
2014 * Takes a entity string content and process to do the adequate substitutions.
2015 *
2016 * [67] Reference ::= EntityRef | CharRef
2017 *
2018 * [69] PEReference ::= '%' Name ';'
2019 *
2020 * Returns A newly allocated string with the substitution done. The caller
2021 * must deallocate it !
2022 */
2023xmlChar *
2024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2025 xmlChar end, xmlChar end2, xmlChar end3) {
2026 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2027 end, end2, end3));
2028}
Owen Taylor3473f882001-02-23 17:55:21 +00002029
2030/************************************************************************
2031 * *
2032 * Commodity functions to handle xmlChars *
2033 * *
2034 ************************************************************************/
2035
2036/**
2037 * xmlStrndup:
2038 * @cur: the input xmlChar *
2039 * @len: the len of @cur
2040 *
2041 * a strndup for array of xmlChar's
2042 *
2043 * Returns a new xmlChar * or NULL
2044 */
2045xmlChar *
2046xmlStrndup(const xmlChar *cur, int len) {
2047 xmlChar *ret;
2048
2049 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002050 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002051 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002052 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002053 return(NULL);
2054 }
2055 memcpy(ret, cur, len * sizeof(xmlChar));
2056 ret[len] = 0;
2057 return(ret);
2058}
2059
2060/**
2061 * xmlStrdup:
2062 * @cur: the input xmlChar *
2063 *
2064 * a strdup for array of xmlChar's. Since they are supposed to be
2065 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2066 * a termination mark of '0'.
2067 *
2068 * Returns a new xmlChar * or NULL
2069 */
2070xmlChar *
2071xmlStrdup(const xmlChar *cur) {
2072 const xmlChar *p = cur;
2073
2074 if (cur == NULL) return(NULL);
2075 while (*p != 0) p++; /* non input consuming */
2076 return(xmlStrndup(cur, p - cur));
2077}
2078
2079/**
2080 * xmlCharStrndup:
2081 * @cur: the input char *
2082 * @len: the len of @cur
2083 *
2084 * a strndup for char's to xmlChar's
2085 *
2086 * Returns a new xmlChar * or NULL
2087 */
2088
2089xmlChar *
2090xmlCharStrndup(const char *cur, int len) {
2091 int i;
2092 xmlChar *ret;
2093
2094 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002095 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002096 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002097 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002098 return(NULL);
2099 }
2100 for (i = 0;i < len;i++)
2101 ret[i] = (xmlChar) cur[i];
2102 ret[len] = 0;
2103 return(ret);
2104}
2105
2106/**
2107 * xmlCharStrdup:
2108 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002109 *
2110 * a strdup for char's to xmlChar's
2111 *
2112 * Returns a new xmlChar * or NULL
2113 */
2114
2115xmlChar *
2116xmlCharStrdup(const char *cur) {
2117 const char *p = cur;
2118
2119 if (cur == NULL) return(NULL);
2120 while (*p != '\0') p++; /* non input consuming */
2121 return(xmlCharStrndup(cur, p - cur));
2122}
2123
2124/**
2125 * xmlStrcmp:
2126 * @str1: the first xmlChar *
2127 * @str2: the second xmlChar *
2128 *
2129 * a strcmp for xmlChar's
2130 *
2131 * Returns the integer result of the comparison
2132 */
2133
2134int
2135xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2136 register int tmp;
2137
2138 if (str1 == str2) return(0);
2139 if (str1 == NULL) return(-1);
2140 if (str2 == NULL) return(1);
2141 do {
2142 tmp = *str1++ - *str2;
2143 if (tmp != 0) return(tmp);
2144 } while (*str2++ != 0);
2145 return 0;
2146}
2147
2148/**
2149 * xmlStrEqual:
2150 * @str1: the first xmlChar *
2151 * @str2: the second xmlChar *
2152 *
2153 * Check if both string are equal of have same content
2154 * Should be a bit more readable and faster than xmlStrEqual()
2155 *
2156 * Returns 1 if they are equal, 0 if they are different
2157 */
2158
2159int
2160xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2161 if (str1 == str2) return(1);
2162 if (str1 == NULL) return(0);
2163 if (str2 == NULL) return(0);
2164 do {
2165 if (*str1++ != *str2) return(0);
2166 } while (*str2++);
2167 return(1);
2168}
2169
2170/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002171 * xmlStrQEqual:
2172 * @pref: the prefix of the QName
2173 * @name: the localname of the QName
2174 * @str: the second xmlChar *
2175 *
2176 * Check if a QName is Equal to a given string
2177 *
2178 * Returns 1 if they are equal, 0 if they are different
2179 */
2180
2181int
2182xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2183 if (pref == NULL) return(xmlStrEqual(name, str));
2184 if (name == NULL) return(0);
2185 if (str == NULL) return(0);
2186
2187 do {
2188 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002189 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002190 if (*str++ != ':') return(0);
2191 do {
2192 if (*name++ != *str) return(0);
2193 } while (*str++);
2194 return(1);
2195}
2196
2197/**
Owen Taylor3473f882001-02-23 17:55:21 +00002198 * xmlStrncmp:
2199 * @str1: the first xmlChar *
2200 * @str2: the second xmlChar *
2201 * @len: the max comparison length
2202 *
2203 * a strncmp for xmlChar's
2204 *
2205 * Returns the integer result of the comparison
2206 */
2207
2208int
2209xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2210 register int tmp;
2211
2212 if (len <= 0) return(0);
2213 if (str1 == str2) return(0);
2214 if (str1 == NULL) return(-1);
2215 if (str2 == NULL) return(1);
2216 do {
2217 tmp = *str1++ - *str2;
2218 if (tmp != 0 || --len == 0) return(tmp);
2219 } while (*str2++ != 0);
2220 return 0;
2221}
2222
Daniel Veillardb44025c2001-10-11 22:55:55 +00002223static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002224 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2225 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2226 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2227 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2228 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2229 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2230 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2231 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2232 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2233 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2234 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2235 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2236 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2237 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2238 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2239 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2240 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2241 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2242 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2243 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2244 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2245 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2246 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2247 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2248 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2249 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2250 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2251 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2252 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2253 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2254 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2255 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2256};
2257
2258/**
2259 * xmlStrcasecmp:
2260 * @str1: the first xmlChar *
2261 * @str2: the second xmlChar *
2262 *
2263 * a strcasecmp for xmlChar's
2264 *
2265 * Returns the integer result of the comparison
2266 */
2267
2268int
2269xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2270 register int tmp;
2271
2272 if (str1 == str2) return(0);
2273 if (str1 == NULL) return(-1);
2274 if (str2 == NULL) return(1);
2275 do {
2276 tmp = casemap[*str1++] - casemap[*str2];
2277 if (tmp != 0) return(tmp);
2278 } while (*str2++ != 0);
2279 return 0;
2280}
2281
2282/**
2283 * xmlStrncasecmp:
2284 * @str1: the first xmlChar *
2285 * @str2: the second xmlChar *
2286 * @len: the max comparison length
2287 *
2288 * a strncasecmp for xmlChar's
2289 *
2290 * Returns the integer result of the comparison
2291 */
2292
2293int
2294xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2295 register int tmp;
2296
2297 if (len <= 0) return(0);
2298 if (str1 == str2) return(0);
2299 if (str1 == NULL) return(-1);
2300 if (str2 == NULL) return(1);
2301 do {
2302 tmp = casemap[*str1++] - casemap[*str2];
2303 if (tmp != 0 || --len == 0) return(tmp);
2304 } while (*str2++ != 0);
2305 return 0;
2306}
2307
2308/**
2309 * xmlStrchr:
2310 * @str: the xmlChar * array
2311 * @val: the xmlChar to search
2312 *
2313 * a strchr for xmlChar's
2314 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002315 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002316 */
2317
2318const xmlChar *
2319xmlStrchr(const xmlChar *str, xmlChar val) {
2320 if (str == NULL) return(NULL);
2321 while (*str != 0) { /* non input consuming */
2322 if (*str == val) return((xmlChar *) str);
2323 str++;
2324 }
2325 return(NULL);
2326}
2327
2328/**
2329 * xmlStrstr:
2330 * @str: the xmlChar * array (haystack)
2331 * @val: the xmlChar to search (needle)
2332 *
2333 * a strstr for xmlChar's
2334 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002335 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002336 */
2337
2338const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002339xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002340 int n;
2341
2342 if (str == NULL) return(NULL);
2343 if (val == NULL) return(NULL);
2344 n = xmlStrlen(val);
2345
2346 if (n == 0) return(str);
2347 while (*str != 0) { /* non input consuming */
2348 if (*str == *val) {
2349 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2350 }
2351 str++;
2352 }
2353 return(NULL);
2354}
2355
2356/**
2357 * xmlStrcasestr:
2358 * @str: the xmlChar * array (haystack)
2359 * @val: the xmlChar to search (needle)
2360 *
2361 * a case-ignoring strstr for xmlChar's
2362 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002363 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002364 */
2365
2366const xmlChar *
2367xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2368 int n;
2369
2370 if (str == NULL) return(NULL);
2371 if (val == NULL) return(NULL);
2372 n = xmlStrlen(val);
2373
2374 if (n == 0) return(str);
2375 while (*str != 0) { /* non input consuming */
2376 if (casemap[*str] == casemap[*val])
2377 if (!xmlStrncasecmp(str, val, n)) return(str);
2378 str++;
2379 }
2380 return(NULL);
2381}
2382
2383/**
2384 * xmlStrsub:
2385 * @str: the xmlChar * array (haystack)
2386 * @start: the index of the first char (zero based)
2387 * @len: the length of the substring
2388 *
2389 * Extract a substring of a given string
2390 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002391 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002392 */
2393
2394xmlChar *
2395xmlStrsub(const xmlChar *str, int start, int len) {
2396 int i;
2397
2398 if (str == NULL) return(NULL);
2399 if (start < 0) return(NULL);
2400 if (len < 0) return(NULL);
2401
2402 for (i = 0;i < start;i++) {
2403 if (*str == 0) return(NULL);
2404 str++;
2405 }
2406 if (*str == 0) return(NULL);
2407 return(xmlStrndup(str, len));
2408}
2409
2410/**
2411 * xmlStrlen:
2412 * @str: the xmlChar * array
2413 *
2414 * length of a xmlChar's string
2415 *
2416 * Returns the number of xmlChar contained in the ARRAY.
2417 */
2418
2419int
2420xmlStrlen(const xmlChar *str) {
2421 int len = 0;
2422
2423 if (str == NULL) return(0);
2424 while (*str != 0) { /* non input consuming */
2425 str++;
2426 len++;
2427 }
2428 return(len);
2429}
2430
2431/**
2432 * xmlStrncat:
2433 * @cur: the original xmlChar * array
2434 * @add: the xmlChar * array added
2435 * @len: the length of @add
2436 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002437 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002438 * first bytes of @add.
2439 *
2440 * Returns a new xmlChar *, the original @cur is reallocated if needed
2441 * and should not be freed
2442 */
2443
2444xmlChar *
2445xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2446 int size;
2447 xmlChar *ret;
2448
2449 if ((add == NULL) || (len == 0))
2450 return(cur);
2451 if (cur == NULL)
2452 return(xmlStrndup(add, len));
2453
2454 size = xmlStrlen(cur);
2455 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2456 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002457 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002458 return(cur);
2459 }
2460 memcpy(&ret[size], add, len * sizeof(xmlChar));
2461 ret[size + len] = 0;
2462 return(ret);
2463}
2464
2465/**
2466 * xmlStrcat:
2467 * @cur: the original xmlChar * array
2468 * @add: the xmlChar * array added
2469 *
2470 * a strcat for array of xmlChar's. Since they are supposed to be
2471 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2472 * a termination mark of '0'.
2473 *
2474 * Returns a new xmlChar * containing the concatenated string.
2475 */
2476xmlChar *
2477xmlStrcat(xmlChar *cur, const xmlChar *add) {
2478 const xmlChar *p = add;
2479
2480 if (add == NULL) return(cur);
2481 if (cur == NULL)
2482 return(xmlStrdup(add));
2483
2484 while (*p != 0) p++; /* non input consuming */
2485 return(xmlStrncat(cur, add, p - add));
2486}
2487
Aleksey Sanine7acf432003-10-02 20:05:27 +00002488/**
2489 * xmlStrPrintf:
2490 * @buf: the result buffer.
2491 * @len: the result buffer length.
2492 * @msg: the message with printf formatting.
2493 * @...: extra parameters for the message.
2494 *
2495 * Formats @msg and places result into @buf.
2496 *
2497 * Returns the number of characters written to @buf or -1 if an error occurs.
2498 */
2499int
2500xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2501 va_list args;
2502 int ret;
2503
2504 if((buf == NULL) || (msg == NULL)) {
2505 return(-1);
2506 }
2507
2508 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002509 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002510 va_end(args);
Daniel Veillardd96f6d32003-10-07 21:25:12 +00002511 buf[len - 1] = 0; /* be safe ! */
Aleksey Sanine7acf432003-10-02 20:05:27 +00002512
2513 return(ret);
2514}
2515
Aleksey Saninb5a46da2003-10-29 15:51:17 +00002516/**
2517 * xmlStrVPrintf:
2518 * @buf: the result buffer.
2519 * @len: the result buffer length.
2520 * @msg: the message with printf formatting.
2521 * @ap: extra parameters for the message.
2522 *
2523 * Formats @msg and places result into @buf.
2524 *
2525 * Returns the number of characters written to @buf or -1 if an error occurs.
2526 */
2527int
2528xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) {
2529 int ret;
2530
2531 if((buf == NULL) || (msg == NULL)) {
2532 return(-1);
2533 }
2534
2535 ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
2536 buf[len - 1] = 0; /* be safe ! */
2537
2538 return(ret);
2539}
Owen Taylor3473f882001-02-23 17:55:21 +00002540/************************************************************************
2541 * *
2542 * Commodity functions, cleanup needed ? *
2543 * *
2544 ************************************************************************/
2545
2546/**
2547 * areBlanks:
2548 * @ctxt: an XML parser context
2549 * @str: a xmlChar *
2550 * @len: the size of @str
2551 *
2552 * Is this a sequence of blank chars that one can ignore ?
2553 *
2554 * Returns 1 if ignorable 0 otherwise.
2555 */
2556
2557static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2558 int i, ret;
2559 xmlNodePtr lastChild;
2560
Daniel Veillard05c13a22001-09-09 08:38:09 +00002561 /*
2562 * Don't spend time trying to differentiate them, the same callback is
2563 * used !
2564 */
2565 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002566 return(0);
2567
Owen Taylor3473f882001-02-23 17:55:21 +00002568 /*
2569 * Check for xml:space value.
2570 */
2571 if (*(ctxt->space) == 1)
2572 return(0);
2573
2574 /*
2575 * Check that the string is made of blanks
2576 */
2577 for (i = 0;i < len;i++)
William M. Brack76e95df2003-10-18 16:20:14 +00002578 if (!(IS_BLANK_CH(str[i]))) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002579
2580 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002581 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002582 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002583 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002584 if (ctxt->myDoc != NULL) {
2585 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2586 if (ret == 0) return(1);
2587 if (ret == 1) return(0);
2588 }
2589
2590 /*
2591 * Otherwise, heuristic :-\
2592 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002593 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002594 if ((ctxt->node->children == NULL) &&
2595 (RAW == '<') && (NXT(1) == '/')) return(0);
2596
2597 lastChild = xmlGetLastChild(ctxt->node);
2598 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002599 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2600 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 } else if (xmlNodeIsText(lastChild))
2602 return(0);
2603 else if ((ctxt->node->children != NULL) &&
2604 (xmlNodeIsText(ctxt->node->children)))
2605 return(0);
2606 return(1);
2607}
2608
Owen Taylor3473f882001-02-23 17:55:21 +00002609/************************************************************************
2610 * *
2611 * Extra stuff for namespace support *
2612 * Relates to http://www.w3.org/TR/WD-xml-names *
2613 * *
2614 ************************************************************************/
2615
2616/**
2617 * xmlSplitQName:
2618 * @ctxt: an XML parser context
2619 * @name: an XML parser context
2620 * @prefix: a xmlChar **
2621 *
2622 * parse an UTF8 encoded XML qualified name string
2623 *
2624 * [NS 5] QName ::= (Prefix ':')? LocalPart
2625 *
2626 * [NS 6] Prefix ::= NCName
2627 *
2628 * [NS 7] LocalPart ::= NCName
2629 *
2630 * Returns the local part, and prefix is updated
2631 * to get the Prefix if any.
2632 */
2633
2634xmlChar *
2635xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2636 xmlChar buf[XML_MAX_NAMELEN + 5];
2637 xmlChar *buffer = NULL;
2638 int len = 0;
2639 int max = XML_MAX_NAMELEN;
2640 xmlChar *ret = NULL;
2641 const xmlChar *cur = name;
2642 int c;
2643
2644 *prefix = NULL;
2645
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002646 if (cur == NULL) return(NULL);
2647
Owen Taylor3473f882001-02-23 17:55:21 +00002648#ifndef XML_XML_NAMESPACE
2649 /* xml: prefix is not really a namespace */
2650 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2651 (cur[2] == 'l') && (cur[3] == ':'))
2652 return(xmlStrdup(name));
2653#endif
2654
Daniel Veillard597bc482003-07-24 16:08:28 +00002655 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002656 if (cur[0] == ':')
2657 return(xmlStrdup(name));
2658
2659 c = *cur++;
2660 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2661 buf[len++] = c;
2662 c = *cur++;
2663 }
2664 if (len >= max) {
2665 /*
2666 * Okay someone managed to make a huge name, so he's ready to pay
2667 * for the processing speed.
2668 */
2669 max = len * 2;
2670
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002671 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 return(NULL);
2675 }
2676 memcpy(buffer, buf, len);
2677 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2678 if (len + 10 > max) {
2679 max *= 2;
2680 buffer = (xmlChar *) xmlRealloc(buffer,
2681 max * sizeof(xmlChar));
2682 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002683 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002684 return(NULL);
2685 }
2686 }
2687 buffer[len++] = c;
2688 c = *cur++;
2689 }
2690 buffer[len] = 0;
2691 }
2692
Daniel Veillard597bc482003-07-24 16:08:28 +00002693 /* nasty but well=formed
2694 if ((c == ':') && (*cur == 0)) {
2695 return(xmlStrdup(name));
2696 } */
2697
Owen Taylor3473f882001-02-23 17:55:21 +00002698 if (buffer == NULL)
2699 ret = xmlStrndup(buf, len);
2700 else {
2701 ret = buffer;
2702 buffer = NULL;
2703 max = XML_MAX_NAMELEN;
2704 }
2705
2706
2707 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002708 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002709 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002710 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002711 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002712 }
Owen Taylor3473f882001-02-23 17:55:21 +00002713 len = 0;
2714
Daniel Veillardbb284f42002-10-16 18:02:47 +00002715 /*
2716 * Check that the first character is proper to start
2717 * a new name
2718 */
2719 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2720 ((c >= 0x41) && (c <= 0x5A)) ||
2721 (c == '_') || (c == ':'))) {
2722 int l;
2723 int first = CUR_SCHAR(cur, l);
2724
2725 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002726 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002727 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002728 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002729 }
2730 }
2731 cur++;
2732
Owen Taylor3473f882001-02-23 17:55:21 +00002733 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2734 buf[len++] = c;
2735 c = *cur++;
2736 }
2737 if (len >= max) {
2738 /*
2739 * Okay someone managed to make a huge name, so he's ready to pay
2740 * for the processing speed.
2741 */
2742 max = len * 2;
2743
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002744 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002745 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 return(NULL);
2748 }
2749 memcpy(buffer, buf, len);
2750 while (c != 0) { /* tested bigname2.xml */
2751 if (len + 10 > max) {
2752 max *= 2;
2753 buffer = (xmlChar *) xmlRealloc(buffer,
2754 max * sizeof(xmlChar));
2755 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002756 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002757 return(NULL);
2758 }
2759 }
2760 buffer[len++] = c;
2761 c = *cur++;
2762 }
2763 buffer[len] = 0;
2764 }
2765
2766 if (buffer == NULL)
2767 ret = xmlStrndup(buf, len);
2768 else {
2769 ret = buffer;
2770 }
2771 }
2772
2773 return(ret);
2774}
2775
2776/************************************************************************
2777 * *
2778 * The parser itself *
2779 * Relates to http://www.w3.org/TR/REC-xml *
2780 * *
2781 ************************************************************************/
2782
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002783static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002784static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002786
Owen Taylor3473f882001-02-23 17:55:21 +00002787/**
2788 * xmlParseName:
2789 * @ctxt: an XML parser context
2790 *
2791 * parse an XML name.
2792 *
2793 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2794 * CombiningChar | Extender
2795 *
2796 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2797 *
2798 * [6] Names ::= Name (S Name)*
2799 *
2800 * Returns the Name parsed or NULL
2801 */
2802
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002803const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002804xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002805 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002806 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002807 int count = 0;
2808
2809 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002810
2811 /*
2812 * Accelerator for simple ASCII names
2813 */
2814 in = ctxt->input->cur;
2815 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2816 ((*in >= 0x41) && (*in <= 0x5A)) ||
2817 (*in == '_') || (*in == ':')) {
2818 in++;
2819 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2820 ((*in >= 0x41) && (*in <= 0x5A)) ||
2821 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002822 (*in == '_') || (*in == '-') ||
2823 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002825 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002826 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002827 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002828 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002829 ctxt->nbChars += count;
2830 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002831 if (ret == NULL)
2832 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 return(ret);
2834 }
2835 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002836 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002837}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002838
Daniel Veillard46de64e2002-05-29 08:21:33 +00002839/**
2840 * xmlParseNameAndCompare:
2841 * @ctxt: an XML parser context
2842 *
2843 * parse an XML name and compares for match
2844 * (specialized for endtag parsing)
2845 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002846 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2847 * and the name for mismatch
2848 */
2849
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002850static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002851xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2852 const xmlChar *cmp = other;
2853 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002854 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002855
2856 GROW;
2857
2858 in = ctxt->input->cur;
2859 while (*in != 0 && *in == *cmp) {
2860 ++in;
2861 ++cmp;
2862 }
William M. Brack76e95df2003-10-18 16:20:14 +00002863 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002864 /* success */
2865 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002866 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002867 }
2868 /* failure (or end of input buffer), check with full function */
2869 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002870 /* strings coming from the dictionnary direct compare possible */
2871 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002872 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002873 }
2874 return ret;
2875}
2876
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002877static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002878xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002879 int len = 0, l;
2880 int c;
2881 int count = 0;
2882
2883 /*
2884 * Handler for more complex cases
2885 */
2886 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002887 c = CUR_CHAR(l);
2888 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2889 (!IS_LETTER(c) && (c != '_') &&
2890 (c != ':'))) {
2891 return(NULL);
2892 }
2893
2894 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002895 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002896 (c == '.') || (c == '-') ||
2897 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002898 (IS_COMBINING(c)) ||
2899 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (count++ > 100) {
2901 count = 0;
2902 GROW;
2903 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002904 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002905 NEXTL(l);
2906 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002907 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002908 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002909}
2910
2911/**
2912 * xmlParseStringName:
2913 * @ctxt: an XML parser context
2914 * @str: a pointer to the string pointer (IN/OUT)
2915 *
2916 * parse an XML name.
2917 *
2918 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2919 * CombiningChar | Extender
2920 *
2921 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2922 *
2923 * [6] Names ::= Name (S Name)*
2924 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002925 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002926 * is updated to the current location in the string.
2927 */
2928
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002929static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002930xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2931 xmlChar buf[XML_MAX_NAMELEN + 5];
2932 const xmlChar *cur = *str;
2933 int len = 0, l;
2934 int c;
2935
2936 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002937 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002938 (c != ':')) {
2939 return(NULL);
2940 }
2941
William M. Brack871611b2003-10-18 04:53:14 +00002942 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002943 (c == '.') || (c == '-') ||
2944 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002945 (IS_COMBINING(c)) ||
2946 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002947 COPY_BUF(l,buf,len,c);
2948 cur += l;
2949 c = CUR_SCHAR(cur, l);
2950 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2951 /*
2952 * Okay someone managed to make a huge name, so he's ready to pay
2953 * for the processing speed.
2954 */
2955 xmlChar *buffer;
2956 int max = len * 2;
2957
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002958 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002959 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 return(NULL);
2962 }
2963 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002964 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002965 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002966 (c == '.') || (c == '-') ||
2967 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002968 (IS_COMBINING(c)) ||
2969 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002970 if (len + 10 > max) {
2971 max *= 2;
2972 buffer = (xmlChar *) xmlRealloc(buffer,
2973 max * sizeof(xmlChar));
2974 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002975 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002976 return(NULL);
2977 }
2978 }
2979 COPY_BUF(l,buffer,len,c);
2980 cur += l;
2981 c = CUR_SCHAR(cur, l);
2982 }
2983 buffer[len] = 0;
2984 *str = cur;
2985 return(buffer);
2986 }
2987 }
2988 *str = cur;
2989 return(xmlStrndup(buf, len));
2990}
2991
2992/**
2993 * xmlParseNmtoken:
2994 * @ctxt: an XML parser context
2995 *
2996 * parse an XML Nmtoken.
2997 *
2998 * [7] Nmtoken ::= (NameChar)+
2999 *
3000 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
3001 *
3002 * Returns the Nmtoken parsed or NULL
3003 */
3004
3005xmlChar *
3006xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3007 xmlChar buf[XML_MAX_NAMELEN + 5];
3008 int len = 0, l;
3009 int c;
3010 int count = 0;
3011
3012 GROW;
3013 c = CUR_CHAR(l);
3014
William M. Brack871611b2003-10-18 04:53:14 +00003015 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003016 (c == '.') || (c == '-') ||
3017 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003018 (IS_COMBINING(c)) ||
3019 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003020 if (count++ > 100) {
3021 count = 0;
3022 GROW;
3023 }
3024 COPY_BUF(l,buf,len,c);
3025 NEXTL(l);
3026 c = CUR_CHAR(l);
3027 if (len >= XML_MAX_NAMELEN) {
3028 /*
3029 * Okay someone managed to make a huge token, so he's ready to pay
3030 * for the processing speed.
3031 */
3032 xmlChar *buffer;
3033 int max = len * 2;
3034
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003035 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003036 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 return(NULL);
3039 }
3040 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003041 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003042 (c == '.') || (c == '-') ||
3043 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003044 (IS_COMBINING(c)) ||
3045 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (count++ > 100) {
3047 count = 0;
3048 GROW;
3049 }
3050 if (len + 10 > max) {
3051 max *= 2;
3052 buffer = (xmlChar *) xmlRealloc(buffer,
3053 max * sizeof(xmlChar));
3054 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003055 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003056 return(NULL);
3057 }
3058 }
3059 COPY_BUF(l,buffer,len,c);
3060 NEXTL(l);
3061 c = CUR_CHAR(l);
3062 }
3063 buffer[len] = 0;
3064 return(buffer);
3065 }
3066 }
3067 if (len == 0)
3068 return(NULL);
3069 return(xmlStrndup(buf, len));
3070}
3071
3072/**
3073 * xmlParseEntityValue:
3074 * @ctxt: an XML parser context
3075 * @orig: if non-NULL store a copy of the original entity value
3076 *
3077 * parse a value for ENTITY declarations
3078 *
3079 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3080 * "'" ([^%&'] | PEReference | Reference)* "'"
3081 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003082 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003083 */
3084
3085xmlChar *
3086xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3087 xmlChar *buf = NULL;
3088 int len = 0;
3089 int size = XML_PARSER_BUFFER_SIZE;
3090 int c, l;
3091 xmlChar stop;
3092 xmlChar *ret = NULL;
3093 const xmlChar *cur = NULL;
3094 xmlParserInputPtr input;
3095
3096 if (RAW == '"') stop = '"';
3097 else if (RAW == '\'') stop = '\'';
3098 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003099 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 return(NULL);
3101 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003102 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003103 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003104 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003105 return(NULL);
3106 }
3107
3108 /*
3109 * The content of the entity definition is copied in a buffer.
3110 */
3111
3112 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3113 input = ctxt->input;
3114 GROW;
3115 NEXT;
3116 c = CUR_CHAR(l);
3117 /*
3118 * NOTE: 4.4.5 Included in Literal
3119 * When a parameter entity reference appears in a literal entity
3120 * value, ... a single or double quote character in the replacement
3121 * text is always treated as a normal data character and will not
3122 * terminate the literal.
3123 * In practice it means we stop the loop only when back at parsing
3124 * the initial entity and the quote is found
3125 */
William M. Brack871611b2003-10-18 04:53:14 +00003126 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003127 (ctxt->input != input))) {
3128 if (len + 5 >= size) {
3129 size *= 2;
3130 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3131 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003132 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 return(NULL);
3134 }
3135 }
3136 COPY_BUF(l,buf,len,c);
3137 NEXTL(l);
3138 /*
3139 * Pop-up of finished entities.
3140 */
3141 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3142 xmlPopInput(ctxt);
3143
3144 GROW;
3145 c = CUR_CHAR(l);
3146 if (c == 0) {
3147 GROW;
3148 c = CUR_CHAR(l);
3149 }
3150 }
3151 buf[len] = 0;
3152
3153 /*
3154 * Raise problem w.r.t. '&' and '%' being used in non-entities
3155 * reference constructs. Note Charref will be handled in
3156 * xmlStringDecodeEntities()
3157 */
3158 cur = buf;
3159 while (*cur != 0) { /* non input consuming */
3160 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3161 xmlChar *name;
3162 xmlChar tmp = *cur;
3163
3164 cur++;
3165 name = xmlParseStringName(ctxt, &cur);
3166 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003167 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003168 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003169 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003170 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003171 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3172 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003173 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003174 }
3175 if (name != NULL)
3176 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003177 if (*cur == 0)
3178 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003179 }
3180 cur++;
3181 }
3182
3183 /*
3184 * Then PEReference entities are substituted.
3185 */
3186 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003187 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003188 xmlFree(buf);
3189 } else {
3190 NEXT;
3191 /*
3192 * NOTE: 4.4.7 Bypassed
3193 * When a general entity reference appears in the EntityValue in
3194 * an entity declaration, it is bypassed and left as is.
3195 * so XML_SUBSTITUTE_REF is not set here.
3196 */
3197 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3198 0, 0, 0);
3199 if (orig != NULL)
3200 *orig = buf;
3201 else
3202 xmlFree(buf);
3203 }
3204
3205 return(ret);
3206}
3207
3208/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003209 * xmlParseAttValueComplex:
3210 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003211 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003212 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003213 *
3214 * parse a value for an attribute, this is the fallback function
3215 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003216 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003220static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003221xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003222 xmlChar limit = 0;
3223 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003224 int len = 0;
3225 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003226 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003227 xmlChar *current = NULL;
3228 xmlEntityPtr ent;
3229
Owen Taylor3473f882001-02-23 17:55:21 +00003230 if (NXT(0) == '"') {
3231 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3232 limit = '"';
3233 NEXT;
3234 } else if (NXT(0) == '\'') {
3235 limit = '\'';
3236 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3237 NEXT;
3238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003239 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003240 return(NULL);
3241 }
3242
3243 /*
3244 * allocate a translation buffer.
3245 */
3246 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003247 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003248 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003249
3250 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003251 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003252 */
3253 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003254 while ((NXT(0) != limit) && /* checked */
3255 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003256 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003257 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003258 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003259 if (NXT(1) == '#') {
3260 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003261
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003263 if (ctxt->replaceEntities) {
3264 if (len > buf_size - 10) {
3265 growBuffer(buf);
3266 }
3267 buf[len++] = '&';
3268 } else {
3269 /*
3270 * The reparsing will be done in xmlStringGetNodeList()
3271 * called by the attribute() function in SAX.c
3272 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003273 if (len > buf_size - 10) {
3274 growBuffer(buf);
3275 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003276 buf[len++] = '&';
3277 buf[len++] = '#';
3278 buf[len++] = '3';
3279 buf[len++] = '8';
3280 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003283 if (len > buf_size - 10) {
3284 growBuffer(buf);
3285 }
Owen Taylor3473f882001-02-23 17:55:21 +00003286 len += xmlCopyChar(0, &buf[len], val);
3287 }
3288 } else {
3289 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003290 if ((ent != NULL) &&
3291 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3292 if (len > buf_size - 10) {
3293 growBuffer(buf);
3294 }
3295 if ((ctxt->replaceEntities == 0) &&
3296 (ent->content[0] == '&')) {
3297 buf[len++] = '&';
3298 buf[len++] = '#';
3299 buf[len++] = '3';
3300 buf[len++] = '8';
3301 buf[len++] = ';';
3302 } else {
3303 buf[len++] = ent->content[0];
3304 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003305 } else if ((ent != NULL) &&
3306 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003307 xmlChar *rep;
3308
3309 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3310 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003311 XML_SUBSTITUTE_REF,
3312 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003313 if (rep != NULL) {
3314 current = rep;
3315 while (*current != 0) { /* non input consuming */
3316 buf[len++] = *current++;
3317 if (len > buf_size - 10) {
3318 growBuffer(buf);
3319 }
3320 }
3321 xmlFree(rep);
3322 }
3323 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003324 if (len > buf_size - 10) {
3325 growBuffer(buf);
3326 }
Owen Taylor3473f882001-02-23 17:55:21 +00003327 if (ent->content != NULL)
3328 buf[len++] = ent->content[0];
3329 }
3330 } else if (ent != NULL) {
3331 int i = xmlStrlen(ent->name);
3332 const xmlChar *cur = ent->name;
3333
3334 /*
3335 * This may look absurd but is needed to detect
3336 * entities problems
3337 */
3338 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3339 (ent->content != NULL)) {
3340 xmlChar *rep;
3341 rep = xmlStringDecodeEntities(ctxt, ent->content,
3342 XML_SUBSTITUTE_REF, 0, 0, 0);
3343 if (rep != NULL)
3344 xmlFree(rep);
3345 }
3346
3347 /*
3348 * Just output the reference
3349 */
3350 buf[len++] = '&';
3351 if (len > buf_size - i - 10) {
3352 growBuffer(buf);
3353 }
3354 for (;i > 0;i--)
3355 buf[len++] = *cur++;
3356 buf[len++] = ';';
3357 }
3358 }
3359 } else {
3360 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003361 if ((len != 0) || (!normalize)) {
3362 if ((!normalize) || (!in_space)) {
3363 COPY_BUF(l,buf,len,0x20);
3364 if (len > buf_size - 10) {
3365 growBuffer(buf);
3366 }
3367 }
3368 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003369 }
3370 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003371 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003372 COPY_BUF(l,buf,len,c);
3373 if (len > buf_size - 10) {
3374 growBuffer(buf);
3375 }
3376 }
3377 NEXTL(l);
3378 }
3379 GROW;
3380 c = CUR_CHAR(l);
3381 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003382 if ((in_space) && (normalize)) {
3383 while (buf[len - 1] == 0x20) len--;
3384 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003385 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003386 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003387 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003388 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003389 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3390 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003391 } else
3392 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003393 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003394 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003395
3396mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003397 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003398 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003399}
3400
3401/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003402 * xmlParseAttValue:
3403 * @ctxt: an XML parser context
3404 *
3405 * parse a value for an attribute
3406 * Note: the parser won't do substitution of entities here, this
3407 * will be handled later in xmlStringGetNodeList
3408 *
3409 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3410 * "'" ([^<&'] | Reference)* "'"
3411 *
3412 * 3.3.3 Attribute-Value Normalization:
3413 * Before the value of an attribute is passed to the application or
3414 * checked for validity, the XML processor must normalize it as follows:
3415 * - a character reference is processed by appending the referenced
3416 * character to the attribute value
3417 * - an entity reference is processed by recursively processing the
3418 * replacement text of the entity
3419 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3420 * appending #x20 to the normalized value, except that only a single
3421 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3422 * parsed entity or the literal entity value of an internal parsed entity
3423 * - other characters are processed by appending them to the normalized value
3424 * If the declared value is not CDATA, then the XML processor must further
3425 * process the normalized attribute value by discarding any leading and
3426 * trailing space (#x20) characters, and by replacing sequences of space
3427 * (#x20) characters by a single space (#x20) character.
3428 * All attributes for which no declaration has been read should be treated
3429 * by a non-validating parser as if declared CDATA.
3430 *
3431 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3432 */
3433
3434
3435xmlChar *
3436xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003437 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003438}
3439
3440/**
Owen Taylor3473f882001-02-23 17:55:21 +00003441 * xmlParseSystemLiteral:
3442 * @ctxt: an XML parser context
3443 *
3444 * parse an XML Literal
3445 *
3446 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3447 *
3448 * Returns the SystemLiteral parsed or NULL
3449 */
3450
3451xmlChar *
3452xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3453 xmlChar *buf = NULL;
3454 int len = 0;
3455 int size = XML_PARSER_BUFFER_SIZE;
3456 int cur, l;
3457 xmlChar stop;
3458 int state = ctxt->instate;
3459 int count = 0;
3460
3461 SHRINK;
3462 if (RAW == '"') {
3463 NEXT;
3464 stop = '"';
3465 } else if (RAW == '\'') {
3466 NEXT;
3467 stop = '\'';
3468 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003469 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003470 return(NULL);
3471 }
3472
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003473 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003474 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003475 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003476 return(NULL);
3477 }
3478 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3479 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003480 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003481 if (len + 5 >= size) {
3482 size *= 2;
3483 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3484 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 ctxt->instate = (xmlParserInputState) state;
3487 return(NULL);
3488 }
3489 }
3490 count++;
3491 if (count > 50) {
3492 GROW;
3493 count = 0;
3494 }
3495 COPY_BUF(l,buf,len,cur);
3496 NEXTL(l);
3497 cur = CUR_CHAR(l);
3498 if (cur == 0) {
3499 GROW;
3500 SHRINK;
3501 cur = CUR_CHAR(l);
3502 }
3503 }
3504 buf[len] = 0;
3505 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003506 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003507 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003508 } else {
3509 NEXT;
3510 }
3511 return(buf);
3512}
3513
3514/**
3515 * xmlParsePubidLiteral:
3516 * @ctxt: an XML parser context
3517 *
3518 * parse an XML public literal
3519 *
3520 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3521 *
3522 * Returns the PubidLiteral parsed or NULL.
3523 */
3524
3525xmlChar *
3526xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3527 xmlChar *buf = NULL;
3528 int len = 0;
3529 int size = XML_PARSER_BUFFER_SIZE;
3530 xmlChar cur;
3531 xmlChar stop;
3532 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003533 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003534
3535 SHRINK;
3536 if (RAW == '"') {
3537 NEXT;
3538 stop = '"';
3539 } else if (RAW == '\'') {
3540 NEXT;
3541 stop = '\'';
3542 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003543 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003544 return(NULL);
3545 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003546 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003547 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003548 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003549 return(NULL);
3550 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003551 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003552 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003553 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003554 if (len + 1 >= size) {
3555 size *= 2;
3556 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3557 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003558 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003559 return(NULL);
3560 }
3561 }
3562 buf[len++] = cur;
3563 count++;
3564 if (count > 50) {
3565 GROW;
3566 count = 0;
3567 }
3568 NEXT;
3569 cur = CUR;
3570 if (cur == 0) {
3571 GROW;
3572 SHRINK;
3573 cur = CUR;
3574 }
3575 }
3576 buf[len] = 0;
3577 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003578 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003579 } else {
3580 NEXT;
3581 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003582 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003583 return(buf);
3584}
3585
Daniel Veillard48b2f892001-02-25 16:11:03 +00003586void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003587/**
3588 * xmlParseCharData:
3589 * @ctxt: an XML parser context
3590 * @cdata: int indicating whether we are within a CDATA section
3591 *
3592 * parse a CharData section.
3593 * if we are within a CDATA section ']]>' marks an end of section.
3594 *
3595 * The right angle bracket (>) may be represented using the string "&gt;",
3596 * and must, for compatibility, be escaped using "&gt;" or a character
3597 * reference when it appears in the string "]]>" in content, when that
3598 * string is not marking the end of a CDATA section.
3599 *
3600 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3601 */
3602
3603void
3604xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003605 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003606 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003607 int line = ctxt->input->line;
3608 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003609
3610 SHRINK;
3611 GROW;
3612 /*
3613 * Accelerated common case where input don't need to be
3614 * modified before passing it to the handler.
3615 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003616 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003617 in = ctxt->input->cur;
3618 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003619get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003620 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3621 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003622 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003623 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003624 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003625 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003626 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003627 ctxt->input->line++;
3628 in++;
3629 }
3630 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003631 }
3632 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003633 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003634 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003635 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003636 return;
3637 }
3638 in++;
3639 goto get_more;
3640 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003641 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003642 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003643 if ((ctxt->sax->ignorableWhitespace !=
3644 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003645 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003646 const xmlChar *tmp = ctxt->input->cur;
3647 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003648
Daniel Veillarda7374592001-05-10 14:17:55 +00003649 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003650 ctxt->sax->ignorableWhitespace(ctxt->userData,
3651 tmp, nbchar);
3652 } else if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData,
3654 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003655 line = ctxt->input->line;
3656 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003657 } else {
3658 if (ctxt->sax->characters != NULL)
3659 ctxt->sax->characters(ctxt->userData,
3660 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003661 line = ctxt->input->line;
3662 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003663 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003664 }
3665 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003666 if (*in == 0xD) {
3667 in++;
3668 if (*in == 0xA) {
3669 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003670 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003671 ctxt->input->line++;
3672 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003673 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003674 in--;
3675 }
3676 if (*in == '<') {
3677 return;
3678 }
3679 if (*in == '&') {
3680 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003681 }
3682 SHRINK;
3683 GROW;
3684 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003685 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003686 nbchar = 0;
3687 }
Daniel Veillard50582112001-03-26 22:52:16 +00003688 ctxt->input->line = line;
3689 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003690 xmlParseCharDataComplex(ctxt, cdata);
3691}
3692
Daniel Veillard01c13b52002-12-10 15:19:08 +00003693/**
3694 * xmlParseCharDataComplex:
3695 * @ctxt: an XML parser context
3696 * @cdata: int indicating whether we are within a CDATA section
3697 *
3698 * parse a CharData section.this is the fallback function
3699 * of xmlParseCharData() when the parsing requires handling
3700 * of non-ASCII characters.
3701 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003702void
3703xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003704 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3705 int nbchar = 0;
3706 int cur, l;
3707 int count = 0;
3708
3709 SHRINK;
3710 GROW;
3711 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003712 while ((cur != '<') && /* checked */
3713 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003714 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003715 if ((cur == ']') && (NXT(1) == ']') &&
3716 (NXT(2) == '>')) {
3717 if (cdata) break;
3718 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003719 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003720 }
3721 }
3722 COPY_BUF(l,buf,nbchar,cur);
3723 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003724 buf[nbchar] = 0;
3725
Owen Taylor3473f882001-02-23 17:55:21 +00003726 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003727 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003728 */
3729 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3730 if (areBlanks(ctxt, buf, nbchar)) {
3731 if (ctxt->sax->ignorableWhitespace != NULL)
3732 ctxt->sax->ignorableWhitespace(ctxt->userData,
3733 buf, nbchar);
3734 } else {
3735 if (ctxt->sax->characters != NULL)
3736 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3737 }
3738 }
3739 nbchar = 0;
3740 }
3741 count++;
3742 if (count > 50) {
3743 GROW;
3744 count = 0;
3745 }
3746 NEXTL(l);
3747 cur = CUR_CHAR(l);
3748 }
3749 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003750 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003751 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003752 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003753 */
3754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3755 if (areBlanks(ctxt, buf, nbchar)) {
3756 if (ctxt->sax->ignorableWhitespace != NULL)
3757 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3758 } else {
3759 if (ctxt->sax->characters != NULL)
3760 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3761 }
3762 }
3763 }
3764}
3765
3766/**
3767 * xmlParseExternalID:
3768 * @ctxt: an XML parser context
3769 * @publicID: a xmlChar** receiving PubidLiteral
3770 * @strict: indicate whether we should restrict parsing to only
3771 * production [75], see NOTE below
3772 *
3773 * Parse an External ID or a Public ID
3774 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003775 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003776 * 'PUBLIC' S PubidLiteral S SystemLiteral
3777 *
3778 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3779 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3780 *
3781 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3782 *
3783 * Returns the function returns SystemLiteral and in the second
3784 * case publicID receives PubidLiteral, is strict is off
3785 * it is possible to return NULL and have publicID set.
3786 */
3787
3788xmlChar *
3789xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3790 xmlChar *URI = NULL;
3791
3792 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003793
3794 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003795 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003796 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003797 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3799 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003800 }
3801 SKIP_BLANKS;
3802 URI = xmlParseSystemLiteral(ctxt);
3803 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003804 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003806 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003807 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003808 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003809 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003810 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003811 }
3812 SKIP_BLANKS;
3813 *publicID = xmlParsePubidLiteral(ctxt);
3814 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003815 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003816 }
3817 if (strict) {
3818 /*
3819 * We don't handle [83] so "S SystemLiteral" is required.
3820 */
William M. Brack76e95df2003-10-18 16:20:14 +00003821 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003822 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003823 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003824 }
3825 } else {
3826 /*
3827 * We handle [83] so we return immediately, if
3828 * "S SystemLiteral" is not detected. From a purely parsing
3829 * point of view that's a nice mess.
3830 */
3831 const xmlChar *ptr;
3832 GROW;
3833
3834 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003835 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003836
William M. Brack76e95df2003-10-18 16:20:14 +00003837 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003838 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3839 }
3840 SKIP_BLANKS;
3841 URI = xmlParseSystemLiteral(ctxt);
3842 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003843 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003844 }
3845 }
3846 return(URI);
3847}
3848
3849/**
3850 * xmlParseComment:
3851 * @ctxt: an XML parser context
3852 *
3853 * Skip an XML (SGML) comment <!-- .... -->
3854 * The spec says that "For compatibility, the string "--" (double-hyphen)
3855 * must not occur within comments. "
3856 *
3857 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3858 */
3859void
3860xmlParseComment(xmlParserCtxtPtr ctxt) {
3861 xmlChar *buf = NULL;
3862 int len;
3863 int size = XML_PARSER_BUFFER_SIZE;
3864 int q, ql;
3865 int r, rl;
3866 int cur, l;
3867 xmlParserInputState state;
3868 xmlParserInputPtr input = ctxt->input;
3869 int count = 0;
3870
3871 /*
3872 * Check that there is a comment right here.
3873 */
3874 if ((RAW != '<') || (NXT(1) != '!') ||
3875 (NXT(2) != '-') || (NXT(3) != '-')) return;
3876
3877 state = ctxt->instate;
3878 ctxt->instate = XML_PARSER_COMMENT;
3879 SHRINK;
3880 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003881 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003882 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003883 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003884 ctxt->instate = state;
3885 return;
3886 }
3887 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003888 if (q == 0)
3889 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003890 NEXTL(ql);
3891 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003892 if (r == 0)
3893 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003894 NEXTL(rl);
3895 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003896 if (cur == 0)
3897 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003898 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003899 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003900 ((cur != '>') ||
3901 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003902 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003903 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003904 }
3905 if (len + 5 >= size) {
3906 size *= 2;
3907 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3908 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003909 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003910 ctxt->instate = state;
3911 return;
3912 }
3913 }
3914 COPY_BUF(ql,buf,len,q);
3915 q = r;
3916 ql = rl;
3917 r = cur;
3918 rl = l;
3919
3920 count++;
3921 if (count > 50) {
3922 GROW;
3923 count = 0;
3924 }
3925 NEXTL(l);
3926 cur = CUR_CHAR(l);
3927 if (cur == 0) {
3928 SHRINK;
3929 GROW;
3930 cur = CUR_CHAR(l);
3931 }
3932 }
3933 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003934 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003935 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003936 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003937 xmlFree(buf);
3938 } else {
3939 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003940 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3941 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003942 }
3943 NEXT;
3944 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3945 (!ctxt->disableSAX))
3946 ctxt->sax->comment(ctxt->userData, buf);
3947 xmlFree(buf);
3948 }
3949 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003950 return;
3951not_terminated:
3952 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3953 "Comment not terminated\n", NULL);
3954 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003955}
3956
3957/**
3958 * xmlParsePITarget:
3959 * @ctxt: an XML parser context
3960 *
3961 * parse the name of a PI
3962 *
3963 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3964 *
3965 * Returns the PITarget name or NULL
3966 */
3967
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003968const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003969xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003970 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003971
3972 name = xmlParseName(ctxt);
3973 if ((name != NULL) &&
3974 ((name[0] == 'x') || (name[0] == 'X')) &&
3975 ((name[1] == 'm') || (name[1] == 'M')) &&
3976 ((name[2] == 'l') || (name[2] == 'L'))) {
3977 int i;
3978 if ((name[0] == 'x') && (name[1] == 'm') &&
3979 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003980 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003981 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003982 return(name);
3983 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003984 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003985 return(name);
3986 }
3987 for (i = 0;;i++) {
3988 if (xmlW3CPIs[i] == NULL) break;
3989 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3990 return(name);
3991 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003992 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3993 "xmlParsePITarget: invalid name prefix 'xml'\n",
3994 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 }
3996 return(name);
3997}
3998
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes, optional blanks around each token, nothing after the closing
 * quote). Any other shape produces an XML_WAR_CATALOG_PI warning, except
 * a missing '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* keyword */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;

    /* opening quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    p++;

    /* quoted value, up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4060
Owen Taylor3473f882001-02-23 17:55:21 +00004061/**
4062 * xmlParsePI:
4063 * @ctxt: an XML parser context
4064 *
4065 * parse an XML Processing Instruction.
4066 *
4067 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4068 *
4069 * The processing is transfered to SAX once parsed.
4070 */
4071
4072void
4073xmlParsePI(xmlParserCtxtPtr ctxt) {
4074 xmlChar *buf = NULL;
4075 int len = 0;
4076 int size = XML_PARSER_BUFFER_SIZE;
4077 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004078 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004079 xmlParserInputState state;
4080 int count = 0;
4081
4082 if ((RAW == '<') && (NXT(1) == '?')) {
4083 xmlParserInputPtr input = ctxt->input;
4084 state = ctxt->instate;
4085 ctxt->instate = XML_PARSER_PI;
4086 /*
4087 * this is a Processing Instruction.
4088 */
4089 SKIP(2);
4090 SHRINK;
4091
4092 /*
4093 * Parse the target name and check for special support like
4094 * namespace.
4095 */
4096 target = xmlParsePITarget(ctxt);
4097 if (target != NULL) {
4098 if ((RAW == '?') && (NXT(1) == '>')) {
4099 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004100 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4101 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004102 }
4103 SKIP(2);
4104
4105 /*
4106 * SAX: PI detected.
4107 */
4108 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4109 (ctxt->sax->processingInstruction != NULL))
4110 ctxt->sax->processingInstruction(ctxt->userData,
4111 target, NULL);
4112 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004113 return;
4114 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004115 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004116 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004117 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004118 ctxt->instate = state;
4119 return;
4120 }
4121 cur = CUR;
4122 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004123 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4124 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004125 }
4126 SKIP_BLANKS;
4127 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004128 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004129 ((cur != '?') || (NXT(1) != '>'))) {
4130 if (len + 5 >= size) {
4131 size *= 2;
4132 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4133 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004134 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004135 ctxt->instate = state;
4136 return;
4137 }
4138 }
4139 count++;
4140 if (count > 50) {
4141 GROW;
4142 count = 0;
4143 }
4144 COPY_BUF(l,buf,len,cur);
4145 NEXTL(l);
4146 cur = CUR_CHAR(l);
4147 if (cur == 0) {
4148 SHRINK;
4149 GROW;
4150 cur = CUR_CHAR(l);
4151 }
4152 }
4153 buf[len] = 0;
4154 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004155 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4156 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004157 } else {
4158 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4160 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004161 }
4162 SKIP(2);
4163
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004164#ifdef LIBXML_CATALOG_ENABLED
4165 if (((state == XML_PARSER_MISC) ||
4166 (state == XML_PARSER_START)) &&
4167 (xmlStrEqual(target, XML_CATALOG_PI))) {
4168 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4169 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4170 (allow == XML_CATA_ALLOW_ALL))
4171 xmlParseCatalogPI(ctxt, buf);
4172 }
4173#endif
4174
4175
Owen Taylor3473f882001-02-23 17:55:21 +00004176 /*
4177 * SAX: PI detected.
4178 */
4179 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4180 (ctxt->sax->processingInstruction != NULL))
4181 ctxt->sax->processingInstruction(ctxt->userData,
4182 target, buf);
4183 }
4184 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004185 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004186 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004187 }
4188 ctxt->instate = state;
4189 }
4190}
4191
4192/**
4193 * xmlParseNotationDecl:
4194 * @ctxt: an XML parser context
4195 *
4196 * parse a notation declaration
4197 *
4198 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4199 *
4200 * Hence there is actually 3 choices:
4201 * 'PUBLIC' S PubidLiteral
4202 * 'PUBLIC' S PubidLiteral S SystemLiteral
4203 * and 'SYSTEM' S SystemLiteral
4204 *
4205 * See the NOTE on xmlParseExternalID().
4206 */
4207
4208void
4209xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004210 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004211 xmlChar *Pubid;
4212 xmlChar *Systemid;
4213
Daniel Veillarda07050d2003-10-19 14:46:32 +00004214 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004215 xmlParserInputPtr input = ctxt->input;
4216 SHRINK;
4217 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004218 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004219 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4220 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004221 return;
4222 }
4223 SKIP_BLANKS;
4224
Daniel Veillard76d66f42001-05-16 21:05:17 +00004225 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004227 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004228 return;
4229 }
William M. Brack76e95df2003-10-18 16:20:14 +00004230 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004231 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004232 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return;
4234 }
4235 SKIP_BLANKS;
4236
4237 /*
4238 * Parse the IDs.
4239 */
4240 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4241 SKIP_BLANKS;
4242
4243 if (RAW == '>') {
4244 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004245 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4246 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004247 }
4248 NEXT;
4249 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4250 (ctxt->sax->notationDecl != NULL))
4251 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4252 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004253 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004254 }
Owen Taylor3473f882001-02-23 17:55:21 +00004255 if (Systemid != NULL) xmlFree(Systemid);
4256 if (Pubid != NULL) xmlFree(Pubid);
4257 }
4258}
4259
4260/**
4261 * xmlParseEntityDecl:
4262 * @ctxt: an XML parser context
4263 *
4264 * parse <!ENTITY declarations
4265 *
4266 * [70] EntityDecl ::= GEDecl | PEDecl
4267 *
4268 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4269 *
4270 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4271 *
4272 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4273 *
4274 * [74] PEDef ::= EntityValue | ExternalID
4275 *
4276 * [76] NDataDecl ::= S 'NDATA' S Name
4277 *
4278 * [ VC: Notation Declared ]
4279 * The Name must match the declared name of a notation.
4280 */
4281
4282void
4283xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004284 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004285 xmlChar *value = NULL;
4286 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004287 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 int isParameter = 0;
4289 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004290 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004291
4292 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004293 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004294 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004295 SHRINK;
4296 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004297 skipped = SKIP_BLANKS;
4298 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4300 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004301 }
Owen Taylor3473f882001-02-23 17:55:21 +00004302
4303 if (RAW == '%') {
4304 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004305 skipped = SKIP_BLANKS;
4306 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4308 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004309 }
Owen Taylor3473f882001-02-23 17:55:21 +00004310 isParameter = 1;
4311 }
4312
Daniel Veillard76d66f42001-05-16 21:05:17 +00004313 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004314 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004315 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4316 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004317 return;
4318 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004319 skipped = SKIP_BLANKS;
4320 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004321 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4322 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
Owen Taylor3473f882001-02-23 17:55:21 +00004324
Daniel Veillardf5582f12002-06-11 10:08:16 +00004325 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004326 /*
4327 * handle the various case of definitions...
4328 */
4329 if (isParameter) {
4330 if ((RAW == '"') || (RAW == '\'')) {
4331 value = xmlParseEntityValue(ctxt, &orig);
4332 if (value) {
4333 if ((ctxt->sax != NULL) &&
4334 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4335 ctxt->sax->entityDecl(ctxt->userData, name,
4336 XML_INTERNAL_PARAMETER_ENTITY,
4337 NULL, NULL, value);
4338 }
4339 } else {
4340 URI = xmlParseExternalID(ctxt, &literal, 1);
4341 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004342 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
4344 if (URI) {
4345 xmlURIPtr uri;
4346
4347 uri = xmlParseURI((const char *) URI);
4348 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004349 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4350 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004351 /*
4352 * This really ought to be a well formedness error
4353 * but the XML Core WG decided otherwise c.f. issue
4354 * E26 of the XML erratas.
4355 */
Owen Taylor3473f882001-02-23 17:55:21 +00004356 } else {
4357 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004358 /*
4359 * Okay this is foolish to block those but not
4360 * invalid URIs.
4361 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004362 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004363 } else {
4364 if ((ctxt->sax != NULL) &&
4365 (!ctxt->disableSAX) &&
4366 (ctxt->sax->entityDecl != NULL))
4367 ctxt->sax->entityDecl(ctxt->userData, name,
4368 XML_EXTERNAL_PARAMETER_ENTITY,
4369 literal, URI, NULL);
4370 }
4371 xmlFreeURI(uri);
4372 }
4373 }
4374 }
4375 } else {
4376 if ((RAW == '"') || (RAW == '\'')) {
4377 value = xmlParseEntityValue(ctxt, &orig);
4378 if ((ctxt->sax != NULL) &&
4379 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4380 ctxt->sax->entityDecl(ctxt->userData, name,
4381 XML_INTERNAL_GENERAL_ENTITY,
4382 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004383 /*
4384 * For expat compatibility in SAX mode.
4385 */
4386 if ((ctxt->myDoc == NULL) ||
4387 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4388 if (ctxt->myDoc == NULL) {
4389 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4390 }
4391 if (ctxt->myDoc->intSubset == NULL)
4392 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4393 BAD_CAST "fake", NULL, NULL);
4394
Daniel Veillard1af9a412003-08-20 22:54:39 +00004395 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4396 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004397 }
Owen Taylor3473f882001-02-23 17:55:21 +00004398 } else {
4399 URI = xmlParseExternalID(ctxt, &literal, 1);
4400 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004401 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004402 }
4403 if (URI) {
4404 xmlURIPtr uri;
4405
4406 uri = xmlParseURI((const char *)URI);
4407 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004408 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4409 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004410 /*
4411 * This really ought to be a well formedness error
4412 * but the XML Core WG decided otherwise c.f. issue
4413 * E26 of the XML erratas.
4414 */
Owen Taylor3473f882001-02-23 17:55:21 +00004415 } else {
4416 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004417 /*
4418 * Okay this is foolish to block those but not
4419 * invalid URIs.
4420 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004421 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 }
4423 xmlFreeURI(uri);
4424 }
4425 }
William M. Brack76e95df2003-10-18 16:20:14 +00004426 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4428 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004429 }
4430 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004431 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004432 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004433 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004434 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4435 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004436 }
4437 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004438 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4440 (ctxt->sax->unparsedEntityDecl != NULL))
4441 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4442 literal, URI, ndata);
4443 } else {
4444 if ((ctxt->sax != NULL) &&
4445 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4446 ctxt->sax->entityDecl(ctxt->userData, name,
4447 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4448 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004449 /*
4450 * For expat compatibility in SAX mode.
4451 * assuming the entity repalcement was asked for
4452 */
4453 if ((ctxt->replaceEntities != 0) &&
4454 ((ctxt->myDoc == NULL) ||
4455 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4456 if (ctxt->myDoc == NULL) {
4457 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4458 }
4459
4460 if (ctxt->myDoc->intSubset == NULL)
4461 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4462 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004463 xmlSAX2EntityDecl(ctxt, name,
4464 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4465 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004466 }
Owen Taylor3473f882001-02-23 17:55:21 +00004467 }
4468 }
4469 }
4470 SKIP_BLANKS;
4471 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004472 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004473 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004474 } else {
4475 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004476 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4477 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004478 }
4479 NEXT;
4480 }
4481 if (orig != NULL) {
4482 /*
4483 * Ugly mechanism to save the raw entity value.
4484 */
4485 xmlEntityPtr cur = NULL;
4486
4487 if (isParameter) {
4488 if ((ctxt->sax != NULL) &&
4489 (ctxt->sax->getParameterEntity != NULL))
4490 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4491 } else {
4492 if ((ctxt->sax != NULL) &&
4493 (ctxt->sax->getEntity != NULL))
4494 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004495 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004496 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004497 }
Owen Taylor3473f882001-02-23 17:55:21 +00004498 }
4499 if (cur != NULL) {
4500 if (cur->orig != NULL)
4501 xmlFree(orig);
4502 else
4503 cur->orig = orig;
4504 } else
4505 xmlFree(orig);
4506 }
Owen Taylor3473f882001-02-23 17:55:21 +00004507 if (value != NULL) xmlFree(value);
4508 if (URI != NULL) xmlFree(URI);
4509 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004510 }
4511}
4512
4513/**
4514 * xmlParseDefaultDecl:
4515 * @ctxt: an XML parser context
4516 * @value: Receive a possible fixed default value for the attribute
4517 *
4518 * Parse an attribute default declaration
4519 *
4520 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4521 *
4522 * [ VC: Required Attribute ]
4523 * if the default declaration is the keyword #REQUIRED, then the
4524 * attribute must be specified for all elements of the type in the
4525 * attribute-list declaration.
4526 *
4527 * [ VC: Attribute Default Legal ]
4528 * The declared default value must meet the lexical constraints of
4529 * the declared attribute type c.f. xmlValidateAttributeDecl()
4530 *
4531 * [ VC: Fixed Attribute Default ]
4532 * if an attribute has a default value declared with the #FIXED
4533 * keyword, instances of that attribute must match the default value.
4534 *
4535 * [ WFC: No < in Attribute Values ]
4536 * handled in xmlParseAttValue()
4537 *
4538 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4539 * or XML_ATTRIBUTE_FIXED.
4540 */
4541
4542int
4543xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4544 int val;
4545 xmlChar *ret;
4546
4547 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004548 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004549 SKIP(9);
4550 return(XML_ATTRIBUTE_REQUIRED);
4551 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004552 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004553 SKIP(8);
4554 return(XML_ATTRIBUTE_IMPLIED);
4555 }
4556 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004557 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004558 SKIP(6);
4559 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004560 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004561 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4562 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004563 }
4564 SKIP_BLANKS;
4565 }
4566 ret = xmlParseAttValue(ctxt);
4567 ctxt->instate = XML_PARSER_DTD;
4568 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004569 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004570 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004571 } else
4572 *value = ret;
4573 return(val);
4574}
4575
4576/**
4577 * xmlParseNotationType:
4578 * @ctxt: an XML parser context
4579 *
4580 * parse an Notation attribute type.
4581 *
4582 * Note: the leading 'NOTATION' S part has already being parsed...
4583 *
4584 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4585 *
4586 * [ VC: Notation Attributes ]
4587 * Values of this type must match one of the notation names included
4588 * in the declaration; all notation names in the declaration must be declared.
4589 *
4590 * Returns: the notation attribute tree built while parsing
4591 */
4592
4593xmlEnumerationPtr
4594xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004595 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004596 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4597
4598 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004599 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004600 return(NULL);
4601 }
4602 SHRINK;
4603 do {
4604 NEXT;
4605 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004606 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004607 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004608 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4609 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004610 return(ret);
4611 }
4612 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004613 if (cur == NULL) return(ret);
4614 if (last == NULL) ret = last = cur;
4615 else {
4616 last->next = cur;
4617 last = cur;
4618 }
4619 SKIP_BLANKS;
4620 } while (RAW == '|');
4621 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004622 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004623 if ((last != NULL) && (last != ret))
4624 xmlFreeEnumeration(last);
4625 return(ret);
4626 }
4627 NEXT;
4628 return(ret);
4629}
4630
4631/**
4632 * xmlParseEnumerationType:
4633 * @ctxt: an XML parser context
4634 *
4635 * parse an Enumeration attribute type.
4636 *
4637 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4638 *
4639 * [ VC: Enumeration ]
4640 * Values of this type must match one of the Nmtoken tokens in
4641 * the declaration
4642 *
4643 * Returns: the enumeration attribute tree built while parsing
4644 */
4645
4646xmlEnumerationPtr
4647xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4648 xmlChar *name;
4649 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4650
4651 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004652 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004653 return(NULL);
4654 }
4655 SHRINK;
4656 do {
4657 NEXT;
4658 SKIP_BLANKS;
4659 name = xmlParseNmtoken(ctxt);
4660 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004661 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004662 return(ret);
4663 }
4664 cur = xmlCreateEnumeration(name);
4665 xmlFree(name);
4666 if (cur == NULL) return(ret);
4667 if (last == NULL) ret = last = cur;
4668 else {
4669 last->next = cur;
4670 last = cur;
4671 }
4672 SKIP_BLANKS;
4673 } while (RAW == '|');
4674 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004675 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004676 return(ret);
4677 }
4678 NEXT;
4679 return(ret);
4680}
4681
4682/**
4683 * xmlParseEnumeratedType:
4684 * @ctxt: an XML parser context
4685 * @tree: the enumeration tree built while parsing
4686 *
4687 * parse an Enumerated attribute type.
4688 *
4689 * [57] EnumeratedType ::= NotationType | Enumeration
4690 *
4691 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4692 *
4693 *
4694 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4695 */
4696
4697int
4698xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004699 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004700 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004701 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004704 return(0);
4705 }
4706 SKIP_BLANKS;
4707 *tree = xmlParseNotationType(ctxt);
4708 if (*tree == NULL) return(0);
4709 return(XML_ATTRIBUTE_NOTATION);
4710 }
4711 *tree = xmlParseEnumerationType(ctxt);
4712 if (*tree == NULL) return(0);
4713 return(XML_ATTRIBUTE_ENUMERATION);
4714}
4715
4716/**
4717 * xmlParseAttributeType:
4718 * @ctxt: an XML parser context
4719 * @tree: the enumeration tree built while parsing
4720 *
4721 * parse the Attribute list def for an element
4722 *
4723 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4724 *
4725 * [55] StringType ::= 'CDATA'
4726 *
4727 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4728 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4729 *
4730 * Validity constraints for attribute values syntax are checked in
4731 * xmlValidateAttributeValue()
4732 *
4733 * [ VC: ID ]
4734 * Values of type ID must match the Name production. A name must not
4735 * appear more than once in an XML document as a value of this type;
4736 * i.e., ID values must uniquely identify the elements which bear them.
4737 *
4738 * [ VC: One ID per Element Type ]
4739 * No element type may have more than one ID attribute specified.
4740 *
4741 * [ VC: ID Attribute Default ]
4742 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4743 *
4744 * [ VC: IDREF ]
4745 * Values of type IDREF must match the Name production, and values
4746 * of type IDREFS must match Names; each IDREF Name must match the value
4747 * of an ID attribute on some element in the XML document; i.e. IDREF
4748 * values must match the value of some ID attribute.
4749 *
4750 * [ VC: Entity Name ]
4751 * Values of type ENTITY must match the Name production, values
4752 * of type ENTITIES must match Names; each Entity Name must match the
4753 * name of an unparsed entity declared in the DTD.
4754 *
4755 * [ VC: Name Token ]
4756 * Values of type NMTOKEN must match the Nmtoken production; values
4757 * of type NMTOKENS must match Nmtokens.
4758 *
4759 * Returns the attribute type
4760 */
4761int
4762xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4763 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004764 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004765 SKIP(5);
4766 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004767 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004768 SKIP(6);
4769 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004770 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004771 SKIP(5);
4772 return(XML_ATTRIBUTE_IDREF);
4773 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4774 SKIP(2);
4775 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004776 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004777 SKIP(6);
4778 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004779 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004780 SKIP(8);
4781 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004782 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004783 SKIP(8);
4784 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004785 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004786 SKIP(7);
4787 return(XML_ATTRIBUTE_NMTOKEN);
4788 }
4789 return(xmlParseEnumeratedType(ctxt, tree));
4790}
4791
4792/**
4793 * xmlParseAttributeListDecl:
4794 * @ctxt: an XML parser context
4795 *
4796 * : parse the Attribute list def for an element
4797 *
4798 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4799 *
4800 * [53] AttDef ::= S Name S AttType S DefaultDecl
4801 *
4802 */
4803void
4804xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004805 const xmlChar *elemName;
4806 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004807 xmlEnumerationPtr tree;
4808
Daniel Veillarda07050d2003-10-19 14:46:32 +00004809 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004810 xmlParserInputPtr input = ctxt->input;
4811
4812 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004813 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004814 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004815 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004816 }
4817 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004818 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4821 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004822 return;
4823 }
4824 SKIP_BLANKS;
4825 GROW;
4826 while (RAW != '>') {
4827 const xmlChar *check = CUR_PTR;
4828 int type;
4829 int def;
4830 xmlChar *defaultValue = NULL;
4831
4832 GROW;
4833 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004834 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004835 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004836 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4837 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004838 break;
4839 }
4840 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004841 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004842 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004843 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004844 if (defaultValue != NULL)
4845 xmlFree(defaultValue);
4846 break;
4847 }
4848 SKIP_BLANKS;
4849
4850 type = xmlParseAttributeType(ctxt, &tree);
4851 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004852 if (defaultValue != NULL)
4853 xmlFree(defaultValue);
4854 break;
4855 }
4856
4857 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004858 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004859 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4860 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004861 if (defaultValue != NULL)
4862 xmlFree(defaultValue);
4863 if (tree != NULL)
4864 xmlFreeEnumeration(tree);
4865 break;
4866 }
4867 SKIP_BLANKS;
4868
4869 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4870 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004871 if (defaultValue != NULL)
4872 xmlFree(defaultValue);
4873 if (tree != NULL)
4874 xmlFreeEnumeration(tree);
4875 break;
4876 }
4877
4878 GROW;
4879 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004880 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004881 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004882 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004883 if (defaultValue != NULL)
4884 xmlFree(defaultValue);
4885 if (tree != NULL)
4886 xmlFreeEnumeration(tree);
4887 break;
4888 }
4889 SKIP_BLANKS;
4890 }
4891 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004892 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4893 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004894 if (defaultValue != NULL)
4895 xmlFree(defaultValue);
4896 if (tree != NULL)
4897 xmlFreeEnumeration(tree);
4898 break;
4899 }
4900 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4901 (ctxt->sax->attributeDecl != NULL))
4902 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4903 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004904 else if (tree != NULL)
4905 xmlFreeEnumeration(tree);
4906
4907 if ((ctxt->sax2) && (defaultValue != NULL) &&
4908 (def != XML_ATTRIBUTE_IMPLIED) &&
4909 (def != XML_ATTRIBUTE_REQUIRED)) {
4910 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4911 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004912 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4913 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4914 }
Owen Taylor3473f882001-02-23 17:55:21 +00004915 if (defaultValue != NULL)
4916 xmlFree(defaultValue);
4917 GROW;
4918 }
4919 if (RAW == '>') {
4920 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004921 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4922 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004923 }
4924 NEXT;
4925 }
Owen Taylor3473f882001-02-23 17:55:21 +00004926 }
4927}
4928
4929/**
4930 * xmlParseElementMixedContentDecl:
4931 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004932 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004933 *
4934 * parse the declaration for a Mixed Element content
4935 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4936 *
4937 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4938 * '(' S? '#PCDATA' S? ')'
4939 *
4940 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4941 *
4942 * [ VC: No Duplicate Types ]
4943 * The same name must not appear more than once in a single
4944 * mixed-content declaration.
4945 *
4946 * returns: the list of the xmlElementContentPtr describing the element choices
4947 */
4948xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004949xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004950 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004951 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004952
4953 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004954 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004955 SKIP(7);
4956 SKIP_BLANKS;
4957 SHRINK;
4958 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004960 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4961"Element content declaration doesn't start and stop in the same entity\n",
4962 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004963 }
Owen Taylor3473f882001-02-23 17:55:21 +00004964 NEXT;
4965 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4966 if (RAW == '*') {
4967 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4968 NEXT;
4969 }
4970 return(ret);
4971 }
4972 if ((RAW == '(') || (RAW == '|')) {
4973 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4974 if (ret == NULL) return(NULL);
4975 }
4976 while (RAW == '|') {
4977 NEXT;
4978 if (elem == NULL) {
4979 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4980 if (ret == NULL) return(NULL);
4981 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004982 if (cur != NULL)
4983 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004984 cur = ret;
4985 } else {
4986 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4987 if (n == NULL) return(NULL);
4988 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004989 if (n->c1 != NULL)
4990 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004991 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004992 if (n != NULL)
4993 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004994 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004995 }
4996 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004997 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004998 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004999 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005000 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005001 xmlFreeElementContent(cur);
5002 return(NULL);
5003 }
5004 SKIP_BLANKS;
5005 GROW;
5006 }
5007 if ((RAW == ')') && (NXT(1) == '*')) {
5008 if (elem != NULL) {
5009 cur->c2 = xmlNewElementContent(elem,
5010 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005011 if (cur->c2 != NULL)
5012 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005013 }
5014 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005015 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005016 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5017"Element content declaration doesn't start and stop in the same entity\n",
5018 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005019 }
Owen Taylor3473f882001-02-23 17:55:21 +00005020 SKIP(2);
5021 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00005022 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005023 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 return(NULL);
5025 }
5026
5027 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005028 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 }
5030 return(ret);
5031}
5032
5033/**
5034 * xmlParseElementChildrenContentDecl:
5035 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005036 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005037 *
5038 * parse the declaration for a Mixed Element content
5039 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5040 *
5041 *
5042 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5043 *
5044 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5045 *
5046 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5047 *
5048 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5049 *
5050 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5051 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005052 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005053 * opening or closing parentheses in a choice, seq, or Mixed
5054 * construct is contained in the replacement text for a parameter
5055 * entity, both must be contained in the same replacement text. For
5056 * interoperability, if a parameter-entity reference appears in a
5057 * choice, seq, or Mixed construct, its replacement text should not
5058 * be empty, and neither the first nor last non-blank character of
5059 * the replacement text should be a connector (| or ,).
5060 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005061 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005062 * hierarchy.
5063 */
5064xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005065xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005066 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005067 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005068 xmlChar type = 0;
5069
5070 SKIP_BLANKS;
5071 GROW;
5072 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005073 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005074
Owen Taylor3473f882001-02-23 17:55:21 +00005075 /* Recurse on first child */
5076 NEXT;
5077 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005078 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005079 SKIP_BLANKS;
5080 GROW;
5081 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005082 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005083 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005084 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005085 return(NULL);
5086 }
5087 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005088 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005089 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005090 return(NULL);
5091 }
Owen Taylor3473f882001-02-23 17:55:21 +00005092 GROW;
5093 if (RAW == '?') {
5094 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5095 NEXT;
5096 } else if (RAW == '*') {
5097 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5098 NEXT;
5099 } else if (RAW == '+') {
5100 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5101 NEXT;
5102 } else {
5103 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5104 }
Owen Taylor3473f882001-02-23 17:55:21 +00005105 GROW;
5106 }
5107 SKIP_BLANKS;
5108 SHRINK;
5109 while (RAW != ')') {
5110 /*
5111 * Each loop we parse one separator and one element.
5112 */
5113 if (RAW == ',') {
5114 if (type == 0) type = CUR;
5115
5116 /*
5117 * Detect "Name | Name , Name" error
5118 */
5119 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005120 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005121 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005122 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005123 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005124 xmlFreeElementContent(last);
5125 if (ret != NULL)
5126 xmlFreeElementContent(ret);
5127 return(NULL);
5128 }
5129 NEXT;
5130
5131 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5132 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005133 if ((last != NULL) && (last != ret))
5134 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005135 xmlFreeElementContent(ret);
5136 return(NULL);
5137 }
5138 if (last == NULL) {
5139 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005140 if (ret != NULL)
5141 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005142 ret = cur = op;
5143 } else {
5144 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005145 if (op != NULL)
5146 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005147 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005148 if (last != NULL)
5149 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005150 cur =op;
5151 last = NULL;
5152 }
5153 } else if (RAW == '|') {
5154 if (type == 0) type = CUR;
5155
5156 /*
5157 * Detect "Name , Name | Name" error
5158 */
5159 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005160 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005161 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005162 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005163 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005164 xmlFreeElementContent(last);
5165 if (ret != NULL)
5166 xmlFreeElementContent(ret);
5167 return(NULL);
5168 }
5169 NEXT;
5170
5171 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5172 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005173 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005174 xmlFreeElementContent(last);
5175 if (ret != NULL)
5176 xmlFreeElementContent(ret);
5177 return(NULL);
5178 }
5179 if (last == NULL) {
5180 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005181 if (ret != NULL)
5182 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005183 ret = cur = op;
5184 } else {
5185 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005186 if (op != NULL)
5187 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005188 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005189 if (last != NULL)
5190 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005191 cur =op;
5192 last = NULL;
5193 }
5194 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005195 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 if (ret != NULL)
5197 xmlFreeElementContent(ret);
5198 return(NULL);
5199 }
5200 GROW;
5201 SKIP_BLANKS;
5202 GROW;
5203 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005204 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005205 /* Recurse on second child */
5206 NEXT;
5207 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005208 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 SKIP_BLANKS;
5210 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005211 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005212 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005213 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005214 if (ret != NULL)
5215 xmlFreeElementContent(ret);
5216 return(NULL);
5217 }
5218 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 if (RAW == '?') {
5220 last->ocur = XML_ELEMENT_CONTENT_OPT;
5221 NEXT;
5222 } else if (RAW == '*') {
5223 last->ocur = XML_ELEMENT_CONTENT_MULT;
5224 NEXT;
5225 } else if (RAW == '+') {
5226 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5227 NEXT;
5228 } else {
5229 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5230 }
5231 }
5232 SKIP_BLANKS;
5233 GROW;
5234 }
5235 if ((cur != NULL) && (last != NULL)) {
5236 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005237 if (last != NULL)
5238 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005239 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005240 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005241 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5242"Element content declaration doesn't start and stop in the same entity\n",
5243 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005244 }
Owen Taylor3473f882001-02-23 17:55:21 +00005245 NEXT;
5246 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005247 if (ret != NULL)
5248 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005249 NEXT;
5250 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005251 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005252 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005253 cur = ret;
5254 /*
5255 * Some normalization:
5256 * (a | b* | c?)* == (a | b | c)*
5257 */
5258 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5259 if ((cur->c1 != NULL) &&
5260 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5261 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5262 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5263 if ((cur->c2 != NULL) &&
5264 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5265 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5266 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5267 cur = cur->c2;
5268 }
5269 }
Owen Taylor3473f882001-02-23 17:55:21 +00005270 NEXT;
5271 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005272 if (ret != NULL) {
5273 int found = 0;
5274
Daniel Veillarde470df72001-04-18 21:41:07 +00005275 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005276 /*
5277 * Some normalization:
5278 * (a | b*)+ == (a | b)*
5279 * (a | b?)+ == (a | b)*
5280 */
5281 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5282 if ((cur->c1 != NULL) &&
5283 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5284 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5285 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5286 found = 1;
5287 }
5288 if ((cur->c2 != NULL) &&
5289 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5290 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5291 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5292 found = 1;
5293 }
5294 cur = cur->c2;
5295 }
5296 if (found)
5297 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5298 }
Owen Taylor3473f882001-02-23 17:55:21 +00005299 NEXT;
5300 }
5301 return(ret);
5302}
5303
5304/**
5305 * xmlParseElementContentDecl:
5306 * @ctxt: an XML parser context
5307 * @name: the name of the element being defined.
5308 * @result: the Element Content pointer will be stored here if any
5309 *
5310 * parse the declaration for an Element content either Mixed or Children,
5311 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5312 *
5313 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5314 *
5315 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5316 */
5317
5318int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005319xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005320 xmlElementContentPtr *result) {
5321
5322 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005323 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005324 int res;
5325
5326 *result = NULL;
5327
5328 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005329 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005330 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005331 return(-1);
5332 }
5333 NEXT;
5334 GROW;
5335 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005336 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005337 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005338 res = XML_ELEMENT_TYPE_MIXED;
5339 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005340 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005341 res = XML_ELEMENT_TYPE_ELEMENT;
5342 }
Owen Taylor3473f882001-02-23 17:55:21 +00005343 SKIP_BLANKS;
5344 *result = tree;
5345 return(res);
5346}
5347
5348/**
5349 * xmlParseElementDecl:
5350 * @ctxt: an XML parser context
5351 *
5352 * parse an Element declaration.
5353 *
5354 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5355 *
5356 * [ VC: Unique Element Type Declaration ]
5357 * No element type may be declared more than once
5358 *
5359 * Returns the type of the element, or -1 in case of error
5360 */
5361int
5362xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005363 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005364 int ret = -1;
5365 xmlElementContentPtr content = NULL;
5366
5367 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005368 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005369 xmlParserInputPtr input = ctxt->input;
5370
5371 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005372 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005373 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5374 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005375 }
5376 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005377 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005378 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005379 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5380 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005381 return(-1);
5382 }
5383 while ((RAW == 0) && (ctxt->inputNr > 1))
5384 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005385 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5387 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005388 }
5389 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005390 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005391 SKIP(5);
5392 /*
5393 * Element must always be empty.
5394 */
5395 ret = XML_ELEMENT_TYPE_EMPTY;
5396 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5397 (NXT(2) == 'Y')) {
5398 SKIP(3);
5399 /*
5400 * Element is a generic container.
5401 */
5402 ret = XML_ELEMENT_TYPE_ANY;
5403 } else if (RAW == '(') {
5404 ret = xmlParseElementContentDecl(ctxt, name, &content);
5405 } else {
5406 /*
5407 * [ WFC: PEs in Internal Subset ] error handling.
5408 */
5409 if ((RAW == '%') && (ctxt->external == 0) &&
5410 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005411 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005412 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005413 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005414 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005415 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5416 }
Owen Taylor3473f882001-02-23 17:55:21 +00005417 return(-1);
5418 }
5419
5420 SKIP_BLANKS;
5421 /*
5422 * Pop-up of finished entities.
5423 */
5424 while ((RAW == 0) && (ctxt->inputNr > 1))
5425 xmlPopInput(ctxt);
5426 SKIP_BLANKS;
5427
5428 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005429 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005430 } else {
5431 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005432 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5433 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005434 }
5435
5436 NEXT;
5437 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5438 (ctxt->sax->elementDecl != NULL))
5439 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5440 content);
5441 }
5442 if (content != NULL) {
5443 xmlFreeElementContent(content);
5444 }
Owen Taylor3473f882001-02-23 17:55:21 +00005445 }
5446 return(ret);
5447}
5448
5449/**
Owen Taylor3473f882001-02-23 17:55:21 +00005450 * xmlParseConditionalSections
5451 * @ctxt: an XML parser context
5452 *
5453 * [61] conditionalSect ::= includeSect | ignoreSect
5454 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5455 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5456 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5457 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5458 */
5459
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005460static void
Owen Taylor3473f882001-02-23 17:55:21 +00005461xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5462 SKIP(3);
5463 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005464 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005465 SKIP(7);
5466 SKIP_BLANKS;
5467 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005468 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005469 } else {
5470 NEXT;
5471 }
5472 if (xmlParserDebugEntities) {
5473 if ((ctxt->input != NULL) && (ctxt->input->filename))
5474 xmlGenericError(xmlGenericErrorContext,
5475 "%s(%d): ", ctxt->input->filename,
5476 ctxt->input->line);
5477 xmlGenericError(xmlGenericErrorContext,
5478 "Entering INCLUDE Conditional Section\n");
5479 }
5480
5481 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5482 (NXT(2) != '>'))) {
5483 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005484 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005485
5486 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5487 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005488 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005489 NEXT;
5490 } else if (RAW == '%') {
5491 xmlParsePEReference(ctxt);
5492 } else
5493 xmlParseMarkupDecl(ctxt);
5494
5495 /*
5496 * Pop-up of finished entities.
5497 */
5498 while ((RAW == 0) && (ctxt->inputNr > 1))
5499 xmlPopInput(ctxt);
5500
Daniel Veillardfdc91562002-07-01 21:52:03 +00005501 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005502 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005503 break;
5504 }
5505 }
5506 if (xmlParserDebugEntities) {
5507 if ((ctxt->input != NULL) && (ctxt->input->filename))
5508 xmlGenericError(xmlGenericErrorContext,
5509 "%s(%d): ", ctxt->input->filename,
5510 ctxt->input->line);
5511 xmlGenericError(xmlGenericErrorContext,
5512 "Leaving INCLUDE Conditional Section\n");
5513 }
5514
Daniel Veillarda07050d2003-10-19 14:46:32 +00005515 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005516 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005517 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005518 int depth = 0;
5519
5520 SKIP(6);
5521 SKIP_BLANKS;
5522 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005523 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005524 } else {
5525 NEXT;
5526 }
5527 if (xmlParserDebugEntities) {
5528 if ((ctxt->input != NULL) && (ctxt->input->filename))
5529 xmlGenericError(xmlGenericErrorContext,
5530 "%s(%d): ", ctxt->input->filename,
5531 ctxt->input->line);
5532 xmlGenericError(xmlGenericErrorContext,
5533 "Entering IGNORE Conditional Section\n");
5534 }
5535
5536 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005537 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005538 * But disable SAX event generating DTD building in the meantime
5539 */
5540 state = ctxt->disableSAX;
5541 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005543 ctxt->instate = XML_PARSER_IGNORE;
5544
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005545 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005546 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5547 depth++;
5548 SKIP(3);
5549 continue;
5550 }
5551 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5552 if (--depth >= 0) SKIP(3);
5553 continue;
5554 }
5555 NEXT;
5556 continue;
5557 }
5558
5559 ctxt->disableSAX = state;
5560 ctxt->instate = instate;
5561
5562 if (xmlParserDebugEntities) {
5563 if ((ctxt->input != NULL) && (ctxt->input->filename))
5564 xmlGenericError(xmlGenericErrorContext,
5565 "%s(%d): ", ctxt->input->filename,
5566 ctxt->input->line);
5567 xmlGenericError(xmlGenericErrorContext,
5568 "Leaving IGNORE Conditional Section\n");
5569 }
5570
5571 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005572 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005573 }
5574
5575 if (RAW == 0)
5576 SHRINK;
5577
5578 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005579 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005580 } else {
5581 SKIP(3);
5582 }
5583}
5584
5585/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005586 * xmlParseMarkupDecl:
5587 * @ctxt: an XML parser context
5588 *
5589 * parse Markup declarations
5590 *
5591 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5592 * NotationDecl | PI | Comment
5593 *
5594 * [ VC: Proper Declaration/PE Nesting ]
5595 * Parameter-entity replacement text must be properly nested with
5596 * markup declarations. That is to say, if either the first character
5597 * or the last character of a markup declaration (markupdecl above) is
5598 * contained in the replacement text for a parameter-entity reference,
5599 * both must be contained in the same replacement text.
5600 *
5601 * [ WFC: PEs in Internal Subset ]
5602 * In the internal DTD subset, parameter-entity references can occur
5603 * only where markup declarations can occur, not within markup declarations.
5604 * (This does not apply to references that occur in external parameter
5605 * entities or to the external subset.)
5606 */
5607void
5608xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5609 GROW;
5610 xmlParseElementDecl(ctxt);
5611 xmlParseAttributeListDecl(ctxt);
5612 xmlParseEntityDecl(ctxt);
5613 xmlParseNotationDecl(ctxt);
5614 xmlParsePI(ctxt);
5615 xmlParseComment(ctxt);
5616 /*
5617 * This is only for internal subset. On external entities,
5618 * the replacement is done before parsing stage
5619 */
5620 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5621 xmlParsePEReference(ctxt);
5622
5623 /*
5624 * Conditional sections are allowed from entities included
5625 * by PE References in the internal subset.
5626 */
5627 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5628 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5629 xmlParseConditionalSections(ctxt);
5630 }
5631 }
5632
5633 ctxt->instate = XML_PARSER_DTD;
5634}
5635
5636/**
5637 * xmlParseTextDecl:
5638 * @ctxt: an XML parser context
5639 *
5640 * parse an XML declaration header for external entities
5641 *
5642 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5643 *
5644 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5645 */
5646
5647void
5648xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5649 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005650 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005651
5652 /*
5653 * We know that '<?xml' is here.
5654 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005655 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005656 SKIP(5);
5657 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005658 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005659 return;
5660 }
5661
William M. Brack76e95df2003-10-18 16:20:14 +00005662 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005663 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5664 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005665 }
5666 SKIP_BLANKS;
5667
5668 /*
5669 * We may have the VersionInfo here.
5670 */
5671 version = xmlParseVersionInfo(ctxt);
5672 if (version == NULL)
5673 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005674 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005675 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005676 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5677 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005678 }
5679 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005680 ctxt->input->version = version;
5681
5682 /*
5683 * We must have the encoding declaration
5684 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005685 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005686 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5687 /*
5688 * The XML REC instructs us to stop parsing right here
5689 */
5690 return;
5691 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005692 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5693 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5694 "Missing encoding in text declaration\n");
5695 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005696
5697 SKIP_BLANKS;
5698 if ((RAW == '?') && (NXT(1) == '>')) {
5699 SKIP(2);
5700 } else if (RAW == '>') {
5701 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005702 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005703 NEXT;
5704 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005705 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005706 MOVETO_ENDTAG(CUR_PTR);
5707 NEXT;
5708 }
5709}
5710
5711/**
Owen Taylor3473f882001-02-23 17:55:21 +00005712 * xmlParseExternalSubset:
5713 * @ctxt: an XML parser context
5714 * @ExternalID: the external identifier
5715 * @SystemID: the system identifier (or URL)
5716 *
5717 * parse Markup declarations from an external subset
5718 *
5719 * [30] extSubset ::= textDecl? extSubsetDecl
5720 *
5721 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5722 */
5723void
5724xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5725 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005726 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005727 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005728 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005729 xmlParseTextDecl(ctxt);
5730 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5731 /*
5732 * The XML REC instructs us to stop parsing right here
5733 */
5734 ctxt->instate = XML_PARSER_EOF;
5735 return;
5736 }
5737 }
5738 if (ctxt->myDoc == NULL) {
5739 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5740 }
5741 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5742 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5743
5744 ctxt->instate = XML_PARSER_DTD;
5745 ctxt->external = 1;
5746 while (((RAW == '<') && (NXT(1) == '?')) ||
5747 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005748 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005749 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005750 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005751
5752 GROW;
5753 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5754 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005755 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005756 NEXT;
5757 } else if (RAW == '%') {
5758 xmlParsePEReference(ctxt);
5759 } else
5760 xmlParseMarkupDecl(ctxt);
5761
5762 /*
5763 * Pop-up of finished entities.
5764 */
5765 while ((RAW == 0) && (ctxt->inputNr > 1))
5766 xmlPopInput(ctxt);
5767
Daniel Veillardfdc91562002-07-01 21:52:03 +00005768 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005769 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 break;
5771 }
5772 }
5773
5774 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005775 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005776 }
5777
5778}
5779
5780/**
5781 * xmlParseReference:
5782 * @ctxt: an XML parser context
5783 *
5784 * parse and handle entity references in content, depending on the SAX
5785 * interface, this may end-up in a call to character() if this is a
5786 * CharRef, a predefined entity, if there is no reference() callback.
5787 * or if the parser was asked to switch to that mode.
5788 *
5789 * [67] Reference ::= EntityRef | CharRef
5790 */
5791void
5792xmlParseReference(xmlParserCtxtPtr ctxt) {
5793 xmlEntityPtr ent;
5794 xmlChar *val;
5795 if (RAW != '&') return;
5796
5797 if (NXT(1) == '#') {
5798 int i = 0;
5799 xmlChar out[10];
5800 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005801 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005802
5803 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5804 /*
5805 * So we are using non-UTF-8 buffers
5806 * Check that the char fit on 8bits, if not
5807 * generate a CharRef.
5808 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005809 if (value <= 0xFF) {
5810 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005811 out[1] = 0;
5812 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5813 (!ctxt->disableSAX))
5814 ctxt->sax->characters(ctxt->userData, out, 1);
5815 } else {
5816 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005817 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005818 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005819 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005820 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5821 (!ctxt->disableSAX))
5822 ctxt->sax->reference(ctxt->userData, out);
5823 }
5824 } else {
5825 /*
5826 * Just encode the value in UTF-8
5827 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005828 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005829 out[i] = 0;
5830 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5831 (!ctxt->disableSAX))
5832 ctxt->sax->characters(ctxt->userData, out, i);
5833 }
5834 } else {
5835 ent = xmlParseEntityRef(ctxt);
5836 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005837 if (!ctxt->wellFormed)
5838 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005839 if ((ent->name != NULL) &&
5840 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5841 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005842 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005843
5844
5845 /*
5846 * The first reference to the entity trigger a parsing phase
5847 * where the ent->children is filled with the result from
5848 * the parsing.
5849 */
5850 if (ent->children == NULL) {
5851 xmlChar *value;
5852 value = ent->content;
5853
5854 /*
5855 * Check that this entity is well formed
5856 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005857 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005858 (value[1] == 0) && (value[0] == '<') &&
5859 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5860 /*
5861 * DONE: get definite answer on this !!!
5862 * Lots of entity decls are used to declare a single
5863 * char
5864 * <!ENTITY lt "<">
5865 * Which seems to be valid since
5866 * 2.4: The ampersand character (&) and the left angle
5867 * bracket (<) may appear in their literal form only
5868 * when used ... They are also legal within the literal
5869 * entity value of an internal entity declaration;i
5870 * see "4.3.2 Well-Formed Parsed Entities".
5871 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5872 * Looking at the OASIS test suite and James Clark
5873 * tests, this is broken. However the XML REC uses
5874 * it. Is the XML REC not well-formed ????
5875 * This is a hack to avoid this problem
5876 *
5877 * ANSWER: since lt gt amp .. are already defined,
5878 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005879 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005880 * is lousy but acceptable.
5881 */
5882 list = xmlNewDocText(ctxt->myDoc, value);
5883 if (list != NULL) {
5884 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5885 (ent->children == NULL)) {
5886 ent->children = list;
5887 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005888 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005889 list->parent = (xmlNodePtr) ent;
5890 } else {
5891 xmlFreeNodeList(list);
5892 }
5893 } else if (list != NULL) {
5894 xmlFreeNodeList(list);
5895 }
5896 } else {
5897 /*
5898 * 4.3.2: An internal general parsed entity is well-formed
5899 * if its replacement text matches the production labeled
5900 * content.
5901 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005902
5903 void *user_data;
5904 /*
5905 * This is a bit hackish but this seems the best
5906 * way to make sure both SAX and DOM entity support
5907 * behaves okay.
5908 */
5909 if (ctxt->userData == ctxt)
5910 user_data = NULL;
5911 else
5912 user_data = ctxt->userData;
5913
Owen Taylor3473f882001-02-23 17:55:21 +00005914 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5915 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005916 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5917 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 ctxt->depth--;
5919 } else if (ent->etype ==
5920 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5921 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005922 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005923 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005924 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 ctxt->depth--;
5926 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005927 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005928 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5929 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005930 }
5931 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005932 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005933 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005934 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005935 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5936 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005937 (ent->children == NULL)) {
5938 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005939 if (ctxt->replaceEntities) {
5940 /*
5941 * Prune it directly in the generated document
5942 * except for single text nodes.
5943 */
5944 if ((list->type == XML_TEXT_NODE) &&
5945 (list->next == NULL)) {
5946 list->parent = (xmlNodePtr) ent;
5947 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005948 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005949 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005950 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005951 while (list != NULL) {
5952 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005953 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005954 if (list->next == NULL)
5955 ent->last = list;
5956 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005957 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005958 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005959#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005960 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5961 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005962#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005963 }
5964 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005965 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005966 while (list != NULL) {
5967 list->parent = (xmlNodePtr) ent;
5968 if (list->next == NULL)
5969 ent->last = list;
5970 list = list->next;
5971 }
Owen Taylor3473f882001-02-23 17:55:21 +00005972 }
5973 } else {
5974 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005975 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005976 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005977 } else if ((ret != XML_ERR_OK) &&
5978 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005979 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005980 } else if (list != NULL) {
5981 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005982 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005983 }
5984 }
5985 }
5986 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5987 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5988 /*
5989 * Create a node.
5990 */
5991 ctxt->sax->reference(ctxt->userData, ent->name);
5992 return;
5993 } else if (ctxt->replaceEntities) {
5994 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5995 /*
5996 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005997 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005998 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005999 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006000 if ((list == NULL) && (ent->owner == 0)) {
6001 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006002 cur = ent->children;
6003 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006004 nw = xmlCopyNode(cur, 1);
6005 if (nw != NULL) {
6006 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006007 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006008 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006009 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006010 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006011 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006012 if (cur == ent->last)
6013 break;
6014 cur = cur->next;
6015 }
Daniel Veillard81273902003-09-30 00:43:48 +00006016#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006017 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006018 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006019#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006020 } else if (list == NULL) {
6021 xmlNodePtr nw = NULL, cur, next, last,
6022 firstChild = NULL;
6023 /*
6024 * Copy the entity child list and make it the new
6025 * entity child list. The goal is to make sure any
6026 * ID or REF referenced will be the one from the
6027 * document content and not the entity copy.
6028 */
6029 cur = ent->children;
6030 ent->children = NULL;
6031 last = ent->last;
6032 ent->last = NULL;
6033 while (cur != NULL) {
6034 next = cur->next;
6035 cur->next = NULL;
6036 cur->parent = NULL;
6037 nw = xmlCopyNode(cur, 1);
6038 if (nw != NULL) {
6039 nw->_private = cur->_private;
6040 if (firstChild == NULL){
6041 firstChild = cur;
6042 }
6043 xmlAddChild((xmlNodePtr) ent, nw);
6044 xmlAddChild(ctxt->node, cur);
6045 }
6046 if (cur == last)
6047 break;
6048 cur = next;
6049 }
6050 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006051#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006052 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6053 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006054#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006055 } else {
6056 /*
6057 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006058 * node with a possible previous text one which
6059 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006060 */
6061 if (ent->children->type == XML_TEXT_NODE)
6062 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
6063 if ((ent->last != ent->children) &&
6064 (ent->last->type == XML_TEXT_NODE))
6065 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
6066 xmlAddChildList(ctxt->node, ent->children);
6067 }
6068
Owen Taylor3473f882001-02-23 17:55:21 +00006069 /*
6070 * This is to avoid a nasty side effect, see
6071 * characters() in SAX.c
6072 */
6073 ctxt->nodemem = 0;
6074 ctxt->nodelen = 0;
6075 return;
6076 } else {
6077 /*
6078 * Probably running in SAX mode
6079 */
6080 xmlParserInputPtr input;
6081
6082 input = xmlNewEntityInputStream(ctxt, ent);
6083 xmlPushInput(ctxt, input);
6084 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006085 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6086 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006087 xmlParseTextDecl(ctxt);
6088 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6089 /*
6090 * The XML REC instructs us to stop parsing right here
6091 */
6092 ctxt->instate = XML_PARSER_EOF;
6093 return;
6094 }
6095 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006096 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6097 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 }
6099 }
6100 return;
6101 }
6102 }
6103 } else {
6104 val = ent->content;
6105 if (val == NULL) return;
6106 /*
6107 * inline the entity.
6108 */
6109 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6110 (!ctxt->disableSAX))
6111 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6112 }
6113 }
6114}
6115
6116/**
6117 * xmlParseEntityRef:
6118 * @ctxt: an XML parser context
6119 *
6120 * parse ENTITY references declarations
6121 *
6122 * [68] EntityRef ::= '&' Name ';'
6123 *
6124 * [ WFC: Entity Declared ]
6125 * In a document without any DTD, a document with only an internal DTD
6126 * subset which contains no parameter entity references, or a document
6127 * with "standalone='yes'", the Name given in the entity reference
6128 * must match that in an entity declaration, except that well-formed
6129 * documents need not declare any of the following entities: amp, lt,
6130 * gt, apos, quot. The declaration of a parameter entity must precede
6131 * any reference to it. Similarly, the declaration of a general entity
6132 * must precede any reference to it which appears in a default value in an
6133 * attribute-list declaration. Note that if entities are declared in the
6134 * external subset or in external parameter entities, a non-validating
6135 * processor is not obligated to read and process their declarations;
6136 * for such documents, the rule that an entity must be declared is a
6137 * well-formedness constraint only if standalone='yes'.
6138 *
6139 * [ WFC: Parsed Entity ]
6140 * An entity reference must not contain the name of an unparsed entity
6141 *
6142 * Returns the xmlEntityPtr if found, or NULL otherwise.
6143 */
6144xmlEntityPtr
6145xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006146 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006147 xmlEntityPtr ent = NULL;
6148
6149 GROW;
6150
6151 if (RAW == '&') {
6152 NEXT;
6153 name = xmlParseName(ctxt);
6154 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006155 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6156 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006157 } else {
6158 if (RAW == ';') {
6159 NEXT;
6160 /*
6161 * Ask first SAX for entity resolution, otherwise try the
6162 * predefined set.
6163 */
6164 if (ctxt->sax != NULL) {
6165 if (ctxt->sax->getEntity != NULL)
6166 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006167 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006168 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006169 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6170 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006171 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006172 }
Owen Taylor3473f882001-02-23 17:55:21 +00006173 }
6174 /*
6175 * [ WFC: Entity Declared ]
6176 * In a document without any DTD, a document with only an
6177 * internal DTD subset which contains no parameter entity
6178 * references, or a document with "standalone='yes'", the
6179 * Name given in the entity reference must match that in an
6180 * entity declaration, except that well-formed documents
6181 * need not declare any of the following entities: amp, lt,
6182 * gt, apos, quot.
6183 * The declaration of a parameter entity must precede any
6184 * reference to it.
6185 * Similarly, the declaration of a general entity must
6186 * precede any reference to it which appears in a default
6187 * value in an attribute-list declaration. Note that if
6188 * entities are declared in the external subset or in
6189 * external parameter entities, a non-validating processor
6190 * is not obligated to read and process their declarations;
6191 * for such documents, the rule that an entity must be
6192 * declared is a well-formedness constraint only if
6193 * standalone='yes'.
6194 */
6195 if (ent == NULL) {
6196 if ((ctxt->standalone == 1) ||
6197 ((ctxt->hasExternalSubset == 0) &&
6198 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006199 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006200 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006201 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006202 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006203 "Entity '%s' not defined\n", name);
6204 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006205 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006206 }
6207
6208 /*
6209 * [ WFC: Parsed Entity ]
6210 * An entity reference must not contain the name of an
6211 * unparsed entity
6212 */
6213 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006214 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006215 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006216 }
6217
6218 /*
6219 * [ WFC: No External Entity References ]
6220 * Attribute values cannot contain direct or indirect
6221 * entity references to external entities.
6222 */
6223 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6224 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006225 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6226 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006227 }
6228 /*
6229 * [ WFC: No < in Attribute Values ]
6230 * The replacement text of any entity referred to directly or
6231 * indirectly in an attribute value (other than "&lt;") must
6232 * not contain a <.
6233 */
6234 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6235 (ent != NULL) &&
6236 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6237 (ent->content != NULL) &&
6238 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006239 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006240 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006241 }
6242
6243 /*
6244 * Internal check, no parameter entities here ...
6245 */
6246 else {
6247 switch (ent->etype) {
6248 case XML_INTERNAL_PARAMETER_ENTITY:
6249 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006250 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6251 "Attempt to reference the parameter entity '%s'\n",
6252 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006253 break;
6254 default:
6255 break;
6256 }
6257 }
6258
6259 /*
6260 * [ WFC: No Recursion ]
6261 * A parsed entity must not contain a recursive reference
6262 * to itself, either directly or indirectly.
6263 * Done somewhere else
6264 */
6265
6266 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006267 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 }
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
6270 }
6271 return(ent);
6272}
6273
6274/**
6275 * xmlParseStringEntityRef:
6276 * @ctxt: an XML parser context
6277 * @str: a pointer to an index in the string
6278 *
6279 * parse ENTITY references declarations, but this version parses it from
6280 * a string value.
6281 *
6282 * [68] EntityRef ::= '&' Name ';'
6283 *
6284 * [ WFC: Entity Declared ]
6285 * In a document without any DTD, a document with only an internal DTD
6286 * subset which contains no parameter entity references, or a document
6287 * with "standalone='yes'", the Name given in the entity reference
6288 * must match that in an entity declaration, except that well-formed
6289 * documents need not declare any of the following entities: amp, lt,
6290 * gt, apos, quot. The declaration of a parameter entity must precede
6291 * any reference to it. Similarly, the declaration of a general entity
6292 * must precede any reference to it which appears in a default value in an
6293 * attribute-list declaration. Note that if entities are declared in the
6294 * external subset or in external parameter entities, a non-validating
6295 * processor is not obligated to read and process their declarations;
6296 * for such documents, the rule that an entity must be declared is a
6297 * well-formedness constraint only if standalone='yes'.
6298 *
6299 * [ WFC: Parsed Entity ]
6300 * An entity reference must not contain the name of an unparsed entity
6301 *
6302 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6303 * is updated to the current location in the string.
6304 */
6305xmlEntityPtr
6306xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6307 xmlChar *name;
6308 const xmlChar *ptr;
6309 xmlChar cur;
6310 xmlEntityPtr ent = NULL;
6311
6312 if ((str == NULL) || (*str == NULL))
6313 return(NULL);
6314 ptr = *str;
6315 cur = *ptr;
6316 if (cur == '&') {
6317 ptr++;
6318 cur = *ptr;
6319 name = xmlParseStringName(ctxt, &ptr);
6320 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006321 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6322 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006323 } else {
6324 if (*ptr == ';') {
6325 ptr++;
6326 /*
6327 * Ask first SAX for entity resolution, otherwise try the
6328 * predefined set.
6329 */
6330 if (ctxt->sax != NULL) {
6331 if (ctxt->sax->getEntity != NULL)
6332 ent = ctxt->sax->getEntity(ctxt->userData, name);
6333 if (ent == NULL)
6334 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006335 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006336 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006337 }
Owen Taylor3473f882001-02-23 17:55:21 +00006338 }
6339 /*
6340 * [ WFC: Entity Declared ]
6341 * In a document without any DTD, a document with only an
6342 * internal DTD subset which contains no parameter entity
6343 * references, or a document with "standalone='yes'", the
6344 * Name given in the entity reference must match that in an
6345 * entity declaration, except that well-formed documents
6346 * need not declare any of the following entities: amp, lt,
6347 * gt, apos, quot.
6348 * The declaration of a parameter entity must precede any
6349 * reference to it.
6350 * Similarly, the declaration of a general entity must
6351 * precede any reference to it which appears in a default
6352 * value in an attribute-list declaration. Note that if
6353 * entities are declared in the external subset or in
6354 * external parameter entities, a non-validating processor
6355 * is not obligated to read and process their declarations;
6356 * for such documents, the rule that an entity must be
6357 * declared is a well-formedness constraint only if
6358 * standalone='yes'.
6359 */
6360 if (ent == NULL) {
6361 if ((ctxt->standalone == 1) ||
6362 ((ctxt->hasExternalSubset == 0) &&
6363 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006364 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006365 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006367 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006368 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006369 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006371 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006372 }
6373
6374 /*
6375 * [ WFC: Parsed Entity ]
6376 * An entity reference must not contain the name of an
6377 * unparsed entity
6378 */
6379 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006380 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006381 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006382 }
6383
6384 /*
6385 * [ WFC: No External Entity References ]
6386 * Attribute values cannot contain direct or indirect
6387 * entity references to external entities.
6388 */
6389 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6390 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006391 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006392 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006393 }
6394 /*
6395 * [ WFC: No < in Attribute Values ]
6396 * The replacement text of any entity referred to directly or
6397 * indirectly in an attribute value (other than "&lt;") must
6398 * not contain a <.
6399 */
6400 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6401 (ent != NULL) &&
6402 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6403 (ent->content != NULL) &&
6404 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006405 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6406 "'<' in entity '%s' is not allowed in attributes values\n",
6407 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006408 }
6409
6410 /*
6411 * Internal check, no parameter entities here ...
6412 */
6413 else {
6414 switch (ent->etype) {
6415 case XML_INTERNAL_PARAMETER_ENTITY:
6416 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006417 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6418 "Attempt to reference the parameter entity '%s'\n",
6419 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 break;
6421 default:
6422 break;
6423 }
6424 }
6425
6426 /*
6427 * [ WFC: No Recursion ]
6428 * A parsed entity must not contain a recursive reference
6429 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006430 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006431 */
6432
6433 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006434 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006435 }
6436 xmlFree(name);
6437 }
6438 }
6439 *str = ptr;
6440 return(ent);
6441}
6442
6443/**
6444 * xmlParsePEReference:
6445 * @ctxt: an XML parser context
6446 *
6447 * parse PEReference declarations
6448 * The entity content is handled directly by pushing it's content as
6449 * a new input stream.
6450 *
6451 * [69] PEReference ::= '%' Name ';'
6452 *
6453 * [ WFC: No Recursion ]
6454 * A parsed entity must not contain a recursive
6455 * reference to itself, either directly or indirectly.
6456 *
6457 * [ WFC: Entity Declared ]
6458 * In a document without any DTD, a document with only an internal DTD
6459 * subset which contains no parameter entity references, or a document
6460 * with "standalone='yes'", ... ... The declaration of a parameter
6461 * entity must precede any reference to it...
6462 *
6463 * [ VC: Entity Declared ]
6464 * In a document with an external subset or external parameter entities
6465 * with "standalone='no'", ... ... The declaration of a parameter entity
6466 * must precede any reference to it...
6467 *
6468 * [ WFC: In DTD ]
6469 * Parameter-entity references may only appear in the DTD.
6470 * NOTE: misleading but this is handled.
6471 */
6472void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006473xmlParsePEReference(xmlParserCtxtPtr ctxt)
6474{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006475 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006476 xmlEntityPtr entity = NULL;
6477 xmlParserInputPtr input;
6478
6479 if (RAW == '%') {
6480 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006481 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006482 if (name == NULL) {
6483 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6484 "xmlParsePEReference: no name\n");
6485 } else {
6486 if (RAW == ';') {
6487 NEXT;
6488 if ((ctxt->sax != NULL) &&
6489 (ctxt->sax->getParameterEntity != NULL))
6490 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6491 name);
6492 if (entity == NULL) {
6493 /*
6494 * [ WFC: Entity Declared ]
6495 * In a document without any DTD, a document with only an
6496 * internal DTD subset which contains no parameter entity
6497 * references, or a document with "standalone='yes'", ...
6498 * ... The declaration of a parameter entity must precede
6499 * any reference to it...
6500 */
6501 if ((ctxt->standalone == 1) ||
6502 ((ctxt->hasExternalSubset == 0) &&
6503 (ctxt->hasPErefs == 0))) {
6504 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6505 "PEReference: %%%s; not found\n",
6506 name);
6507 } else {
6508 /*
6509 * [ VC: Entity Declared ]
6510 * In a document with an external subset or external
6511 * parameter entities with "standalone='no'", ...
6512 * ... The declaration of a parameter entity must
6513 * precede any reference to it...
6514 */
6515 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6516 "PEReference: %%%s; not found\n",
6517 name, NULL);
6518 ctxt->valid = 0;
6519 }
6520 } else {
6521 /*
6522 * Internal checking in case the entity quest barfed
6523 */
6524 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6525 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6526 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6527 "Internal: %%%s; is not a parameter entity\n",
6528 name, NULL);
6529 } else if (ctxt->input->free != deallocblankswrapper) {
6530 input =
6531 xmlNewBlanksWrapperInputStream(ctxt, entity);
6532 xmlPushInput(ctxt, input);
6533 } else {
6534 /*
6535 * TODO !!!
6536 * handle the extra spaces added before and after
6537 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6538 */
6539 input = xmlNewEntityInputStream(ctxt, entity);
6540 xmlPushInput(ctxt, input);
6541 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006542 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006543 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006544 xmlParseTextDecl(ctxt);
6545 if (ctxt->errNo ==
6546 XML_ERR_UNSUPPORTED_ENCODING) {
6547 /*
6548 * The XML REC instructs us to stop parsing
6549 * right here
6550 */
6551 ctxt->instate = XML_PARSER_EOF;
6552 return;
6553 }
6554 }
6555 }
6556 }
6557 ctxt->hasPErefs = 1;
6558 } else {
6559 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6560 }
6561 }
Owen Taylor3473f882001-02-23 17:55:21 +00006562 }
6563}
6564
6565/**
6566 * xmlParseStringPEReference:
6567 * @ctxt: an XML parser context
6568 * @str: a pointer to an index in the string
6569 *
6570 * parse PEReference declarations
6571 *
6572 * [69] PEReference ::= '%' Name ';'
6573 *
6574 * [ WFC: No Recursion ]
6575 * A parsed entity must not contain a recursive
6576 * reference to itself, either directly or indirectly.
6577 *
6578 * [ WFC: Entity Declared ]
6579 * In a document without any DTD, a document with only an internal DTD
6580 * subset which contains no parameter entity references, or a document
6581 * with "standalone='yes'", ... ... The declaration of a parameter
6582 * entity must precede any reference to it...
6583 *
6584 * [ VC: Entity Declared ]
6585 * In a document with an external subset or external parameter entities
6586 * with "standalone='no'", ... ... The declaration of a parameter entity
6587 * must precede any reference to it...
6588 *
6589 * [ WFC: In DTD ]
6590 * Parameter-entity references may only appear in the DTD.
6591 * NOTE: misleading but this is handled.
6592 *
6593 * Returns the string of the entity content.
6594 * str is updated to the current value of the index
6595 */
6596xmlEntityPtr
6597xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6598 const xmlChar *ptr;
6599 xmlChar cur;
6600 xmlChar *name;
6601 xmlEntityPtr entity = NULL;
6602
6603 if ((str == NULL) || (*str == NULL)) return(NULL);
6604 ptr = *str;
6605 cur = *ptr;
6606 if (cur == '%') {
6607 ptr++;
6608 cur = *ptr;
6609 name = xmlParseStringName(ctxt, &ptr);
6610 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006613 } else {
6614 cur = *ptr;
6615 if (cur == ';') {
6616 ptr++;
6617 cur = *ptr;
6618 if ((ctxt->sax != NULL) &&
6619 (ctxt->sax->getParameterEntity != NULL))
6620 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6621 name);
6622 if (entity == NULL) {
6623 /*
6624 * [ WFC: Entity Declared ]
6625 * In a document without any DTD, a document with only an
6626 * internal DTD subset which contains no parameter entity
6627 * references, or a document with "standalone='yes'", ...
6628 * ... The declaration of a parameter entity must precede
6629 * any reference to it...
6630 */
6631 if ((ctxt->standalone == 1) ||
6632 ((ctxt->hasExternalSubset == 0) &&
6633 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006634 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006635 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006636 } else {
6637 /*
6638 * [ VC: Entity Declared ]
6639 * In a document with an external subset or external
6640 * parameter entities with "standalone='no'", ...
6641 * ... The declaration of a parameter entity must
6642 * precede any reference to it...
6643 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006644 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6645 "PEReference: %%%s; not found\n",
6646 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 ctxt->valid = 0;
6648 }
6649 } else {
6650 /*
6651 * Internal checking in case the entity quest barfed
6652 */
6653 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6654 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006655 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6656 "%%%s; is not a parameter entity\n",
6657 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
6659 }
6660 ctxt->hasPErefs = 1;
6661 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006662 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
6664 xmlFree(name);
6665 }
6666 }
6667 *str = ptr;
6668 return(entity);
6669}
6670
6671/**
6672 * xmlParseDocTypeDecl:
6673 * @ctxt: an XML parser context
6674 *
6675 * parse a DOCTYPE declaration
6676 *
6677 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6678 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6679 *
6680 * [ VC: Root Element Type ]
6681 * The Name in the document type declaration must match the element
6682 * type of the root element.
6683 */
6684
6685void
6686xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006687 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006688 xmlChar *ExternalID = NULL;
6689 xmlChar *URI = NULL;
6690
6691 /*
6692 * We know that '<!DOCTYPE' has been detected.
6693 */
6694 SKIP(9);
6695
6696 SKIP_BLANKS;
6697
6698 /*
6699 * Parse the DOCTYPE name.
6700 */
6701 name = xmlParseName(ctxt);
6702 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006703 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6704 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006705 }
6706 ctxt->intSubName = name;
6707
6708 SKIP_BLANKS;
6709
6710 /*
6711 * Check for SystemID and ExternalID
6712 */
6713 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6714
6715 if ((URI != NULL) || (ExternalID != NULL)) {
6716 ctxt->hasExternalSubset = 1;
6717 }
6718 ctxt->extSubURI = URI;
6719 ctxt->extSubSystem = ExternalID;
6720
6721 SKIP_BLANKS;
6722
6723 /*
6724 * Create and update the internal subset.
6725 */
6726 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6727 (!ctxt->disableSAX))
6728 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6729
6730 /*
6731 * Is there any internal subset declarations ?
6732 * they are handled separately in xmlParseInternalSubset()
6733 */
6734 if (RAW == '[')
6735 return;
6736
6737 /*
6738 * We should be at the end of the DOCTYPE declaration.
6739 */
6740 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006741 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006742 }
6743 NEXT;
6744}
6745
6746/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006747 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006748 * @ctxt: an XML parser context
6749 *
6750 * parse the internal subset declaration
6751 *
6752 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6753 */
6754
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006755static void
Owen Taylor3473f882001-02-23 17:55:21 +00006756xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6757 /*
6758 * Is there any DTD definition ?
6759 */
6760 if (RAW == '[') {
6761 ctxt->instate = XML_PARSER_DTD;
6762 NEXT;
6763 /*
6764 * Parse the succession of Markup declarations and
6765 * PEReferences.
6766 * Subsequence (markupdecl | PEReference | S)*
6767 */
6768 while (RAW != ']') {
6769 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006770 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006771
6772 SKIP_BLANKS;
6773 xmlParseMarkupDecl(ctxt);
6774 xmlParsePEReference(ctxt);
6775
6776 /*
6777 * Pop-up of finished entities.
6778 */
6779 while ((RAW == 0) && (ctxt->inputNr > 1))
6780 xmlPopInput(ctxt);
6781
6782 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006783 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006784 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006785 break;
6786 }
6787 }
6788 if (RAW == ']') {
6789 NEXT;
6790 SKIP_BLANKS;
6791 }
6792 }
6793
6794 /*
6795 * We should be at the end of the DOCTYPE declaration.
6796 */
6797 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006798 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006799 }
6800 NEXT;
6801}
6802
Daniel Veillard81273902003-09-30 00:43:48 +00006803#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006804/**
6805 * xmlParseAttribute:
6806 * @ctxt: an XML parser context
6807 * @value: a xmlChar ** used to store the value of the attribute
6808 *
6809 * parse an attribute
6810 *
6811 * [41] Attribute ::= Name Eq AttValue
6812 *
6813 * [ WFC: No External Entity References ]
6814 * Attribute values cannot contain direct or indirect entity references
6815 * to external entities.
6816 *
6817 * [ WFC: No < in Attribute Values ]
6818 * The replacement text of any entity referred to directly or indirectly in
6819 * an attribute value (other than "&lt;") must not contain a <.
6820 *
6821 * [ VC: Attribute Value Type ]
6822 * The attribute must have been declared; the value must be of the type
6823 * declared for it.
6824 *
6825 * [25] Eq ::= S? '=' S?
6826 *
6827 * With namespace:
6828 *
6829 * [NS 11] Attribute ::= QName Eq AttValue
6830 *
6831 * Also the case QName == xmlns:??? is handled independently as a namespace
6832 * definition.
6833 *
6834 * Returns the attribute name, and the value in *value.
6835 */
6836
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006837const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006838xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006839 const xmlChar *name;
6840 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006841
6842 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006843 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006844 name = xmlParseName(ctxt);
6845 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006846 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006847 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006848 return(NULL);
6849 }
6850
6851 /*
6852 * read the value
6853 */
6854 SKIP_BLANKS;
6855 if (RAW == '=') {
6856 NEXT;
6857 SKIP_BLANKS;
6858 val = xmlParseAttValue(ctxt);
6859 ctxt->instate = XML_PARSER_CONTENT;
6860 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006861 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006862 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006863 return(NULL);
6864 }
6865
6866 /*
6867 * Check that xml:lang conforms to the specification
6868 * No more registered as an error, just generate a warning now
6869 * since this was deprecated in XML second edition
6870 */
6871 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6872 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006873 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6874 "Malformed value for xml:lang : %s\n",
6875 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006876 }
6877 }
6878
6879 /*
6880 * Check that xml:space conforms to the specification
6881 */
6882 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6883 if (xmlStrEqual(val, BAD_CAST "default"))
6884 *(ctxt->space) = 0;
6885 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6886 *(ctxt->space) = 1;
6887 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006888 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006889"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006890 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006891 }
6892 }
6893
6894 *value = val;
6895 return(name);
6896}
6897
6898/**
6899 * xmlParseStartTag:
6900 * @ctxt: an XML parser context
6901 *
6902 * parse a start of tag either for rule element or
6903 * EmptyElement. In both case we don't parse the tag closing chars.
6904 *
6905 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6906 *
6907 * [ WFC: Unique Att Spec ]
6908 * No attribute name may appear more than once in the same start-tag or
6909 * empty-element tag.
6910 *
6911 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6912 *
6913 * [ WFC: Unique Att Spec ]
6914 * No attribute name may appear more than once in the same start-tag or
6915 * empty-element tag.
6916 *
6917 * With namespace:
6918 *
6919 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6920 *
6921 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6922 *
6923 * Returns the element name parsed
6924 */
6925
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006926const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006927xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006928 const xmlChar *name;
6929 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006930 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006931 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006932 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006933 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006934 int i;
6935
6936 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006937 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006938
6939 name = xmlParseName(ctxt);
6940 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006941 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006942 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006943 return(NULL);
6944 }
6945
6946 /*
6947 * Now parse the attributes, it ends up with the ending
6948 *
6949 * (S Attribute)* S?
6950 */
6951 SKIP_BLANKS;
6952 GROW;
6953
Daniel Veillard21a0f912001-02-25 19:54:14 +00006954 while ((RAW != '>') &&
6955 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006956 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006957 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006958 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006959
6960 attname = xmlParseAttribute(ctxt, &attvalue);
6961 if ((attname != NULL) && (attvalue != NULL)) {
6962 /*
6963 * [ WFC: Unique Att Spec ]
6964 * No attribute name may appear more than once in the same
6965 * start-tag or empty-element tag.
6966 */
6967 for (i = 0; i < nbatts;i += 2) {
6968 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006969 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006970 xmlFree(attvalue);
6971 goto failed;
6972 }
6973 }
Owen Taylor3473f882001-02-23 17:55:21 +00006974 /*
6975 * Add the pair to atts
6976 */
6977 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006978 maxatts = 22; /* allow for 10 attrs by default */
6979 atts = (const xmlChar **)
6980 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006981 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006982 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006983 if (attvalue != NULL)
6984 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006985 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006986 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006987 ctxt->atts = atts;
6988 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006989 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006990 const xmlChar **n;
6991
Owen Taylor3473f882001-02-23 17:55:21 +00006992 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006993 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006994 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006995 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006996 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006997 if (attvalue != NULL)
6998 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006999 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007000 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007001 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007002 ctxt->atts = atts;
7003 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007004 }
7005 atts[nbatts++] = attname;
7006 atts[nbatts++] = attvalue;
7007 atts[nbatts] = NULL;
7008 atts[nbatts + 1] = NULL;
7009 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007010 if (attvalue != NULL)
7011 xmlFree(attvalue);
7012 }
7013
7014failed:
7015
Daniel Veillard3772de32002-12-17 10:31:45 +00007016 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007017 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7018 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007019 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7021 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007022 }
7023 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007024 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7025 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007026 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7027 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007028 break;
7029 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007030 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007031 GROW;
7032 }
7033
7034 /*
7035 * SAX: Start of Element !
7036 */
7037 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007038 (!ctxt->disableSAX)) {
7039 if (nbatts > 0)
7040 ctxt->sax->startElement(ctxt->userData, name, atts);
7041 else
7042 ctxt->sax->startElement(ctxt->userData, name, NULL);
7043 }
Owen Taylor3473f882001-02-23 17:55:21 +00007044
7045 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007046 /* Free only the content strings */
7047 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007048 if (atts[i] != NULL)
7049 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007050 }
7051 return(name);
7052}
7053
7054/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007055 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007056 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007057 * @line: line of the start tag
7058 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007059 *
7060 * parse an end of tag
7061 *
7062 * [42] ETag ::= '</' Name S? '>'
7063 *
7064 * With namespace
7065 *
7066 * [NS 9] ETag ::= '</' QName S? '>'
7067 */
7068
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007069static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007070xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007071 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007072
7073 GROW;
7074 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007075 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007076 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007077 return;
7078 }
7079 SKIP(2);
7080
Daniel Veillard46de64e2002-05-29 08:21:33 +00007081 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007082
7083 /*
7084 * We should definitely be at the ending "S? '>'" part
7085 */
7086 GROW;
7087 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007088 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007089 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007090 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007091 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007092
7093 /*
7094 * [ WFC: Element Type Match ]
7095 * The Name in an element's end-tag must match the element type in the
7096 * start-tag.
7097 *
7098 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007099 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007100 if (name == NULL) name = BAD_CAST "unparseable";
7101 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007102 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007103 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007104 }
7105
7106 /*
7107 * SAX: End of Tag
7108 */
7109 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7110 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007111 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007112
Daniel Veillarde57ec792003-09-10 10:50:59 +00007113 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007114 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007115 return;
7116}
7117
7118/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007119 * xmlParseEndTag:
7120 * @ctxt: an XML parser context
7121 *
7122 * parse an end of tag
7123 *
7124 * [42] ETag ::= '</' Name S? '>'
7125 *
7126 * With namespace
7127 *
7128 * [NS 9] ETag ::= '</' QName S? '>'
7129 */
7130
7131void
7132xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007133 xmlParseEndTag1(ctxt, 0);
7134}
Daniel Veillard81273902003-09-30 00:43:48 +00007135#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007136
7137/************************************************************************
7138 * *
7139 * SAX 2 specific operations *
7140 * *
7141 ************************************************************************/
7142
7143static const xmlChar *
7144xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7145 int len = 0, l;
7146 int c;
7147 int count = 0;
7148
7149 /*
7150 * Handler for more complex cases
7151 */
7152 GROW;
7153 c = CUR_CHAR(l);
7154 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007155 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007156 return(NULL);
7157 }
7158
7159 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007160 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007161 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007162 (IS_COMBINING(c)) ||
7163 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007164 if (count++ > 100) {
7165 count = 0;
7166 GROW;
7167 }
7168 len += l;
7169 NEXTL(l);
7170 c = CUR_CHAR(l);
7171 }
7172 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7173}
7174
7175/*
7176 * xmlGetNamespace:
7177 * @ctxt: an XML parser context
7178 * @prefix: the prefix to lookup
7179 *
7180 * Lookup the namespace name for the @prefix (which ca be NULL)
7181 * The prefix must come from the @ctxt->dict dictionnary
7182 *
7183 * Returns the namespace name or NULL if not bound
7184 */
7185static const xmlChar *
7186xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7187 int i;
7188
Daniel Veillarde57ec792003-09-10 10:50:59 +00007189 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007190 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007191 if (ctxt->nsTab[i] == prefix) {
7192 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7193 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007194 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007195 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007196 return(NULL);
7197}
7198
7199/**
7200 * xmlParseNCName:
7201 * @ctxt: an XML parser context
7202 *
7203 * parse an XML name.
7204 *
7205 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7206 * CombiningChar | Extender
7207 *
7208 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7209 *
7210 * Returns the Name parsed or NULL
7211 */
7212
7213static const xmlChar *
7214xmlParseNCName(xmlParserCtxtPtr ctxt) {
7215 const xmlChar *in;
7216 const xmlChar *ret;
7217 int count = 0;
7218
7219 /*
7220 * Accelerator for simple ASCII names
7221 */
7222 in = ctxt->input->cur;
7223 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7224 ((*in >= 0x41) && (*in <= 0x5A)) ||
7225 (*in == '_')) {
7226 in++;
7227 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7228 ((*in >= 0x41) && (*in <= 0x5A)) ||
7229 ((*in >= 0x30) && (*in <= 0x39)) ||
7230 (*in == '_') || (*in == '-') ||
7231 (*in == '.'))
7232 in++;
7233 if ((*in > 0) && (*in < 0x80)) {
7234 count = in - ctxt->input->cur;
7235 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7236 ctxt->input->cur = in;
7237 ctxt->nbChars += count;
7238 ctxt->input->col += count;
7239 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007240 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007241 }
7242 return(ret);
7243 }
7244 }
7245 return(xmlParseNCNameComplex(ctxt));
7246}
7247
7248/**
7249 * xmlParseQName:
7250 * @ctxt: an XML parser context
7251 * @prefix: pointer to store the prefix part
7252 *
7253 * parse an XML Namespace QName
7254 *
7255 * [6] QName ::= (Prefix ':')? LocalPart
7256 * [7] Prefix ::= NCName
7257 * [8] LocalPart ::= NCName
7258 *
7259 * Returns the Name parsed or NULL
7260 */
7261
7262static const xmlChar *
7263xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7264 const xmlChar *l, *p;
7265
7266 GROW;
7267
7268 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007269 if (l == NULL) {
7270 if (CUR == ':') {
7271 l = xmlParseName(ctxt);
7272 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007273 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7274 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007275 *prefix = NULL;
7276 return(l);
7277 }
7278 }
7279 return(NULL);
7280 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007281 if (CUR == ':') {
7282 NEXT;
7283 p = l;
7284 l = xmlParseNCName(ctxt);
7285 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007286 xmlChar *tmp;
7287
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007288 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7289 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007290 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7291 p = xmlDictLookup(ctxt->dict, tmp, -1);
7292 if (tmp != NULL) xmlFree(tmp);
7293 *prefix = NULL;
7294 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007295 }
7296 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007297 xmlChar *tmp;
7298
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007299 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7300 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007301 NEXT;
7302 tmp = (xmlChar *) xmlParseName(ctxt);
7303 if (tmp != NULL) {
7304 tmp = xmlBuildQName(tmp, l, NULL, 0);
7305 l = xmlDictLookup(ctxt->dict, tmp, -1);
7306 if (tmp != NULL) xmlFree(tmp);
7307 *prefix = p;
7308 return(l);
7309 }
7310 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7311 l = xmlDictLookup(ctxt->dict, tmp, -1);
7312 if (tmp != NULL) xmlFree(tmp);
7313 *prefix = p;
7314 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007315 }
7316 *prefix = p;
7317 } else
7318 *prefix = NULL;
7319 return(l);
7320}
7321
7322/**
7323 * xmlParseQNameAndCompare:
7324 * @ctxt: an XML parser context
7325 * @name: the localname
7326 * @prefix: the prefix, if any.
7327 *
7328 * parse an XML name and compares for match
7329 * (specialized for endtag parsing)
7330 *
7331 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7332 * and the name for mismatch
7333 */
7334
7335static const xmlChar *
7336xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7337 xmlChar const *prefix) {
7338 const xmlChar *cmp = name;
7339 const xmlChar *in;
7340 const xmlChar *ret;
7341 const xmlChar *prefix2;
7342
7343 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7344
7345 GROW;
7346 in = ctxt->input->cur;
7347
7348 cmp = prefix;
7349 while (*in != 0 && *in == *cmp) {
7350 ++in;
7351 ++cmp;
7352 }
7353 if ((*cmp == 0) && (*in == ':')) {
7354 in++;
7355 cmp = name;
7356 while (*in != 0 && *in == *cmp) {
7357 ++in;
7358 ++cmp;
7359 }
William M. Brack76e95df2003-10-18 16:20:14 +00007360 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007361 /* success */
7362 ctxt->input->cur = in;
7363 return((const xmlChar*) 1);
7364 }
7365 }
7366 /*
7367 * all strings coms from the dictionary, equality can be done directly
7368 */
7369 ret = xmlParseQName (ctxt, &prefix2);
7370 if ((ret == name) && (prefix == prefix2))
7371 return((const xmlChar*) 1);
7372 return ret;
7373}
7374
7375/**
7376 * xmlParseAttValueInternal:
7377 * @ctxt: an XML parser context
7378 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007379 * @alloc: whether the attribute was reallocated as a new string
7380 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007381 *
7382 * parse a value for an attribute.
7383 * NOTE: if no normalization is needed, the routine will return pointers
7384 * directly from the data buffer.
7385 *
7386 * 3.3.3 Attribute-Value Normalization:
7387 * Before the value of an attribute is passed to the application or
7388 * checked for validity, the XML processor must normalize it as follows:
7389 * - a character reference is processed by appending the referenced
7390 * character to the attribute value
7391 * - an entity reference is processed by recursively processing the
7392 * replacement text of the entity
7393 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7394 * appending #x20 to the normalized value, except that only a single
7395 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7396 * parsed entity or the literal entity value of an internal parsed entity
7397 * - other characters are processed by appending them to the normalized value
7398 * If the declared value is not CDATA, then the XML processor must further
7399 * process the normalized attribute value by discarding any leading and
7400 * trailing space (#x20) characters, and by replacing sequences of space
7401 * (#x20) characters by a single space (#x20) character.
7402 * All attributes for which no declaration has been read should be treated
7403 * by a non-validating parser as if declared CDATA.
7404 *
7405 * Returns the AttValue parsed or NULL. The value has to be freed by the
7406 * caller if it was copied, this can be detected by val[*len] == 0.
7407 */
7408
7409static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007410xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7411 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007412{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007413 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007414 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007415 xmlChar *ret = NULL;
7416
7417 GROW;
7418 in = (xmlChar *) CUR_PTR;
7419 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007420 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007421 return (NULL);
7422 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007424
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007425 /*
7426 * try to handle in this routine the most common case where no
7427 * allocation of a new string is required and where content is
7428 * pure ASCII.
7429 */
7430 limit = *in++;
7431 end = ctxt->input->end;
7432 start = in;
7433 if (in >= end) {
7434 const xmlChar *oldbase = ctxt->input->base;
7435 GROW;
7436 if (oldbase != ctxt->input->base) {
7437 long delta = ctxt->input->base - oldbase;
7438 start = start + delta;
7439 in = in + delta;
7440 }
7441 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007442 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007443 if (normalize) {
7444 /*
7445 * Skip any leading spaces
7446 */
7447 while ((in < end) && (*in != limit) &&
7448 ((*in == 0x20) || (*in == 0x9) ||
7449 (*in == 0xA) || (*in == 0xD))) {
7450 in++;
7451 start = in;
7452 if (in >= end) {
7453 const xmlChar *oldbase = ctxt->input->base;
7454 GROW;
7455 if (oldbase != ctxt->input->base) {
7456 long delta = ctxt->input->base - oldbase;
7457 start = start + delta;
7458 in = in + delta;
7459 }
7460 end = ctxt->input->end;
7461 }
7462 }
7463 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7464 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7465 if ((*in++ == 0x20) && (*in == 0x20)) break;
7466 if (in >= end) {
7467 const xmlChar *oldbase = ctxt->input->base;
7468 GROW;
7469 if (oldbase != ctxt->input->base) {
7470 long delta = ctxt->input->base - oldbase;
7471 start = start + delta;
7472 in = in + delta;
7473 }
7474 end = ctxt->input->end;
7475 }
7476 }
7477 last = in;
7478 /*
7479 * skip the trailing blanks
7480 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007481 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007482 while ((in < end) && (*in != limit) &&
7483 ((*in == 0x20) || (*in == 0x9) ||
7484 (*in == 0xA) || (*in == 0xD))) {
7485 in++;
7486 if (in >= end) {
7487 const xmlChar *oldbase = ctxt->input->base;
7488 GROW;
7489 if (oldbase != ctxt->input->base) {
7490 long delta = ctxt->input->base - oldbase;
7491 start = start + delta;
7492 in = in + delta;
7493 last = last + delta;
7494 }
7495 end = ctxt->input->end;
7496 }
7497 }
7498 if (*in != limit) goto need_complex;
7499 } else {
7500 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7501 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7502 in++;
7503 if (in >= end) {
7504 const xmlChar *oldbase = ctxt->input->base;
7505 GROW;
7506 if (oldbase != ctxt->input->base) {
7507 long delta = ctxt->input->base - oldbase;
7508 start = start + delta;
7509 in = in + delta;
7510 }
7511 end = ctxt->input->end;
7512 }
7513 }
7514 last = in;
7515 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007517 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007519 *len = last - start;
7520 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007521 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007522 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007523 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524 }
7525 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007526 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007527 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007528need_complex:
7529 if (alloc) *alloc = 1;
7530 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007531}
7532
7533/**
7534 * xmlParseAttribute2:
7535 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007536 * @pref: the element prefix
7537 * @elem: the element name
7538 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007539 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007540 * @len: an int * to save the length of the attribute
7541 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007542 *
7543 * parse an attribute in the new SAX2 framework.
7544 *
7545 * Returns the attribute name, and the value in *value, .
7546 */
7547
7548static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007549xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7550 const xmlChar *pref, const xmlChar *elem,
7551 const xmlChar **prefix, xmlChar **value,
7552 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007553 const xmlChar *name;
7554 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007555 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007556
7557 *value = NULL;
7558 GROW;
7559 name = xmlParseQName(ctxt, prefix);
7560 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007561 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7562 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007563 return(NULL);
7564 }
7565
7566 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007567 * get the type if needed
7568 */
7569 if (ctxt->attsSpecial != NULL) {
7570 int type;
7571
7572 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7573 pref, elem, *prefix, name);
7574 if (type != 0) normalize = 1;
7575 }
7576
7577 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007578 * read the value
7579 */
7580 SKIP_BLANKS;
7581 if (RAW == '=') {
7582 NEXT;
7583 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007584 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007585 ctxt->instate = XML_PARSER_CONTENT;
7586 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007587 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007588 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007589 return(NULL);
7590 }
7591
7592 /*
7593 * Check that xml:lang conforms to the specification
7594 * No more registered as an error, just generate a warning now
7595 * since this was deprecated in XML second edition
7596 */
7597 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7598 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007599 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7600 "Malformed value for xml:lang : %s\n",
7601 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007602 }
7603 }
7604
7605 /*
7606 * Check that xml:space conforms to the specification
7607 */
7608 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7609 if (xmlStrEqual(val, BAD_CAST "default"))
7610 *(ctxt->space) = 0;
7611 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7612 *(ctxt->space) = 1;
7613 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007614 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007615"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7616 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007617 }
7618 }
7619
7620 *value = val;
7621 return(name);
7622}
7623
7624/**
7625 * xmlParseStartTag2:
7626 * @ctxt: an XML parser context
7627 *
7628 * parse a start of tag either for rule element or
7629 * EmptyElement. In both case we don't parse the tag closing chars.
7630 * This routine is called when running SAX2 parsing
7631 *
7632 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7633 *
7634 * [ WFC: Unique Att Spec ]
7635 * No attribute name may appear more than once in the same start-tag or
7636 * empty-element tag.
7637 *
7638 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7639 *
7640 * [ WFC: Unique Att Spec ]
7641 * No attribute name may appear more than once in the same start-tag or
7642 * empty-element tag.
7643 *
7644 * With namespace:
7645 *
7646 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7647 *
7648 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7649 *
7650 * Returns the element name parsed
7651 */
7652
7653static const xmlChar *
7654xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7655 const xmlChar **URI) {
7656 const xmlChar *localname;
7657 const xmlChar *prefix;
7658 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007659 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007660 const xmlChar *nsname;
7661 xmlChar *attvalue;
7662 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007664 int nratts, nbatts, nbdef;
7665 int i, j, nbNs, attval;
7666 const xmlChar *base;
7667 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007668
7669 if (RAW != '<') return(NULL);
7670 NEXT1;
7671
7672 /*
7673 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7674 * point since the attribute values may be stored as pointers to
7675 * the buffer and calling SHRINK would destroy them !
7676 * The Shrinking is only possible once the full set of attribute
7677 * callbacks have been done.
7678 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007679reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007680 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007681 base = ctxt->input->base;
7682 cur = ctxt->input->cur - ctxt->input->base;
7683 nbatts = 0;
7684 nratts = 0;
7685 nbdef = 0;
7686 nbNs = 0;
7687 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007688
7689 localname = xmlParseQName(ctxt, &prefix);
7690 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007691 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7692 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 return(NULL);
7694 }
7695
7696 /*
7697 * Now parse the attributes, it ends up with the ending
7698 *
7699 * (S Attribute)* S?
7700 */
7701 SKIP_BLANKS;
7702 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007703 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704
7705 while ((RAW != '>') &&
7706 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007707 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 const xmlChar *q = CUR_PTR;
7709 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007710 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007711
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007712 attname = xmlParseAttribute2(ctxt, prefix, localname,
7713 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 if ((attname != NULL) && (attvalue != NULL)) {
7715 if (len < 0) len = xmlStrlen(attvalue);
7716 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007717 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7718 xmlURIPtr uri;
7719
7720 if (*URL != 0) {
7721 uri = xmlParseURI((const char *) URL);
7722 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007723 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7724 "xmlns: %s not a valid URI\n",
7725 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007726 } else {
7727 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007728 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7729 "xmlns: URI %s is not absolute\n",
7730 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007731 }
7732 xmlFreeURI(uri);
7733 }
7734 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007736 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007738 for (j = 1;j <= nbNs;j++)
7739 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7740 break;
7741 if (j <= nbNs)
7742 xmlErrAttributeDup(ctxt, NULL, attname);
7743 else
7744 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007745 if (alloc != 0) xmlFree(attvalue);
7746 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007747 continue;
7748 }
7749 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007750 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7751 xmlURIPtr uri;
7752
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007753 if (attname == ctxt->str_xml) {
7754 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007755 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7756 "xml namespace prefix mapped to wrong URI\n",
7757 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007758 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007759 /*
7760 * Do not keep a namespace definition node
7761 */
7762 if (alloc != 0) xmlFree(attvalue);
7763 SKIP_BLANKS;
7764 continue;
7765 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007766 uri = xmlParseURI((const char *) URL);
7767 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007768 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7769 "xmlns:%s: '%s' is not a valid URI\n",
7770 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007771 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007772 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007773 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7774 "xmlns:%s: URI %s is not absolute\n",
7775 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007776 }
7777 xmlFreeURI(uri);
7778 }
7779
Daniel Veillard0fb18932003-09-07 09:14:37 +00007780 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007781 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007783 for (j = 1;j <= nbNs;j++)
7784 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7785 break;
7786 if (j <= nbNs)
7787 xmlErrAttributeDup(ctxt, aprefix, attname);
7788 else
7789 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007790 if (alloc != 0) xmlFree(attvalue);
7791 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007792 continue;
7793 }
7794
7795 /*
7796 * Add the pair to atts
7797 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007798 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7799 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007800 if (attvalue[len] == 0)
7801 xmlFree(attvalue);
7802 goto failed;
7803 }
7804 maxatts = ctxt->maxatts;
7805 atts = ctxt->atts;
7806 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007807 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007808 atts[nbatts++] = attname;
7809 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007810 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007811 atts[nbatts++] = attvalue;
7812 attvalue += len;
7813 atts[nbatts++] = attvalue;
7814 /*
7815 * tag if some deallocation is needed
7816 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007817 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007818 } else {
7819 if ((attvalue != NULL) && (attvalue[len] == 0))
7820 xmlFree(attvalue);
7821 }
7822
7823failed:
7824
7825 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007826 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007827 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7828 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007829 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007830 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7831 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007832 }
7833 SKIP_BLANKS;
7834 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7835 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007836 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007837 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007838 break;
7839 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007840 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007841 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007842 }
7843
Daniel Veillard0fb18932003-09-07 09:14:37 +00007844 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007845 * The attributes defaulting
7846 */
7847 if (ctxt->attsDefault != NULL) {
7848 xmlDefAttrsPtr defaults;
7849
7850 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7851 if (defaults != NULL) {
7852 for (i = 0;i < defaults->nbAttrs;i++) {
7853 attname = defaults->values[4 * i];
7854 aprefix = defaults->values[4 * i + 1];
7855
7856 /*
7857 * special work for namespaces defaulted defs
7858 */
7859 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7860 /*
7861 * check that it's not a defined namespace
7862 */
7863 for (j = 1;j <= nbNs;j++)
7864 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7865 break;
7866 if (j <= nbNs) continue;
7867
7868 nsname = xmlGetNamespace(ctxt, NULL);
7869 if (nsname != defaults->values[4 * i + 2]) {
7870 if (nsPush(ctxt, NULL,
7871 defaults->values[4 * i + 2]) > 0)
7872 nbNs++;
7873 }
7874 } else if (aprefix == ctxt->str_xmlns) {
7875 /*
7876 * check that it's not a defined namespace
7877 */
7878 for (j = 1;j <= nbNs;j++)
7879 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7880 break;
7881 if (j <= nbNs) continue;
7882
7883 nsname = xmlGetNamespace(ctxt, attname);
7884 if (nsname != defaults->values[2]) {
7885 if (nsPush(ctxt, attname,
7886 defaults->values[4 * i + 2]) > 0)
7887 nbNs++;
7888 }
7889 } else {
7890 /*
7891 * check that it's not a defined attribute
7892 */
7893 for (j = 0;j < nbatts;j+=5) {
7894 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7895 break;
7896 }
7897 if (j < nbatts) continue;
7898
7899 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7900 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007901 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007902 }
7903 maxatts = ctxt->maxatts;
7904 atts = ctxt->atts;
7905 }
7906 atts[nbatts++] = attname;
7907 atts[nbatts++] = aprefix;
7908 if (aprefix == NULL)
7909 atts[nbatts++] = NULL;
7910 else
7911 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7912 atts[nbatts++] = defaults->values[4 * i + 2];
7913 atts[nbatts++] = defaults->values[4 * i + 3];
7914 nbdef++;
7915 }
7916 }
7917 }
7918 }
7919
Daniel Veillarde70c8772003-11-25 07:21:18 +00007920 /*
7921 * The attributes checkings
7922 */
7923 for (i = 0; i < nbatts;i += 5) {
7924 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7925 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7926 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7927 "Namespace prefix %s for %s on %s is not defined\n",
7928 atts[i + 1], atts[i], localname);
7929 }
7930 atts[i + 2] = nsname;
7931 /*
7932 * [ WFC: Unique Att Spec ]
7933 * No attribute name may appear more than once in the same
7934 * start-tag or empty-element tag.
7935 * As extended by the Namespace in XML REC.
7936 */
7937 for (j = 0; j < i;j += 5) {
7938 if (atts[i] == atts[j]) {
7939 if (atts[i+1] == atts[j+1]) {
7940 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7941 break;
7942 }
7943 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7944 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7945 "Namespaced Attribute %s in '%s' redefined\n",
7946 atts[i], nsname, NULL);
7947 break;
7948 }
7949 }
7950 }
7951 }
7952
Daniel Veillarde57ec792003-09-10 10:50:59 +00007953 nsname = xmlGetNamespace(ctxt, prefix);
7954 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007955 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7956 "Namespace prefix %s on %s is not defined\n",
7957 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007958 }
7959 *pref = prefix;
7960 *URI = nsname;
7961
7962 /*
7963 * SAX: Start of Element !
7964 */
7965 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7966 (!ctxt->disableSAX)) {
7967 if (nbNs > 0)
7968 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7969 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7970 nbatts / 5, nbdef, atts);
7971 else
7972 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7973 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7974 }
7975
7976 /*
7977 * Free up attribute allocated strings if needed
7978 */
7979 if (attval != 0) {
7980 for (i = 3,j = 0; j < nratts;i += 5,j++)
7981 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7982 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983 }
7984
7985 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007986
7987base_changed:
7988 /*
7989 * the attribute strings are valid iif the base didn't changed
7990 */
7991 if (attval != 0) {
7992 for (i = 3,j = 0; j < nratts;i += 5,j++)
7993 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7994 xmlFree((xmlChar *) atts[i]);
7995 }
7996 ctxt->input->cur = ctxt->input->base + cur;
7997 if (ctxt->wellFormed == 1) {
7998 goto reparse;
7999 }
8000 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008001}
8002
8003/**
8004 * xmlParseEndTag2:
8005 * @ctxt: an XML parser context
8006 * @line: line of the start tag
8007 * @nsNr: number of namespaces on the start tag
8008 *
8009 * parse an end of tag
8010 *
8011 * [42] ETag ::= '</' Name S? '>'
8012 *
8013 * With namespace
8014 *
8015 * [NS 9] ETag ::= '</' QName S? '>'
8016 */
8017
8018static void
8019xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8020 const xmlChar *URI, int line, int nsNr) {
8021 const xmlChar *name;
8022
8023 GROW;
8024 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008025 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008026 return;
8027 }
8028 SKIP(2);
8029
8030 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8031
8032 /*
8033 * We should definitely be at the ending "S? '>'" part
8034 */
8035 GROW;
8036 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008037 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008038 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008039 } else
8040 NEXT1;
8041
8042 /*
8043 * [ WFC: Element Type Match ]
8044 * The Name in an element's end-tag must match the element type in the
8045 * start-tag.
8046 *
8047 */
8048 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008049 if (name == NULL) name = BAD_CAST "unparseable";
8050 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008051 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008052 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008053 }
8054
8055 /*
8056 * SAX: End of Tag
8057 */
8058 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8059 (!ctxt->disableSAX))
8060 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8061
Daniel Veillard0fb18932003-09-07 09:14:37 +00008062 spacePop(ctxt);
8063 if (nsNr != 0)
8064 nsPop(ctxt, nsNr);
8065 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008066}
8067
8068/**
Owen Taylor3473f882001-02-23 17:55:21 +00008069 * xmlParseCDSect:
8070 * @ctxt: an XML parser context
8071 *
8072 * Parse escaped pure raw content.
8073 *
8074 * [18] CDSect ::= CDStart CData CDEnd
8075 *
8076 * [19] CDStart ::= '<![CDATA['
8077 *
8078 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8079 *
8080 * [21] CDEnd ::= ']]>'
8081 */
8082void
8083xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8084 xmlChar *buf = NULL;
8085 int len = 0;
8086 int size = XML_PARSER_BUFFER_SIZE;
8087 int r, rl;
8088 int s, sl;
8089 int cur, l;
8090 int count = 0;
8091
Daniel Veillard8f597c32003-10-06 08:19:27 +00008092 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008093 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008094 SKIP(9);
8095 } else
8096 return;
8097
8098 ctxt->instate = XML_PARSER_CDATA_SECTION;
8099 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008100 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008101 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008102 ctxt->instate = XML_PARSER_CONTENT;
8103 return;
8104 }
8105 NEXTL(rl);
8106 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008107 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008108 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008109 ctxt->instate = XML_PARSER_CONTENT;
8110 return;
8111 }
8112 NEXTL(sl);
8113 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008114 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008115 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008116 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008117 return;
8118 }
William M. Brack871611b2003-10-18 04:53:14 +00008119 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008120 ((r != ']') || (s != ']') || (cur != '>'))) {
8121 if (len + 5 >= size) {
8122 size *= 2;
8123 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8124 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008125 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008126 return;
8127 }
8128 }
8129 COPY_BUF(rl,buf,len,r);
8130 r = s;
8131 rl = sl;
8132 s = cur;
8133 sl = l;
8134 count++;
8135 if (count > 50) {
8136 GROW;
8137 count = 0;
8138 }
8139 NEXTL(l);
8140 cur = CUR_CHAR(l);
8141 }
8142 buf[len] = 0;
8143 ctxt->instate = XML_PARSER_CONTENT;
8144 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008145 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008146 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008147 xmlFree(buf);
8148 return;
8149 }
8150 NEXTL(l);
8151
8152 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008153 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008154 */
8155 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8156 if (ctxt->sax->cdataBlock != NULL)
8157 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008158 else if (ctxt->sax->characters != NULL)
8159 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008160 }
8161 xmlFree(buf);
8162}
8163
8164/**
8165 * xmlParseContent:
8166 * @ctxt: an XML parser context
8167 *
8168 * Parse a content:
8169 *
8170 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8171 */
8172
8173void
8174xmlParseContent(xmlParserCtxtPtr ctxt) {
8175 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008176 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008177 ((RAW != '<') || (NXT(1) != '/'))) {
8178 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008179 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008180 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008181
8182 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008183 * First case : a Processing Instruction.
8184 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008185 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008186 xmlParsePI(ctxt);
8187 }
8188
8189 /*
8190 * Second case : a CDSection
8191 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008192 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008193 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008194 xmlParseCDSect(ctxt);
8195 }
8196
8197 /*
8198 * Third case : a comment
8199 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008200 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008201 (NXT(2) == '-') && (NXT(3) == '-')) {
8202 xmlParseComment(ctxt);
8203 ctxt->instate = XML_PARSER_CONTENT;
8204 }
8205
8206 /*
8207 * Fourth case : a sub-element.
8208 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008209 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008210 xmlParseElement(ctxt);
8211 }
8212
8213 /*
8214 * Fifth case : a reference. If if has not been resolved,
8215 * parsing returns it's Name, create the node
8216 */
8217
Daniel Veillard21a0f912001-02-25 19:54:14 +00008218 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008219 xmlParseReference(ctxt);
8220 }
8221
8222 /*
8223 * Last case, text. Note that References are handled directly.
8224 */
8225 else {
8226 xmlParseCharData(ctxt, 0);
8227 }
8228
8229 GROW;
8230 /*
8231 * Pop-up of finished entities.
8232 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008233 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008234 xmlPopInput(ctxt);
8235 SHRINK;
8236
Daniel Veillardfdc91562002-07-01 21:52:03 +00008237 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008238 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8239 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008240 ctxt->instate = XML_PARSER_EOF;
8241 break;
8242 }
8243 }
8244}
8245
8246/**
8247 * xmlParseElement:
8248 * @ctxt: an XML parser context
8249 *
8250 * parse an XML element, this is highly recursive
8251 *
8252 * [39] element ::= EmptyElemTag | STag content ETag
8253 *
8254 * [ WFC: Element Type Match ]
8255 * The Name in an element's end-tag must match the element type in the
8256 * start-tag.
8257 *
Owen Taylor3473f882001-02-23 17:55:21 +00008258 */
8259
8260void
8261xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008262 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008263 const xmlChar *prefix;
8264 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008265 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008266 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008267 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008269
8270 /* Capture start position */
8271 if (ctxt->record_info) {
8272 node_info.begin_pos = ctxt->input->consumed +
8273 (CUR_PTR - ctxt->input->base);
8274 node_info.begin_line = ctxt->input->line;
8275 }
8276
8277 if (ctxt->spaceNr == 0)
8278 spacePush(ctxt, -1);
8279 else
8280 spacePush(ctxt, *ctxt->space);
8281
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008282 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008283#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008284 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008285#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008286 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008287#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 else
8289 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008290#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008291 if (name == NULL) {
8292 spacePop(ctxt);
8293 return;
8294 }
8295 namePush(ctxt, name);
8296 ret = ctxt->node;
8297
Daniel Veillard4432df22003-09-28 18:58:27 +00008298#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008299 /*
8300 * [ VC: Root Element Type ]
8301 * The Name in the document type declaration must match the element
8302 * type of the root element.
8303 */
8304 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8305 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8306 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008307#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008308
8309 /*
8310 * Check for an Empty Element.
8311 */
8312 if ((RAW == '/') && (NXT(1) == '>')) {
8313 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008314 if (ctxt->sax2) {
8315 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8316 (!ctxt->disableSAX))
8317 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008318#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008319 } else {
8320 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8321 (!ctxt->disableSAX))
8322 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008323#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008324 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008325 namePop(ctxt);
8326 spacePop(ctxt);
8327 if (nsNr != ctxt->nsNr)
8328 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008329 if ( ret != NULL && ctxt->record_info ) {
8330 node_info.end_pos = ctxt->input->consumed +
8331 (CUR_PTR - ctxt->input->base);
8332 node_info.end_line = ctxt->input->line;
8333 node_info.node = ret;
8334 xmlParserAddNodeInfo(ctxt, &node_info);
8335 }
8336 return;
8337 }
8338 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008339 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008340 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008341 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8342 "Couldn't find end of Start Tag %s line %d\n",
8343 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008344
8345 /*
8346 * end of parsing of this node.
8347 */
8348 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008349 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008350 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008351 if (nsNr != ctxt->nsNr)
8352 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008353
8354 /*
8355 * Capture end position and add node
8356 */
8357 if ( ret != NULL && ctxt->record_info ) {
8358 node_info.end_pos = ctxt->input->consumed +
8359 (CUR_PTR - ctxt->input->base);
8360 node_info.end_line = ctxt->input->line;
8361 node_info.node = ret;
8362 xmlParserAddNodeInfo(ctxt, &node_info);
8363 }
8364 return;
8365 }
8366
8367 /*
8368 * Parse the content of the element:
8369 */
8370 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008371 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008372 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008373 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008374 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008375
8376 /*
8377 * end of parsing of this node.
8378 */
8379 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008380 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008381 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008382 if (nsNr != ctxt->nsNr)
8383 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008384 return;
8385 }
8386
8387 /*
8388 * parse the end of tag: '</' should be here.
8389 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008390 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008391 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008392 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008393 }
8394#ifdef LIBXML_SAX1_ENABLED
8395 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008396 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008397#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008398
8399 /*
8400 * Capture end position and add node
8401 */
8402 if ( ret != NULL && ctxt->record_info ) {
8403 node_info.end_pos = ctxt->input->consumed +
8404 (CUR_PTR - ctxt->input->base);
8405 node_info.end_line = ctxt->input->line;
8406 node_info.node = ret;
8407 xmlParserAddNodeInfo(ctxt, &node_info);
8408 }
8409}
8410
8411/**
8412 * xmlParseVersionNum:
8413 * @ctxt: an XML parser context
8414 *
8415 * parse the XML version value.
8416 *
8417 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8418 *
8419 * Returns the string giving the XML version number, or NULL
8420 */
8421xmlChar *
8422xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8423 xmlChar *buf = NULL;
8424 int len = 0;
8425 int size = 10;
8426 xmlChar cur;
8427
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008428 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008429 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008430 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008431 return(NULL);
8432 }
8433 cur = CUR;
8434 while (((cur >= 'a') && (cur <= 'z')) ||
8435 ((cur >= 'A') && (cur <= 'Z')) ||
8436 ((cur >= '0') && (cur <= '9')) ||
8437 (cur == '_') || (cur == '.') ||
8438 (cur == ':') || (cur == '-')) {
8439 if (len + 1 >= size) {
8440 size *= 2;
8441 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8442 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008443 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008444 return(NULL);
8445 }
8446 }
8447 buf[len++] = cur;
8448 NEXT;
8449 cur=CUR;
8450 }
8451 buf[len] = 0;
8452 return(buf);
8453}
8454
8455/**
8456 * xmlParseVersionInfo:
8457 * @ctxt: an XML parser context
8458 *
8459 * parse the XML version.
8460 *
8461 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8462 *
8463 * [25] Eq ::= S? '=' S?
8464 *
8465 * Returns the version string, e.g. "1.0"
8466 */
8467
8468xmlChar *
8469xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8470 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008471
Daniel Veillarda07050d2003-10-19 14:46:32 +00008472 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008473 SKIP(7);
8474 SKIP_BLANKS;
8475 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008476 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008477 return(NULL);
8478 }
8479 NEXT;
8480 SKIP_BLANKS;
8481 if (RAW == '"') {
8482 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008483 version = xmlParseVersionNum(ctxt);
8484 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008485 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008486 } else
8487 NEXT;
8488 } else if (RAW == '\''){
8489 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008490 version = xmlParseVersionNum(ctxt);
8491 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008492 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008493 } else
8494 NEXT;
8495 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008496 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008497 }
8498 }
8499 return(version);
8500}
8501
8502/**
8503 * xmlParseEncName:
8504 * @ctxt: an XML parser context
8505 *
8506 * parse the XML encoding name
8507 *
8508 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8509 *
8510 * Returns the encoding name value or NULL
8511 */
8512xmlChar *
8513xmlParseEncName(xmlParserCtxtPtr ctxt) {
8514 xmlChar *buf = NULL;
8515 int len = 0;
8516 int size = 10;
8517 xmlChar cur;
8518
8519 cur = CUR;
8520 if (((cur >= 'a') && (cur <= 'z')) ||
8521 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008522 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008523 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008524 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008525 return(NULL);
8526 }
8527
8528 buf[len++] = cur;
8529 NEXT;
8530 cur = CUR;
8531 while (((cur >= 'a') && (cur <= 'z')) ||
8532 ((cur >= 'A') && (cur <= 'Z')) ||
8533 ((cur >= '0') && (cur <= '9')) ||
8534 (cur == '.') || (cur == '_') ||
8535 (cur == '-')) {
8536 if (len + 1 >= size) {
8537 size *= 2;
8538 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8539 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008540 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008541 return(NULL);
8542 }
8543 }
8544 buf[len++] = cur;
8545 NEXT;
8546 cur = CUR;
8547 if (cur == 0) {
8548 SHRINK;
8549 GROW;
8550 cur = CUR;
8551 }
8552 }
8553 buf[len] = 0;
8554 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008555 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008556 }
8557 return(buf);
8558}
8559
8560/**
8561 * xmlParseEncodingDecl:
8562 * @ctxt: an XML parser context
8563 *
8564 * parse the XML encoding declaration
8565 *
8566 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8567 *
8568 * this setups the conversion filters.
8569 *
8570 * Returns the encoding value or NULL
8571 */
8572
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008573const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008574xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8575 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008576
8577 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008578 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008579 SKIP(8);
8580 SKIP_BLANKS;
8581 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008582 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008583 return(NULL);
8584 }
8585 NEXT;
8586 SKIP_BLANKS;
8587 if (RAW == '"') {
8588 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008589 encoding = xmlParseEncName(ctxt);
8590 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008591 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008592 } else
8593 NEXT;
8594 } else if (RAW == '\''){
8595 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008596 encoding = xmlParseEncName(ctxt);
8597 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008598 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599 } else
8600 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008601 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008602 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008603 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008604 /*
8605 * UTF-16 encoding stwich has already taken place at this stage,
8606 * more over the little-endian/big-endian selection is already done
8607 */
8608 if ((encoding != NULL) &&
8609 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8610 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008611 if (ctxt->encoding != NULL)
8612 xmlFree((xmlChar *) ctxt->encoding);
8613 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008614 }
8615 /*
8616 * UTF-8 encoding is handled natively
8617 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008618 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008619 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8620 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008621 if (ctxt->encoding != NULL)
8622 xmlFree((xmlChar *) ctxt->encoding);
8623 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008624 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008625 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008626 xmlCharEncodingHandlerPtr handler;
8627
8628 if (ctxt->input->encoding != NULL)
8629 xmlFree((xmlChar *) ctxt->input->encoding);
8630 ctxt->input->encoding = encoding;
8631
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008632 handler = xmlFindCharEncodingHandler((const char *) encoding);
8633 if (handler != NULL) {
8634 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008635 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008636 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008637 "Unsupported encoding %s\n", encoding);
8638 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008639 }
8640 }
8641 }
8642 return(encoding);
8643}
8644
8645/**
8646 * xmlParseSDDecl:
8647 * @ctxt: an XML parser context
8648 *
8649 * parse the XML standalone declaration
8650 *
8651 * [32] SDDecl ::= S 'standalone' Eq
8652 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8653 *
8654 * [ VC: Standalone Document Declaration ]
8655 * TODO The standalone document declaration must have the value "no"
8656 * if any external markup declarations contain declarations of:
8657 * - attributes with default values, if elements to which these
8658 * attributes apply appear in the document without specifications
8659 * of values for these attributes, or
8660 * - entities (other than amp, lt, gt, apos, quot), if references
8661 * to those entities appear in the document, or
8662 * - attributes with values subject to normalization, where the
8663 * attribute appears in the document with a value which will change
8664 * as a result of normalization, or
8665 * - element types with element content, if white space occurs directly
8666 * within any instance of those types.
8667 *
8668 * Returns 1 if standalone, 0 otherwise
8669 */
8670
8671int
8672xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8673 int standalone = -1;
8674
8675 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008676 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008677 SKIP(10);
8678 SKIP_BLANKS;
8679 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008680 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008681 return(standalone);
8682 }
8683 NEXT;
8684 SKIP_BLANKS;
8685 if (RAW == '\''){
8686 NEXT;
8687 if ((RAW == 'n') && (NXT(1) == 'o')) {
8688 standalone = 0;
8689 SKIP(2);
8690 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8691 (NXT(2) == 's')) {
8692 standalone = 1;
8693 SKIP(3);
8694 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008695 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008696 }
8697 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008699 } else
8700 NEXT;
8701 } else if (RAW == '"'){
8702 NEXT;
8703 if ((RAW == 'n') && (NXT(1) == 'o')) {
8704 standalone = 0;
8705 SKIP(2);
8706 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8707 (NXT(2) == 's')) {
8708 standalone = 1;
8709 SKIP(3);
8710 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008711 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008712 }
8713 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008714 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008715 } else
8716 NEXT;
8717 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008718 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008719 }
8720 }
8721 return(standalone);
8722}
8723
8724/**
8725 * xmlParseXMLDecl:
8726 * @ctxt: an XML parser context
8727 *
8728 * parse an XML declaration header
8729 *
8730 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8731 */
8732
8733void
8734xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8735 xmlChar *version;
8736
8737 /*
8738 * We know that '<?xml' is here.
8739 */
8740 SKIP(5);
8741
William M. Brack76e95df2003-10-18 16:20:14 +00008742 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008743 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8744 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008745 }
8746 SKIP_BLANKS;
8747
8748 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008749 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008750 */
8751 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008752 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008753 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008754 } else {
8755 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8756 /*
8757 * TODO: Blueberry should be detected here
8758 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008759 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8760 "Unsupported version '%s'\n",
8761 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008762 }
8763 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008764 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008765 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008766 }
Owen Taylor3473f882001-02-23 17:55:21 +00008767
8768 /*
8769 * We may have the encoding declaration
8770 */
William M. Brack76e95df2003-10-18 16:20:14 +00008771 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008772 if ((RAW == '?') && (NXT(1) == '>')) {
8773 SKIP(2);
8774 return;
8775 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008777 }
8778 xmlParseEncodingDecl(ctxt);
8779 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8780 /*
8781 * The XML REC instructs us to stop parsing right here
8782 */
8783 return;
8784 }
8785
8786 /*
8787 * We may have the standalone status.
8788 */
William M. Brack76e95df2003-10-18 16:20:14 +00008789 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008790 if ((RAW == '?') && (NXT(1) == '>')) {
8791 SKIP(2);
8792 return;
8793 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008794 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008795 }
8796 SKIP_BLANKS;
8797 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8798
8799 SKIP_BLANKS;
8800 if ((RAW == '?') && (NXT(1) == '>')) {
8801 SKIP(2);
8802 } else if (RAW == '>') {
8803 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008804 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008805 NEXT;
8806 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008807 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008808 MOVETO_ENDTAG(CUR_PTR);
8809 NEXT;
8810 }
8811}
8812
8813/**
8814 * xmlParseMisc:
8815 * @ctxt: an XML parser context
8816 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008817 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008818 *
8819 * [27] Misc ::= Comment | PI | S
8820 */
8821
8822void
8823xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008824 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008825 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008826 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008827 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008828 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008829 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008830 NEXT;
8831 } else
8832 xmlParseComment(ctxt);
8833 }
8834}
8835
8836/**
8837 * xmlParseDocument:
8838 * @ctxt: an XML parser context
8839 *
8840 * parse an XML document (and build a tree if using the standard SAX
8841 * interface).
8842 *
8843 * [1] document ::= prolog element Misc*
8844 *
8845 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8846 *
8847 * Returns 0, -1 in case of error. the parser context is augmented
8848 * as a result of the parsing.
8849 */
8850
8851int
8852xmlParseDocument(xmlParserCtxtPtr ctxt) {
8853 xmlChar start[4];
8854 xmlCharEncoding enc;
8855
8856 xmlInitParser();
8857
8858 GROW;
8859
8860 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008861 * SAX: detecting the level.
8862 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008863 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008864
8865 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008866 * SAX: beginning of the document processing.
8867 */
8868 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8869 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8870
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008871 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8872 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008873 /*
8874 * Get the 4 first bytes and decode the charset
8875 * if enc != XML_CHAR_ENCODING_NONE
8876 * plug some encoding conversion routines.
8877 */
8878 start[0] = RAW;
8879 start[1] = NXT(1);
8880 start[2] = NXT(2);
8881 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008882 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008883 if (enc != XML_CHAR_ENCODING_NONE) {
8884 xmlSwitchEncoding(ctxt, enc);
8885 }
Owen Taylor3473f882001-02-23 17:55:21 +00008886 }
8887
8888
8889 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008890 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008891 }
8892
8893 /*
8894 * Check for the XMLDecl in the Prolog.
8895 */
8896 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008897 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008898
8899 /*
8900 * Note that we will switch encoding on the fly.
8901 */
8902 xmlParseXMLDecl(ctxt);
8903 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8904 /*
8905 * The XML REC instructs us to stop parsing right here
8906 */
8907 return(-1);
8908 }
8909 ctxt->standalone = ctxt->input->standalone;
8910 SKIP_BLANKS;
8911 } else {
8912 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8913 }
8914 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8915 ctxt->sax->startDocument(ctxt->userData);
8916
8917 /*
8918 * The Misc part of the Prolog
8919 */
8920 GROW;
8921 xmlParseMisc(ctxt);
8922
8923 /*
8924 * Then possibly doc type declaration(s) and more Misc
8925 * (doctypedecl Misc*)?
8926 */
8927 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008928 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008929
8930 ctxt->inSubset = 1;
8931 xmlParseDocTypeDecl(ctxt);
8932 if (RAW == '[') {
8933 ctxt->instate = XML_PARSER_DTD;
8934 xmlParseInternalSubset(ctxt);
8935 }
8936
8937 /*
8938 * Create and update the external subset.
8939 */
8940 ctxt->inSubset = 2;
8941 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8942 (!ctxt->disableSAX))
8943 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8944 ctxt->extSubSystem, ctxt->extSubURI);
8945 ctxt->inSubset = 0;
8946
8947
8948 ctxt->instate = XML_PARSER_PROLOG;
8949 xmlParseMisc(ctxt);
8950 }
8951
8952 /*
8953 * Time to start parsing the tree itself
8954 */
8955 GROW;
8956 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008957 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8958 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008959 } else {
8960 ctxt->instate = XML_PARSER_CONTENT;
8961 xmlParseElement(ctxt);
8962 ctxt->instate = XML_PARSER_EPILOG;
8963
8964
8965 /*
8966 * The Misc part at the end
8967 */
8968 xmlParseMisc(ctxt);
8969
Daniel Veillard561b7f82002-03-20 21:55:57 +00008970 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008971 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008972 }
8973 ctxt->instate = XML_PARSER_EOF;
8974 }
8975
8976 /*
8977 * SAX: end of the document processing.
8978 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008979 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008980 ctxt->sax->endDocument(ctxt->userData);
8981
Daniel Veillard5997aca2002-03-18 18:36:20 +00008982 /*
8983 * Remove locally kept entity definitions if the tree was not built
8984 */
8985 if ((ctxt->myDoc != NULL) &&
8986 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8987 xmlFreeDoc(ctxt->myDoc);
8988 ctxt->myDoc = NULL;
8989 }
8990
Daniel Veillardc7612992002-02-17 22:47:37 +00008991 if (! ctxt->wellFormed) {
8992 ctxt->valid = 0;
8993 return(-1);
8994 }
Owen Taylor3473f882001-02-23 17:55:21 +00008995 return(0);
8996}
8997
8998/**
8999 * xmlParseExtParsedEnt:
9000 * @ctxt: an XML parser context
9001 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009002 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009003 * An external general parsed entity is well-formed if it matches the
9004 * production labeled extParsedEnt.
9005 *
9006 * [78] extParsedEnt ::= TextDecl? content
9007 *
9008 * Returns 0, -1 in case of error. the parser context is augmented
9009 * as a result of the parsing.
9010 */
9011
9012int
9013xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9014 xmlChar start[4];
9015 xmlCharEncoding enc;
9016
9017 xmlDefaultSAXHandlerInit();
9018
Daniel Veillard309f81d2003-09-23 09:02:53 +00009019 xmlDetectSAX2(ctxt);
9020
Owen Taylor3473f882001-02-23 17:55:21 +00009021 GROW;
9022
9023 /*
9024 * SAX: beginning of the document processing.
9025 */
9026 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9027 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9028
9029 /*
9030 * Get the 4 first bytes and decode the charset
9031 * if enc != XML_CHAR_ENCODING_NONE
9032 * plug some encoding conversion routines.
9033 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009034 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9035 start[0] = RAW;
9036 start[1] = NXT(1);
9037 start[2] = NXT(2);
9038 start[3] = NXT(3);
9039 enc = xmlDetectCharEncoding(start, 4);
9040 if (enc != XML_CHAR_ENCODING_NONE) {
9041 xmlSwitchEncoding(ctxt, enc);
9042 }
Owen Taylor3473f882001-02-23 17:55:21 +00009043 }
9044
9045
9046 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009047 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009048 }
9049
9050 /*
9051 * Check for the XMLDecl in the Prolog.
9052 */
9053 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009054 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009055
9056 /*
9057 * Note that we will switch encoding on the fly.
9058 */
9059 xmlParseXMLDecl(ctxt);
9060 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9061 /*
9062 * The XML REC instructs us to stop parsing right here
9063 */
9064 return(-1);
9065 }
9066 SKIP_BLANKS;
9067 } else {
9068 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9069 }
9070 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9071 ctxt->sax->startDocument(ctxt->userData);
9072
9073 /*
9074 * Doing validity checking on chunk doesn't make sense
9075 */
9076 ctxt->instate = XML_PARSER_CONTENT;
9077 ctxt->validate = 0;
9078 ctxt->loadsubset = 0;
9079 ctxt->depth = 0;
9080
9081 xmlParseContent(ctxt);
9082
9083 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009084 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009085 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009086 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009087 }
9088
9089 /*
9090 * SAX: end of the document processing.
9091 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009092 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009093 ctxt->sax->endDocument(ctxt->userData);
9094
9095 if (! ctxt->wellFormed) return(-1);
9096 return(0);
9097}
9098
Daniel Veillard73b013f2003-09-30 12:36:01 +00009099#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009100/************************************************************************
9101 * *
9102 * Progressive parsing interfaces *
9103 * *
9104 ************************************************************************/
9105
9106/**
9107 * xmlParseLookupSequence:
9108 * @ctxt: an XML parser context
9109 * @first: the first char to lookup
9110 * @next: the next char to lookup or zero
9111 * @third: the next char to lookup or zero
9112 *
9113 * Try to find if a sequence (first, next, third) or just (first next) or
9114 * (first) is available in the input stream.
9115 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9116 * to avoid rescanning sequences of bytes, it DOES change the state of the
9117 * parser, do not use liberally.
9118 *
9119 * Returns the index to the current parsing point if the full sequence
9120 * is available, -1 otherwise.
9121 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009122static int
Owen Taylor3473f882001-02-23 17:55:21 +00009123xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9124 xmlChar next, xmlChar third) {
9125 int base, len;
9126 xmlParserInputPtr in;
9127 const xmlChar *buf;
9128
9129 in = ctxt->input;
9130 if (in == NULL) return(-1);
9131 base = in->cur - in->base;
9132 if (base < 0) return(-1);
9133 if (ctxt->checkIndex > base)
9134 base = ctxt->checkIndex;
9135 if (in->buf == NULL) {
9136 buf = in->base;
9137 len = in->length;
9138 } else {
9139 buf = in->buf->buffer->content;
9140 len = in->buf->buffer->use;
9141 }
9142 /* take into account the sequence length */
9143 if (third) len -= 2;
9144 else if (next) len --;
9145 for (;base < len;base++) {
9146 if (buf[base] == first) {
9147 if (third != 0) {
9148 if ((buf[base + 1] != next) ||
9149 (buf[base + 2] != third)) continue;
9150 } else if (next != 0) {
9151 if (buf[base + 1] != next) continue;
9152 }
9153 ctxt->checkIndex = 0;
9154#ifdef DEBUG_PUSH
9155 if (next == 0)
9156 xmlGenericError(xmlGenericErrorContext,
9157 "PP: lookup '%c' found at %d\n",
9158 first, base);
9159 else if (third == 0)
9160 xmlGenericError(xmlGenericErrorContext,
9161 "PP: lookup '%c%c' found at %d\n",
9162 first, next, base);
9163 else
9164 xmlGenericError(xmlGenericErrorContext,
9165 "PP: lookup '%c%c%c' found at %d\n",
9166 first, next, third, base);
9167#endif
9168 return(base - (in->cur - in->base));
9169 }
9170 }
9171 ctxt->checkIndex = base;
9172#ifdef DEBUG_PUSH
9173 if (next == 0)
9174 xmlGenericError(xmlGenericErrorContext,
9175 "PP: lookup '%c' failed\n", first);
9176 else if (third == 0)
9177 xmlGenericError(xmlGenericErrorContext,
9178 "PP: lookup '%c%c' failed\n", first, next);
9179 else
9180 xmlGenericError(xmlGenericErrorContext,
9181 "PP: lookup '%c%c%c' failed\n", first, next, third);
9182#endif
9183 return(-1);
9184}
9185
9186/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009187 * xmlParseGetLasts:
9188 * @ctxt: an XML parser context
9189 * @lastlt: pointer to store the last '<' from the input
9190 * @lastgt: pointer to store the last '>' from the input
9191 *
9192 * Lookup the last < and > in the current chunk
9193 */
9194static void
9195xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9196 const xmlChar **lastgt) {
9197 const xmlChar *tmp;
9198
9199 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9200 xmlGenericError(xmlGenericErrorContext,
9201 "Internal error: xmlParseGetLasts\n");
9202 return;
9203 }
9204 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9205 tmp = ctxt->input->end;
9206 tmp--;
9207 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9208 (*tmp != '>')) tmp--;
9209 if (tmp < ctxt->input->base) {
9210 *lastlt = NULL;
9211 *lastgt = NULL;
9212 } else if (*tmp == '<') {
9213 *lastlt = tmp;
9214 tmp--;
9215 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9216 if (tmp < ctxt->input->base)
9217 *lastgt = NULL;
9218 else
9219 *lastgt = tmp;
9220 } else {
9221 *lastgt = tmp;
9222 tmp--;
9223 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9224 if (tmp < ctxt->input->base)
9225 *lastlt = NULL;
9226 else
9227 *lastlt = tmp;
9228 }
9229
9230 } else {
9231 *lastlt = NULL;
9232 *lastgt = NULL;
9233 }
9234}
9235/**
Owen Taylor3473f882001-02-23 17:55:21 +00009236 * xmlParseTryOrFinish:
9237 * @ctxt: an XML parser context
9238 * @terminate: last chunk indicator
9239 *
9240 * Try to progress on parsing
9241 *
9242 * Returns zero if no parsing was possible
9243 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009244static int
Owen Taylor3473f882001-02-23 17:55:21 +00009245xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9246 int ret = 0;
9247 int avail;
9248 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009249 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009250
9251#ifdef DEBUG_PUSH
9252 switch (ctxt->instate) {
9253 case XML_PARSER_EOF:
9254 xmlGenericError(xmlGenericErrorContext,
9255 "PP: try EOF\n"); break;
9256 case XML_PARSER_START:
9257 xmlGenericError(xmlGenericErrorContext,
9258 "PP: try START\n"); break;
9259 case XML_PARSER_MISC:
9260 xmlGenericError(xmlGenericErrorContext,
9261 "PP: try MISC\n");break;
9262 case XML_PARSER_COMMENT:
9263 xmlGenericError(xmlGenericErrorContext,
9264 "PP: try COMMENT\n");break;
9265 case XML_PARSER_PROLOG:
9266 xmlGenericError(xmlGenericErrorContext,
9267 "PP: try PROLOG\n");break;
9268 case XML_PARSER_START_TAG:
9269 xmlGenericError(xmlGenericErrorContext,
9270 "PP: try START_TAG\n");break;
9271 case XML_PARSER_CONTENT:
9272 xmlGenericError(xmlGenericErrorContext,
9273 "PP: try CONTENT\n");break;
9274 case XML_PARSER_CDATA_SECTION:
9275 xmlGenericError(xmlGenericErrorContext,
9276 "PP: try CDATA_SECTION\n");break;
9277 case XML_PARSER_END_TAG:
9278 xmlGenericError(xmlGenericErrorContext,
9279 "PP: try END_TAG\n");break;
9280 case XML_PARSER_ENTITY_DECL:
9281 xmlGenericError(xmlGenericErrorContext,
9282 "PP: try ENTITY_DECL\n");break;
9283 case XML_PARSER_ENTITY_VALUE:
9284 xmlGenericError(xmlGenericErrorContext,
9285 "PP: try ENTITY_VALUE\n");break;
9286 case XML_PARSER_ATTRIBUTE_VALUE:
9287 xmlGenericError(xmlGenericErrorContext,
9288 "PP: try ATTRIBUTE_VALUE\n");break;
9289 case XML_PARSER_DTD:
9290 xmlGenericError(xmlGenericErrorContext,
9291 "PP: try DTD\n");break;
9292 case XML_PARSER_EPILOG:
9293 xmlGenericError(xmlGenericErrorContext,
9294 "PP: try EPILOG\n");break;
9295 case XML_PARSER_PI:
9296 xmlGenericError(xmlGenericErrorContext,
9297 "PP: try PI\n");break;
9298 case XML_PARSER_IGNORE:
9299 xmlGenericError(xmlGenericErrorContext,
9300 "PP: try IGNORE\n");break;
9301 }
9302#endif
9303
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009304 if ((ctxt->input != NULL) &&
9305 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009306 xmlSHRINK(ctxt);
9307 ctxt->checkIndex = 0;
9308 }
9309 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009310
Daniel Veillarda880b122003-04-21 21:36:41 +00009311 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009312 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9313 return(0);
9314
9315
Owen Taylor3473f882001-02-23 17:55:21 +00009316 /*
9317 * Pop-up of finished entities.
9318 */
9319 while ((RAW == 0) && (ctxt->inputNr > 1))
9320 xmlPopInput(ctxt);
9321
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009322 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009323 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009324 avail = ctxt->input->length -
9325 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009326 else {
9327 /*
9328 * If we are operating on converted input, try to flush
9329 * remainng chars to avoid them stalling in the non-converted
9330 * buffer.
9331 */
9332 if ((ctxt->input->buf->raw != NULL) &&
9333 (ctxt->input->buf->raw->use > 0)) {
9334 int base = ctxt->input->base -
9335 ctxt->input->buf->buffer->content;
9336 int current = ctxt->input->cur - ctxt->input->base;
9337
9338 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9339 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9340 ctxt->input->cur = ctxt->input->base + current;
9341 ctxt->input->end =
9342 &ctxt->input->buf->buffer->content[
9343 ctxt->input->buf->buffer->use];
9344 }
9345 avail = ctxt->input->buf->buffer->use -
9346 (ctxt->input->cur - ctxt->input->base);
9347 }
Owen Taylor3473f882001-02-23 17:55:21 +00009348 if (avail < 1)
9349 goto done;
9350 switch (ctxt->instate) {
9351 case XML_PARSER_EOF:
9352 /*
9353 * Document parsing is done !
9354 */
9355 goto done;
9356 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009357 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9358 xmlChar start[4];
9359 xmlCharEncoding enc;
9360
9361 /*
9362 * Very first chars read from the document flow.
9363 */
9364 if (avail < 4)
9365 goto done;
9366
9367 /*
9368 * Get the 4 first bytes and decode the charset
9369 * if enc != XML_CHAR_ENCODING_NONE
9370 * plug some encoding conversion routines.
9371 */
9372 start[0] = RAW;
9373 start[1] = NXT(1);
9374 start[2] = NXT(2);
9375 start[3] = NXT(3);
9376 enc = xmlDetectCharEncoding(start, 4);
9377 if (enc != XML_CHAR_ENCODING_NONE) {
9378 xmlSwitchEncoding(ctxt, enc);
9379 }
9380 break;
9381 }
Owen Taylor3473f882001-02-23 17:55:21 +00009382
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009383 if (avail < 2)
9384 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009385 cur = ctxt->input->cur[0];
9386 next = ctxt->input->cur[1];
9387 if (cur == 0) {
9388 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9389 ctxt->sax->setDocumentLocator(ctxt->userData,
9390 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009391 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009392 ctxt->instate = XML_PARSER_EOF;
9393#ifdef DEBUG_PUSH
9394 xmlGenericError(xmlGenericErrorContext,
9395 "PP: entering EOF\n");
9396#endif
9397 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9398 ctxt->sax->endDocument(ctxt->userData);
9399 goto done;
9400 }
9401 if ((cur == '<') && (next == '?')) {
9402 /* PI or XML decl */
9403 if (avail < 5) return(ret);
9404 if ((!terminate) &&
9405 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9406 return(ret);
9407 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9408 ctxt->sax->setDocumentLocator(ctxt->userData,
9409 &xmlDefaultSAXLocator);
9410 if ((ctxt->input->cur[2] == 'x') &&
9411 (ctxt->input->cur[3] == 'm') &&
9412 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009413 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009414 ret += 5;
9415#ifdef DEBUG_PUSH
9416 xmlGenericError(xmlGenericErrorContext,
9417 "PP: Parsing XML Decl\n");
9418#endif
9419 xmlParseXMLDecl(ctxt);
9420 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9421 /*
9422 * The XML REC instructs us to stop parsing right
9423 * here
9424 */
9425 ctxt->instate = XML_PARSER_EOF;
9426 return(0);
9427 }
9428 ctxt->standalone = ctxt->input->standalone;
9429 if ((ctxt->encoding == NULL) &&
9430 (ctxt->input->encoding != NULL))
9431 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9432 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9433 (!ctxt->disableSAX))
9434 ctxt->sax->startDocument(ctxt->userData);
9435 ctxt->instate = XML_PARSER_MISC;
9436#ifdef DEBUG_PUSH
9437 xmlGenericError(xmlGenericErrorContext,
9438 "PP: entering MISC\n");
9439#endif
9440 } else {
9441 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9442 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9443 (!ctxt->disableSAX))
9444 ctxt->sax->startDocument(ctxt->userData);
9445 ctxt->instate = XML_PARSER_MISC;
9446#ifdef DEBUG_PUSH
9447 xmlGenericError(xmlGenericErrorContext,
9448 "PP: entering MISC\n");
9449#endif
9450 }
9451 } else {
9452 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9453 ctxt->sax->setDocumentLocator(ctxt->userData,
9454 &xmlDefaultSAXLocator);
9455 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9456 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9457 (!ctxt->disableSAX))
9458 ctxt->sax->startDocument(ctxt->userData);
9459 ctxt->instate = XML_PARSER_MISC;
9460#ifdef DEBUG_PUSH
9461 xmlGenericError(xmlGenericErrorContext,
9462 "PP: entering MISC\n");
9463#endif
9464 }
9465 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009466 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009467 const xmlChar *name;
9468 const xmlChar *prefix;
9469 const xmlChar *URI;
9470 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009471
9472 if ((avail < 2) && (ctxt->inputNr == 1))
9473 goto done;
9474 cur = ctxt->input->cur[0];
9475 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009476 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009477 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009478 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9479 ctxt->sax->endDocument(ctxt->userData);
9480 goto done;
9481 }
9482 if (!terminate) {
9483 if (ctxt->progressive) {
9484 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9485 goto done;
9486 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9487 goto done;
9488 }
9489 }
9490 if (ctxt->spaceNr == 0)
9491 spacePush(ctxt, -1);
9492 else
9493 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009494#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009495 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009496#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009497 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009498#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009499 else
9500 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009501#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009502 if (name == NULL) {
9503 spacePop(ctxt);
9504 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009505 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9506 ctxt->sax->endDocument(ctxt->userData);
9507 goto done;
9508 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009509#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009510 /*
9511 * [ VC: Root Element Type ]
9512 * The Name in the document type declaration must match
9513 * the element type of the root element.
9514 */
9515 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9516 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9517 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009518#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009519
9520 /*
9521 * Check for an Empty Element.
9522 */
9523 if ((RAW == '/') && (NXT(1) == '>')) {
9524 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009525
9526 if (ctxt->sax2) {
9527 if ((ctxt->sax != NULL) &&
9528 (ctxt->sax->endElementNs != NULL) &&
9529 (!ctxt->disableSAX))
9530 ctxt->sax->endElementNs(ctxt->userData, name,
9531 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009532#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009533 } else {
9534 if ((ctxt->sax != NULL) &&
9535 (ctxt->sax->endElement != NULL) &&
9536 (!ctxt->disableSAX))
9537 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009538#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009539 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009540 spacePop(ctxt);
9541 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009542 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009543 } else {
9544 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009545 }
9546 break;
9547 }
9548 if (RAW == '>') {
9549 NEXT;
9550 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009551 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009552 "Couldn't find end of Start Tag %s\n",
9553 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009555 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009556 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557 if (ctxt->sax2)
9558 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009559#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009560 else
9561 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009562#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009563
Daniel Veillarda880b122003-04-21 21:36:41 +00009564 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009565 break;
9566 }
9567 case XML_PARSER_CONTENT: {
9568 const xmlChar *test;
9569 unsigned int cons;
9570 if ((avail < 2) && (ctxt->inputNr == 1))
9571 goto done;
9572 cur = ctxt->input->cur[0];
9573 next = ctxt->input->cur[1];
9574
9575 test = CUR_PTR;
9576 cons = ctxt->input->consumed;
9577 if ((cur == '<') && (next == '/')) {
9578 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009579 break;
9580 } else if ((cur == '<') && (next == '?')) {
9581 if ((!terminate) &&
9582 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9583 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 xmlParsePI(ctxt);
9585 } else if ((cur == '<') && (next != '!')) {
9586 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 break;
9588 } else if ((cur == '<') && (next == '!') &&
9589 (ctxt->input->cur[2] == '-') &&
9590 (ctxt->input->cur[3] == '-')) {
9591 if ((!terminate) &&
9592 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9593 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009594 xmlParseComment(ctxt);
9595 ctxt->instate = XML_PARSER_CONTENT;
9596 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9597 (ctxt->input->cur[2] == '[') &&
9598 (ctxt->input->cur[3] == 'C') &&
9599 (ctxt->input->cur[4] == 'D') &&
9600 (ctxt->input->cur[5] == 'A') &&
9601 (ctxt->input->cur[6] == 'T') &&
9602 (ctxt->input->cur[7] == 'A') &&
9603 (ctxt->input->cur[8] == '[')) {
9604 SKIP(9);
9605 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009606 break;
9607 } else if ((cur == '<') && (next == '!') &&
9608 (avail < 9)) {
9609 goto done;
9610 } else if (cur == '&') {
9611 if ((!terminate) &&
9612 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9613 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009614 xmlParseReference(ctxt);
9615 } else {
9616 /* TODO Avoid the extra copy, handle directly !!! */
9617 /*
9618 * Goal of the following test is:
9619 * - minimize calls to the SAX 'character' callback
9620 * when they are mergeable
9621 * - handle an problem for isBlank when we only parse
9622 * a sequence of blank chars and the next one is
9623 * not available to check against '<' presence.
9624 * - tries to homogenize the differences in SAX
9625 * callbacks between the push and pull versions
9626 * of the parser.
9627 */
9628 if ((ctxt->inputNr == 1) &&
9629 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9630 if (!terminate) {
9631 if (ctxt->progressive) {
9632 if ((lastlt == NULL) ||
9633 (ctxt->input->cur > lastlt))
9634 goto done;
9635 } else if (xmlParseLookupSequence(ctxt,
9636 '<', 0, 0) < 0) {
9637 goto done;
9638 }
9639 }
9640 }
9641 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009642 xmlParseCharData(ctxt, 0);
9643 }
9644 /*
9645 * Pop-up of finished entities.
9646 */
9647 while ((RAW == 0) && (ctxt->inputNr > 1))
9648 xmlPopInput(ctxt);
9649 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009650 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9651 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009652 ctxt->instate = XML_PARSER_EOF;
9653 break;
9654 }
9655 break;
9656 }
9657 case XML_PARSER_END_TAG:
9658 if (avail < 2)
9659 goto done;
9660 if (!terminate) {
9661 if (ctxt->progressive) {
9662 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9663 goto done;
9664 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9665 goto done;
9666 }
9667 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009668 if (ctxt->sax2) {
9669 xmlParseEndTag2(ctxt,
9670 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9671 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9672 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9673 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009674 }
9675#ifdef LIBXML_SAX1_ENABLED
9676 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009677 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009678#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009679 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009680 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009681 } else {
9682 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009683 }
9684 break;
9685 case XML_PARSER_CDATA_SECTION: {
9686 /*
9687 * The Push mode need to have the SAX callback for
9688 * cdataBlock merge back contiguous callbacks.
9689 */
9690 int base;
9691
9692 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9693 if (base < 0) {
9694 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9695 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9696 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009697 ctxt->sax->cdataBlock(ctxt->userData,
9698 ctxt->input->cur,
9699 XML_PARSER_BIG_BUFFER_SIZE);
9700 else if (ctxt->sax->characters != NULL)
9701 ctxt->sax->characters(ctxt->userData,
9702 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009703 XML_PARSER_BIG_BUFFER_SIZE);
9704 }
9705 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9706 ctxt->checkIndex = 0;
9707 }
9708 goto done;
9709 } else {
9710 if ((ctxt->sax != NULL) && (base > 0) &&
9711 (!ctxt->disableSAX)) {
9712 if (ctxt->sax->cdataBlock != NULL)
9713 ctxt->sax->cdataBlock(ctxt->userData,
9714 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009715 else if (ctxt->sax->characters != NULL)
9716 ctxt->sax->characters(ctxt->userData,
9717 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009718 }
9719 SKIP(base + 3);
9720 ctxt->checkIndex = 0;
9721 ctxt->instate = XML_PARSER_CONTENT;
9722#ifdef DEBUG_PUSH
9723 xmlGenericError(xmlGenericErrorContext,
9724 "PP: entering CONTENT\n");
9725#endif
9726 }
9727 break;
9728 }
Owen Taylor3473f882001-02-23 17:55:21 +00009729 case XML_PARSER_MISC:
9730 SKIP_BLANKS;
9731 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009732 avail = ctxt->input->length -
9733 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009734 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009735 avail = ctxt->input->buf->buffer->use -
9736 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009737 if (avail < 2)
9738 goto done;
9739 cur = ctxt->input->cur[0];
9740 next = ctxt->input->cur[1];
9741 if ((cur == '<') && (next == '?')) {
9742 if ((!terminate) &&
9743 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9744 goto done;
9745#ifdef DEBUG_PUSH
9746 xmlGenericError(xmlGenericErrorContext,
9747 "PP: Parsing PI\n");
9748#endif
9749 xmlParsePI(ctxt);
9750 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009751 (ctxt->input->cur[2] == '-') &&
9752 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009753 if ((!terminate) &&
9754 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9755 goto done;
9756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: Parsing Comment\n");
9759#endif
9760 xmlParseComment(ctxt);
9761 ctxt->instate = XML_PARSER_MISC;
9762 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009763 (ctxt->input->cur[2] == 'D') &&
9764 (ctxt->input->cur[3] == 'O') &&
9765 (ctxt->input->cur[4] == 'C') &&
9766 (ctxt->input->cur[5] == 'T') &&
9767 (ctxt->input->cur[6] == 'Y') &&
9768 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009769 (ctxt->input->cur[8] == 'E')) {
9770 if ((!terminate) &&
9771 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9772 goto done;
9773#ifdef DEBUG_PUSH
9774 xmlGenericError(xmlGenericErrorContext,
9775 "PP: Parsing internal subset\n");
9776#endif
9777 ctxt->inSubset = 1;
9778 xmlParseDocTypeDecl(ctxt);
9779 if (RAW == '[') {
9780 ctxt->instate = XML_PARSER_DTD;
9781#ifdef DEBUG_PUSH
9782 xmlGenericError(xmlGenericErrorContext,
9783 "PP: entering DTD\n");
9784#endif
9785 } else {
9786 /*
9787 * Create and update the external subset.
9788 */
9789 ctxt->inSubset = 2;
9790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9791 (ctxt->sax->externalSubset != NULL))
9792 ctxt->sax->externalSubset(ctxt->userData,
9793 ctxt->intSubName, ctxt->extSubSystem,
9794 ctxt->extSubURI);
9795 ctxt->inSubset = 0;
9796 ctxt->instate = XML_PARSER_PROLOG;
9797#ifdef DEBUG_PUSH
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: entering PROLOG\n");
9800#endif
9801 }
9802 } else if ((cur == '<') && (next == '!') &&
9803 (avail < 9)) {
9804 goto done;
9805 } else {
9806 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009807 ctxt->progressive = 1;
9808 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009809#ifdef DEBUG_PUSH
9810 xmlGenericError(xmlGenericErrorContext,
9811 "PP: entering START_TAG\n");
9812#endif
9813 }
9814 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009815 case XML_PARSER_PROLOG:
9816 SKIP_BLANKS;
9817 if (ctxt->input->buf == NULL)
9818 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9819 else
9820 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9821 if (avail < 2)
9822 goto done;
9823 cur = ctxt->input->cur[0];
9824 next = ctxt->input->cur[1];
9825 if ((cur == '<') && (next == '?')) {
9826 if ((!terminate) &&
9827 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9828 goto done;
9829#ifdef DEBUG_PUSH
9830 xmlGenericError(xmlGenericErrorContext,
9831 "PP: Parsing PI\n");
9832#endif
9833 xmlParsePI(ctxt);
9834 } else if ((cur == '<') && (next == '!') &&
9835 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9836 if ((!terminate) &&
9837 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9838 goto done;
9839#ifdef DEBUG_PUSH
9840 xmlGenericError(xmlGenericErrorContext,
9841 "PP: Parsing Comment\n");
9842#endif
9843 xmlParseComment(ctxt);
9844 ctxt->instate = XML_PARSER_PROLOG;
9845 } else if ((cur == '<') && (next == '!') &&
9846 (avail < 4)) {
9847 goto done;
9848 } else {
9849 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009850 ctxt->progressive = 1;
9851 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009852#ifdef DEBUG_PUSH
9853 xmlGenericError(xmlGenericErrorContext,
9854 "PP: entering START_TAG\n");
9855#endif
9856 }
9857 break;
9858 case XML_PARSER_EPILOG:
9859 SKIP_BLANKS;
9860 if (ctxt->input->buf == NULL)
9861 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9862 else
9863 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9864 if (avail < 2)
9865 goto done;
9866 cur = ctxt->input->cur[0];
9867 next = ctxt->input->cur[1];
9868 if ((cur == '<') && (next == '?')) {
9869 if ((!terminate) &&
9870 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9871 goto done;
9872#ifdef DEBUG_PUSH
9873 xmlGenericError(xmlGenericErrorContext,
9874 "PP: Parsing PI\n");
9875#endif
9876 xmlParsePI(ctxt);
9877 ctxt->instate = XML_PARSER_EPILOG;
9878 } else if ((cur == '<') && (next == '!') &&
9879 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9880 if ((!terminate) &&
9881 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9882 goto done;
9883#ifdef DEBUG_PUSH
9884 xmlGenericError(xmlGenericErrorContext,
9885 "PP: Parsing Comment\n");
9886#endif
9887 xmlParseComment(ctxt);
9888 ctxt->instate = XML_PARSER_EPILOG;
9889 } else if ((cur == '<') && (next == '!') &&
9890 (avail < 4)) {
9891 goto done;
9892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009893 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009894 ctxt->instate = XML_PARSER_EOF;
9895#ifdef DEBUG_PUSH
9896 xmlGenericError(xmlGenericErrorContext,
9897 "PP: entering EOF\n");
9898#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009899 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009900 ctxt->sax->endDocument(ctxt->userData);
9901 goto done;
9902 }
9903 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009904 case XML_PARSER_DTD: {
9905 /*
9906 * Sorry but progressive parsing of the internal subset
9907 * is not expected to be supported. We first check that
9908 * the full content of the internal subset is available and
9909 * the parsing is launched only at that point.
9910 * Internal subset ends up with "']' S? '>'" in an unescaped
9911 * section and not in a ']]>' sequence which are conditional
9912 * sections (whoever argued to keep that crap in XML deserve
9913 * a place in hell !).
9914 */
9915 int base, i;
9916 xmlChar *buf;
9917 xmlChar quote = 0;
9918
9919 base = ctxt->input->cur - ctxt->input->base;
9920 if (base < 0) return(0);
9921 if (ctxt->checkIndex > base)
9922 base = ctxt->checkIndex;
9923 buf = ctxt->input->buf->buffer->content;
9924 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9925 base++) {
9926 if (quote != 0) {
9927 if (buf[base] == quote)
9928 quote = 0;
9929 continue;
9930 }
9931 if (buf[base] == '"') {
9932 quote = '"';
9933 continue;
9934 }
9935 if (buf[base] == '\'') {
9936 quote = '\'';
9937 continue;
9938 }
9939 if (buf[base] == ']') {
9940 if ((unsigned int) base +1 >=
9941 ctxt->input->buf->buffer->use)
9942 break;
9943 if (buf[base + 1] == ']') {
9944 /* conditional crap, skip both ']' ! */
9945 base++;
9946 continue;
9947 }
9948 for (i = 0;
9949 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9950 i++) {
9951 if (buf[base + i] == '>')
9952 goto found_end_int_subset;
9953 }
9954 break;
9955 }
9956 }
9957 /*
9958 * We didn't found the end of the Internal subset
9959 */
9960 if (quote == 0)
9961 ctxt->checkIndex = base;
9962#ifdef DEBUG_PUSH
9963 if (next == 0)
9964 xmlGenericError(xmlGenericErrorContext,
9965 "PP: lookup of int subset end filed\n");
9966#endif
9967 goto done;
9968
9969found_end_int_subset:
9970 xmlParseInternalSubset(ctxt);
9971 ctxt->inSubset = 2;
9972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9973 (ctxt->sax->externalSubset != NULL))
9974 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9975 ctxt->extSubSystem, ctxt->extSubURI);
9976 ctxt->inSubset = 0;
9977 ctxt->instate = XML_PARSER_PROLOG;
9978 ctxt->checkIndex = 0;
9979#ifdef DEBUG_PUSH
9980 xmlGenericError(xmlGenericErrorContext,
9981 "PP: entering PROLOG\n");
9982#endif
9983 break;
9984 }
9985 case XML_PARSER_COMMENT:
9986 xmlGenericError(xmlGenericErrorContext,
9987 "PP: internal error, state == COMMENT\n");
9988 ctxt->instate = XML_PARSER_CONTENT;
9989#ifdef DEBUG_PUSH
9990 xmlGenericError(xmlGenericErrorContext,
9991 "PP: entering CONTENT\n");
9992#endif
9993 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009994 case XML_PARSER_IGNORE:
9995 xmlGenericError(xmlGenericErrorContext,
9996 "PP: internal error, state == IGNORE");
9997 ctxt->instate = XML_PARSER_DTD;
9998#ifdef DEBUG_PUSH
9999 xmlGenericError(xmlGenericErrorContext,
10000 "PP: entering DTD\n");
10001#endif
10002 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010003 case XML_PARSER_PI:
10004 xmlGenericError(xmlGenericErrorContext,
10005 "PP: internal error, state == PI\n");
10006 ctxt->instate = XML_PARSER_CONTENT;
10007#ifdef DEBUG_PUSH
10008 xmlGenericError(xmlGenericErrorContext,
10009 "PP: entering CONTENT\n");
10010#endif
10011 break;
10012 case XML_PARSER_ENTITY_DECL:
10013 xmlGenericError(xmlGenericErrorContext,
10014 "PP: internal error, state == ENTITY_DECL\n");
10015 ctxt->instate = XML_PARSER_DTD;
10016#ifdef DEBUG_PUSH
10017 xmlGenericError(xmlGenericErrorContext,
10018 "PP: entering DTD\n");
10019#endif
10020 break;
10021 case XML_PARSER_ENTITY_VALUE:
10022 xmlGenericError(xmlGenericErrorContext,
10023 "PP: internal error, state == ENTITY_VALUE\n");
10024 ctxt->instate = XML_PARSER_CONTENT;
10025#ifdef DEBUG_PUSH
10026 xmlGenericError(xmlGenericErrorContext,
10027 "PP: entering DTD\n");
10028#endif
10029 break;
10030 case XML_PARSER_ATTRIBUTE_VALUE:
10031 xmlGenericError(xmlGenericErrorContext,
10032 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10033 ctxt->instate = XML_PARSER_START_TAG;
10034#ifdef DEBUG_PUSH
10035 xmlGenericError(xmlGenericErrorContext,
10036 "PP: entering START_TAG\n");
10037#endif
10038 break;
10039 case XML_PARSER_SYSTEM_LITERAL:
10040 xmlGenericError(xmlGenericErrorContext,
10041 "PP: internal error, state == SYSTEM_LITERAL\n");
10042 ctxt->instate = XML_PARSER_START_TAG;
10043#ifdef DEBUG_PUSH
10044 xmlGenericError(xmlGenericErrorContext,
10045 "PP: entering START_TAG\n");
10046#endif
10047 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010048 case XML_PARSER_PUBLIC_LITERAL:
10049 xmlGenericError(xmlGenericErrorContext,
10050 "PP: internal error, state == PUBLIC_LITERAL\n");
10051 ctxt->instate = XML_PARSER_START_TAG;
10052#ifdef DEBUG_PUSH
10053 xmlGenericError(xmlGenericErrorContext,
10054 "PP: entering START_TAG\n");
10055#endif
10056 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010057 }
10058 }
10059done:
10060#ifdef DEBUG_PUSH
10061 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10062#endif
10063 return(ret);
10064}
10065
10066/**
Owen Taylor3473f882001-02-23 17:55:21 +000010067 * xmlParseChunk:
10068 * @ctxt: an XML parser context
10069 * @chunk: an char array
10070 * @size: the size in byte of the chunk
10071 * @terminate: last chunk indicator
10072 *
10073 * Parse a Chunk of memory
10074 *
10075 * Returns zero if no error, the xmlParserErrors otherwise.
10076 */
10077int
10078xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10079 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010080 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10081 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010082 if (ctxt->instate == XML_PARSER_START)
10083 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010084 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10085 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10086 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10087 int cur = ctxt->input->cur - ctxt->input->base;
10088
10089 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10090 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10091 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010092 ctxt->input->end =
10093 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010094#ifdef DEBUG_PUSH
10095 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10096#endif
10097
Owen Taylor3473f882001-02-23 17:55:21 +000010098 } else if (ctxt->instate != XML_PARSER_EOF) {
10099 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10100 xmlParserInputBufferPtr in = ctxt->input->buf;
10101 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10102 (in->raw != NULL)) {
10103 int nbchars;
10104
10105 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10106 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010107 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010108 xmlGenericError(xmlGenericErrorContext,
10109 "xmlParseChunk: encoder error\n");
10110 return(XML_ERR_INVALID_ENCODING);
10111 }
10112 }
10113 }
10114 }
10115 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010116 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10117 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010118 if (terminate) {
10119 /*
10120 * Check for termination
10121 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010122 int avail = 0;
10123 if (ctxt->input->buf == NULL)
10124 avail = ctxt->input->length -
10125 (ctxt->input->cur - ctxt->input->base);
10126 else
10127 avail = ctxt->input->buf->buffer->use -
10128 (ctxt->input->cur - ctxt->input->base);
10129
Owen Taylor3473f882001-02-23 17:55:21 +000010130 if ((ctxt->instate != XML_PARSER_EOF) &&
10131 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010132 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010133 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010134 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010135 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010136 }
Owen Taylor3473f882001-02-23 17:55:21 +000010137 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010138 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010139 ctxt->sax->endDocument(ctxt->userData);
10140 }
10141 ctxt->instate = XML_PARSER_EOF;
10142 }
10143 return((xmlParserErrors) ctxt->errNo);
10144}
10145
10146/************************************************************************
10147 * *
10148 * I/O front end functions to the parser *
10149 * *
10150 ************************************************************************/
10151
10152/**
10153 * xmlStopParser:
10154 * @ctxt: an XML parser context
10155 *
10156 * Blocks further parser processing
10157 */
10158void
10159xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +000010160 if (ctxt == NULL)
10161 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010162 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +000010163 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010164 if (ctxt->input != NULL)
10165 ctxt->input->cur = BAD_CAST"";
10166}
10167
10168/**
10169 * xmlCreatePushParserCtxt:
10170 * @sax: a SAX handler
10171 * @user_data: The user data returned on SAX callbacks
10172 * @chunk: a pointer to an array of chars
10173 * @size: number of chars in the array
10174 * @filename: an optional file name or URI
10175 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010176 * Create a parser context for using the XML parser in push mode.
10177 * If @buffer and @size are non-NULL, the data is used to detect
10178 * the encoding. The remaining characters will be parsed so they
10179 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010180 * To allow content encoding detection, @size should be >= 4
10181 * The value of @filename is used for fetching external entities
10182 * and error/warning reports.
10183 *
10184 * Returns the new parser context or NULL
10185 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010186
Owen Taylor3473f882001-02-23 17:55:21 +000010187xmlParserCtxtPtr
10188xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10189 const char *chunk, int size, const char *filename) {
10190 xmlParserCtxtPtr ctxt;
10191 xmlParserInputPtr inputStream;
10192 xmlParserInputBufferPtr buf;
10193 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10194
10195 /*
10196 * plug some encoding conversion routines
10197 */
10198 if ((chunk != NULL) && (size >= 4))
10199 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10200
10201 buf = xmlAllocParserInputBuffer(enc);
10202 if (buf == NULL) return(NULL);
10203
10204 ctxt = xmlNewParserCtxt();
10205 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010206 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010207 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010208 return(NULL);
10209 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010210 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10211 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010212 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010213 xmlFreeParserInputBuffer(buf);
10214 xmlFreeParserCtxt(ctxt);
10215 return(NULL);
10216 }
Owen Taylor3473f882001-02-23 17:55:21 +000010217 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010218#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010219 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010220#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010221 xmlFree(ctxt->sax);
10222 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10223 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010224 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010225 xmlFreeParserInputBuffer(buf);
10226 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010227 return(NULL);
10228 }
10229 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10230 if (user_data != NULL)
10231 ctxt->userData = user_data;
10232 }
10233 if (filename == NULL) {
10234 ctxt->directory = NULL;
10235 } else {
10236 ctxt->directory = xmlParserGetDirectory(filename);
10237 }
10238
10239 inputStream = xmlNewInputStream(ctxt);
10240 if (inputStream == NULL) {
10241 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010242 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010243 return(NULL);
10244 }
10245
10246 if (filename == NULL)
10247 inputStream->filename = NULL;
10248 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010249 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010250 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010251 inputStream->buf = buf;
10252 inputStream->base = inputStream->buf->buffer->content;
10253 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010254 inputStream->end =
10255 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010256
10257 inputPush(ctxt, inputStream);
10258
10259 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10260 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010261 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10262 int cur = ctxt->input->cur - ctxt->input->base;
10263
Owen Taylor3473f882001-02-23 17:55:21 +000010264 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010265
10266 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10267 ctxt->input->cur = ctxt->input->base + cur;
10268 ctxt->input->end =
10269 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010270#ifdef DEBUG_PUSH
10271 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10272#endif
10273 }
10274
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010275 if (enc != XML_CHAR_ENCODING_NONE) {
10276 xmlSwitchEncoding(ctxt, enc);
10277 }
10278
Owen Taylor3473f882001-02-23 17:55:21 +000010279 return(ctxt);
10280}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010281#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010282
10283/**
10284 * xmlCreateIOParserCtxt:
10285 * @sax: a SAX handler
10286 * @user_data: The user data returned on SAX callbacks
10287 * @ioread: an I/O read function
10288 * @ioclose: an I/O close function
10289 * @ioctx: an I/O handler
10290 * @enc: the charset encoding if known
10291 *
10292 * Create a parser context for using the XML parser with an existing
10293 * I/O stream
10294 *
10295 * Returns the new parser context or NULL
10296 */
10297xmlParserCtxtPtr
10298xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10299 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10300 void *ioctx, xmlCharEncoding enc) {
10301 xmlParserCtxtPtr ctxt;
10302 xmlParserInputPtr inputStream;
10303 xmlParserInputBufferPtr buf;
10304
10305 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10306 if (buf == NULL) return(NULL);
10307
10308 ctxt = xmlNewParserCtxt();
10309 if (ctxt == NULL) {
10310 xmlFree(buf);
10311 return(NULL);
10312 }
10313 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010314#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010315 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010316#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010317 xmlFree(ctxt->sax);
10318 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10319 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010320 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010321 xmlFree(ctxt);
10322 return(NULL);
10323 }
10324 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10325 if (user_data != NULL)
10326 ctxt->userData = user_data;
10327 }
10328
10329 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10330 if (inputStream == NULL) {
10331 xmlFreeParserCtxt(ctxt);
10332 return(NULL);
10333 }
10334 inputPush(ctxt, inputStream);
10335
10336 return(ctxt);
10337}
10338
Daniel Veillard4432df22003-09-28 18:58:27 +000010339#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010340/************************************************************************
10341 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010342 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010343 * *
10344 ************************************************************************/
10345
10346/**
10347 * xmlIOParseDTD:
10348 * @sax: the SAX handler block or NULL
10349 * @input: an Input Buffer
10350 * @enc: the charset encoding if known
10351 *
10352 * Load and parse a DTD
10353 *
10354 * Returns the resulting xmlDtdPtr or NULL in case of error.
10355 * @input will be freed at parsing end.
10356 */
10357
10358xmlDtdPtr
10359xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10360 xmlCharEncoding enc) {
10361 xmlDtdPtr ret = NULL;
10362 xmlParserCtxtPtr ctxt;
10363 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010364 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010365
10366 if (input == NULL)
10367 return(NULL);
10368
10369 ctxt = xmlNewParserCtxt();
10370 if (ctxt == NULL) {
10371 return(NULL);
10372 }
10373
10374 /*
10375 * Set-up the SAX context
10376 */
10377 if (sax != NULL) {
10378 if (ctxt->sax != NULL)
10379 xmlFree(ctxt->sax);
10380 ctxt->sax = sax;
10381 ctxt->userData = NULL;
10382 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010383 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010384
10385 /*
10386 * generate a parser input from the I/O handler
10387 */
10388
Daniel Veillard43caefb2003-12-07 19:32:22 +000010389 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010390 if (pinput == NULL) {
10391 if (sax != NULL) ctxt->sax = NULL;
10392 xmlFreeParserCtxt(ctxt);
10393 return(NULL);
10394 }
10395
10396 /*
10397 * plug some encoding conversion routines here.
10398 */
10399 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010400 if (enc != XML_CHAR_ENCODING_NONE) {
10401 xmlSwitchEncoding(ctxt, enc);
10402 }
Owen Taylor3473f882001-02-23 17:55:21 +000010403
10404 pinput->filename = NULL;
10405 pinput->line = 1;
10406 pinput->col = 1;
10407 pinput->base = ctxt->input->cur;
10408 pinput->cur = ctxt->input->cur;
10409 pinput->free = NULL;
10410
10411 /*
10412 * let's parse that entity knowing it's an external subset.
10413 */
10414 ctxt->inSubset = 2;
10415 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10416 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10417 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010418
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010419 if ((enc == XML_CHAR_ENCODING_NONE) &&
10420 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010421 /*
10422 * Get the 4 first bytes and decode the charset
10423 * if enc != XML_CHAR_ENCODING_NONE
10424 * plug some encoding conversion routines.
10425 */
10426 start[0] = RAW;
10427 start[1] = NXT(1);
10428 start[2] = NXT(2);
10429 start[3] = NXT(3);
10430 enc = xmlDetectCharEncoding(start, 4);
10431 if (enc != XML_CHAR_ENCODING_NONE) {
10432 xmlSwitchEncoding(ctxt, enc);
10433 }
10434 }
10435
Owen Taylor3473f882001-02-23 17:55:21 +000010436 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10437
10438 if (ctxt->myDoc != NULL) {
10439 if (ctxt->wellFormed) {
10440 ret = ctxt->myDoc->extSubset;
10441 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010442 if (ret != NULL) {
10443 xmlNodePtr tmp;
10444
10445 ret->doc = NULL;
10446 tmp = ret->children;
10447 while (tmp != NULL) {
10448 tmp->doc = NULL;
10449 tmp = tmp->next;
10450 }
10451 }
Owen Taylor3473f882001-02-23 17:55:21 +000010452 } else {
10453 ret = NULL;
10454 }
10455 xmlFreeDoc(ctxt->myDoc);
10456 ctxt->myDoc = NULL;
10457 }
10458 if (sax != NULL) ctxt->sax = NULL;
10459 xmlFreeParserCtxt(ctxt);
10460
10461 return(ret);
10462}
10463
10464/**
10465 * xmlSAXParseDTD:
10466 * @sax: the SAX handler block
10467 * @ExternalID: a NAME* containing the External ID of the DTD
10468 * @SystemID: a NAME* containing the URL to the DTD
10469 *
10470 * Load and parse an external subset.
10471 *
10472 * Returns the resulting xmlDtdPtr or NULL in case of error.
10473 */
10474
10475xmlDtdPtr
10476xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10477 const xmlChar *SystemID) {
10478 xmlDtdPtr ret = NULL;
10479 xmlParserCtxtPtr ctxt;
10480 xmlParserInputPtr input = NULL;
10481 xmlCharEncoding enc;
10482
10483 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10484
10485 ctxt = xmlNewParserCtxt();
10486 if (ctxt == NULL) {
10487 return(NULL);
10488 }
10489
10490 /*
10491 * Set-up the SAX context
10492 */
10493 if (sax != NULL) {
10494 if (ctxt->sax != NULL)
10495 xmlFree(ctxt->sax);
10496 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010497 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010498 }
10499
10500 /*
10501 * Ask the Entity resolver to load the damn thing
10502 */
10503
10504 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010505 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010506 if (input == NULL) {
10507 if (sax != NULL) ctxt->sax = NULL;
10508 xmlFreeParserCtxt(ctxt);
10509 return(NULL);
10510 }
10511
10512 /*
10513 * plug some encoding conversion routines here.
10514 */
10515 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010516 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10517 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10518 xmlSwitchEncoding(ctxt, enc);
10519 }
Owen Taylor3473f882001-02-23 17:55:21 +000010520
10521 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010522 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010523 input->line = 1;
10524 input->col = 1;
10525 input->base = ctxt->input->cur;
10526 input->cur = ctxt->input->cur;
10527 input->free = NULL;
10528
10529 /*
10530 * let's parse that entity knowing it's an external subset.
10531 */
10532 ctxt->inSubset = 2;
10533 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10534 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10535 ExternalID, SystemID);
10536 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10537
10538 if (ctxt->myDoc != NULL) {
10539 if (ctxt->wellFormed) {
10540 ret = ctxt->myDoc->extSubset;
10541 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010542 if (ret != NULL) {
10543 xmlNodePtr tmp;
10544
10545 ret->doc = NULL;
10546 tmp = ret->children;
10547 while (tmp != NULL) {
10548 tmp->doc = NULL;
10549 tmp = tmp->next;
10550 }
10551 }
Owen Taylor3473f882001-02-23 17:55:21 +000010552 } else {
10553 ret = NULL;
10554 }
10555 xmlFreeDoc(ctxt->myDoc);
10556 ctxt->myDoc = NULL;
10557 }
10558 if (sax != NULL) ctxt->sax = NULL;
10559 xmlFreeParserCtxt(ctxt);
10560
10561 return(ret);
10562}
10563
Daniel Veillard4432df22003-09-28 18:58:27 +000010564
Owen Taylor3473f882001-02-23 17:55:21 +000010565/**
10566 * xmlParseDTD:
10567 * @ExternalID: a NAME* containing the External ID of the DTD
10568 * @SystemID: a NAME* containing the URL to the DTD
10569 *
10570 * Load and parse an external subset.
10571 *
10572 * Returns the resulting xmlDtdPtr or NULL in case of error.
10573 */
10574
10575xmlDtdPtr
10576xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10577 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10578}
Daniel Veillard4432df22003-09-28 18:58:27 +000010579#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010580
10581/************************************************************************
10582 * *
10583 * Front ends when parsing an Entity *
10584 * *
10585 ************************************************************************/
10586
10587/**
Owen Taylor3473f882001-02-23 17:55:21 +000010588 * xmlParseCtxtExternalEntity:
10589 * @ctx: the existing parsing context
10590 * @URL: the URL for the entity to load
10591 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010592 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010593 *
10594 * Parse an external general entity within an existing parsing context
10595 * An external general parsed entity is well-formed if it matches the
10596 * production labeled extParsedEnt.
10597 *
10598 * [78] extParsedEnt ::= TextDecl? content
10599 *
10600 * Returns 0 if the entity is well formed, -1 in case of args problem and
10601 * the parser error code otherwise
10602 */
10603
10604int
10605xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010606 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010607 xmlParserCtxtPtr ctxt;
10608 xmlDocPtr newDoc;
10609 xmlSAXHandlerPtr oldsax = NULL;
10610 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010611 xmlChar start[4];
10612 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010613
10614 if (ctx->depth > 40) {
10615 return(XML_ERR_ENTITY_LOOP);
10616 }
10617
Daniel Veillardcda96922001-08-21 10:56:31 +000010618 if (lst != NULL)
10619 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010620 if ((URL == NULL) && (ID == NULL))
10621 return(-1);
10622 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10623 return(-1);
10624
10625
10626 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10627 if (ctxt == NULL) return(-1);
10628 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010629 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010630 oldsax = ctxt->sax;
10631 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010632 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010633 newDoc = xmlNewDoc(BAD_CAST "1.0");
10634 if (newDoc == NULL) {
10635 xmlFreeParserCtxt(ctxt);
10636 return(-1);
10637 }
10638 if (ctx->myDoc != NULL) {
10639 newDoc->intSubset = ctx->myDoc->intSubset;
10640 newDoc->extSubset = ctx->myDoc->extSubset;
10641 }
10642 if (ctx->myDoc->URL != NULL) {
10643 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10644 }
10645 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10646 if (newDoc->children == NULL) {
10647 ctxt->sax = oldsax;
10648 xmlFreeParserCtxt(ctxt);
10649 newDoc->intSubset = NULL;
10650 newDoc->extSubset = NULL;
10651 xmlFreeDoc(newDoc);
10652 return(-1);
10653 }
10654 nodePush(ctxt, newDoc->children);
10655 if (ctx->myDoc == NULL) {
10656 ctxt->myDoc = newDoc;
10657 } else {
10658 ctxt->myDoc = ctx->myDoc;
10659 newDoc->children->doc = ctx->myDoc;
10660 }
10661
Daniel Veillard87a764e2001-06-20 17:41:10 +000010662 /*
10663 * Get the 4 first bytes and decode the charset
10664 * if enc != XML_CHAR_ENCODING_NONE
10665 * plug some encoding conversion routines.
10666 */
10667 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010668 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10669 start[0] = RAW;
10670 start[1] = NXT(1);
10671 start[2] = NXT(2);
10672 start[3] = NXT(3);
10673 enc = xmlDetectCharEncoding(start, 4);
10674 if (enc != XML_CHAR_ENCODING_NONE) {
10675 xmlSwitchEncoding(ctxt, enc);
10676 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010677 }
10678
Owen Taylor3473f882001-02-23 17:55:21 +000010679 /*
10680 * Parse a possible text declaration first
10681 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010682 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010683 xmlParseTextDecl(ctxt);
10684 }
10685
10686 /*
10687 * Doing validity checking on chunk doesn't make sense
10688 */
10689 ctxt->instate = XML_PARSER_CONTENT;
10690 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010691 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010692 ctxt->loadsubset = ctx->loadsubset;
10693 ctxt->depth = ctx->depth + 1;
10694 ctxt->replaceEntities = ctx->replaceEntities;
10695 if (ctxt->validate) {
10696 ctxt->vctxt.error = ctx->vctxt.error;
10697 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010698 } else {
10699 ctxt->vctxt.error = NULL;
10700 ctxt->vctxt.warning = NULL;
10701 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010702 ctxt->vctxt.nodeTab = NULL;
10703 ctxt->vctxt.nodeNr = 0;
10704 ctxt->vctxt.nodeMax = 0;
10705 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010706 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10707 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010708 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10709 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10710 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010711 ctxt->dictNames = ctx->dictNames;
10712 ctxt->attsDefault = ctx->attsDefault;
10713 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010714
10715 xmlParseContent(ctxt);
10716
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010717 ctx->validate = ctxt->validate;
10718 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010719 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010720 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010721 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010722 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010723 }
10724 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010725 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010726 }
10727
10728 if (!ctxt->wellFormed) {
10729 if (ctxt->errNo == 0)
10730 ret = 1;
10731 else
10732 ret = ctxt->errNo;
10733 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010734 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010735 xmlNodePtr cur;
10736
10737 /*
10738 * Return the newly created nodeset after unlinking it from
10739 * they pseudo parent.
10740 */
10741 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010742 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010743 while (cur != NULL) {
10744 cur->parent = NULL;
10745 cur = cur->next;
10746 }
10747 newDoc->children->children = NULL;
10748 }
10749 ret = 0;
10750 }
10751 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010752 ctxt->dict = NULL;
10753 ctxt->attsDefault = NULL;
10754 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010755 xmlFreeParserCtxt(ctxt);
10756 newDoc->intSubset = NULL;
10757 newDoc->extSubset = NULL;
10758 xmlFreeDoc(newDoc);
10759
10760 return(ret);
10761}
10762
10763/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010764 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010765 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010766 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010767 * @sax: the SAX handler bloc (possibly NULL)
10768 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10769 * @depth: Used for loop detection, use 0
10770 * @URL: the URL for the entity to load
10771 * @ID: the System ID for the entity to load
10772 * @list: the return value for the set of parsed nodes
10773 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010774 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010775 *
10776 * Returns 0 if the entity is well formed, -1 in case of args problem and
10777 * the parser error code otherwise
10778 */
10779
Daniel Veillard7d515752003-09-26 19:12:37 +000010780static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010781xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10782 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010783 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010784 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010785 xmlParserCtxtPtr ctxt;
10786 xmlDocPtr newDoc;
10787 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010788 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010789 xmlChar start[4];
10790 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010791
10792 if (depth > 40) {
10793 return(XML_ERR_ENTITY_LOOP);
10794 }
10795
10796
10797
10798 if (list != NULL)
10799 *list = NULL;
10800 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010801 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010802 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010803 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010804
10805
10806 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010807 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010808 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010809 if (oldctxt != NULL) {
10810 ctxt->_private = oldctxt->_private;
10811 ctxt->loadsubset = oldctxt->loadsubset;
10812 ctxt->validate = oldctxt->validate;
10813 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010814 ctxt->record_info = oldctxt->record_info;
10815 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10816 ctxt->node_seq.length = oldctxt->node_seq.length;
10817 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010818 } else {
10819 /*
10820 * Doing validity checking on chunk without context
10821 * doesn't make sense
10822 */
10823 ctxt->_private = NULL;
10824 ctxt->validate = 0;
10825 ctxt->external = 2;
10826 ctxt->loadsubset = 0;
10827 }
Owen Taylor3473f882001-02-23 17:55:21 +000010828 if (sax != NULL) {
10829 oldsax = ctxt->sax;
10830 ctxt->sax = sax;
10831 if (user_data != NULL)
10832 ctxt->userData = user_data;
10833 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010834 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010835 newDoc = xmlNewDoc(BAD_CAST "1.0");
10836 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010837 ctxt->node_seq.maximum = 0;
10838 ctxt->node_seq.length = 0;
10839 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010840 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010841 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010842 }
10843 if (doc != NULL) {
10844 newDoc->intSubset = doc->intSubset;
10845 newDoc->extSubset = doc->extSubset;
10846 }
10847 if (doc->URL != NULL) {
10848 newDoc->URL = xmlStrdup(doc->URL);
10849 }
10850 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10851 if (newDoc->children == NULL) {
10852 if (sax != NULL)
10853 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010854 ctxt->node_seq.maximum = 0;
10855 ctxt->node_seq.length = 0;
10856 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010857 xmlFreeParserCtxt(ctxt);
10858 newDoc->intSubset = NULL;
10859 newDoc->extSubset = NULL;
10860 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010861 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010862 }
10863 nodePush(ctxt, newDoc->children);
10864 if (doc == NULL) {
10865 ctxt->myDoc = newDoc;
10866 } else {
10867 ctxt->myDoc = doc;
10868 newDoc->children->doc = doc;
10869 }
10870
Daniel Veillard87a764e2001-06-20 17:41:10 +000010871 /*
10872 * Get the 4 first bytes and decode the charset
10873 * if enc != XML_CHAR_ENCODING_NONE
10874 * plug some encoding conversion routines.
10875 */
10876 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010877 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10878 start[0] = RAW;
10879 start[1] = NXT(1);
10880 start[2] = NXT(2);
10881 start[3] = NXT(3);
10882 enc = xmlDetectCharEncoding(start, 4);
10883 if (enc != XML_CHAR_ENCODING_NONE) {
10884 xmlSwitchEncoding(ctxt, enc);
10885 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010886 }
10887
Owen Taylor3473f882001-02-23 17:55:21 +000010888 /*
10889 * Parse a possible text declaration first
10890 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010891 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010892 xmlParseTextDecl(ctxt);
10893 }
10894
Owen Taylor3473f882001-02-23 17:55:21 +000010895 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010896 ctxt->depth = depth;
10897
10898 xmlParseContent(ctxt);
10899
Daniel Veillard561b7f82002-03-20 21:55:57 +000010900 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010901 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010902 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010903 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010904 }
10905 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010906 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010907 }
10908
10909 if (!ctxt->wellFormed) {
10910 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010911 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010912 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010913 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010914 } else {
10915 if (list != NULL) {
10916 xmlNodePtr cur;
10917
10918 /*
10919 * Return the newly created nodeset after unlinking it from
10920 * they pseudo parent.
10921 */
10922 cur = newDoc->children->children;
10923 *list = cur;
10924 while (cur != NULL) {
10925 cur->parent = NULL;
10926 cur = cur->next;
10927 }
10928 newDoc->children->children = NULL;
10929 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010930 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010931 }
10932 if (sax != NULL)
10933 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010934 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10935 oldctxt->node_seq.length = ctxt->node_seq.length;
10936 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010937 ctxt->node_seq.maximum = 0;
10938 ctxt->node_seq.length = 0;
10939 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010940 xmlFreeParserCtxt(ctxt);
10941 newDoc->intSubset = NULL;
10942 newDoc->extSubset = NULL;
10943 xmlFreeDoc(newDoc);
10944
10945 return(ret);
10946}
10947
Daniel Veillard81273902003-09-30 00:43:48 +000010948#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010949/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010950 * xmlParseExternalEntity:
10951 * @doc: the document the chunk pertains to
10952 * @sax: the SAX handler bloc (possibly NULL)
10953 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10954 * @depth: Used for loop detection, use 0
10955 * @URL: the URL for the entity to load
10956 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010957 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010958 *
10959 * Parse an external general entity
10960 * An external general parsed entity is well-formed if it matches the
10961 * production labeled extParsedEnt.
10962 *
10963 * [78] extParsedEnt ::= TextDecl? content
10964 *
10965 * Returns 0 if the entity is well formed, -1 in case of args problem and
10966 * the parser error code otherwise
10967 */
10968
10969int
10970xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010971 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010972 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010973 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010974}
10975
10976/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010977 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010978 * @doc: the document the chunk pertains to
10979 * @sax: the SAX handler bloc (possibly NULL)
10980 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10981 * @depth: Used for loop detection, use 0
10982 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010983 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010984 *
10985 * Parse a well-balanced chunk of an XML document
10986 * called by the parser
10987 * The allowed sequence for the Well Balanced Chunk is the one defined by
10988 * the content production in the XML grammar:
10989 *
10990 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10991 *
10992 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10993 * the parser error code otherwise
10994 */
10995
10996int
10997xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010998 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010999 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11000 depth, string, lst, 0 );
11001}
Daniel Veillard81273902003-09-30 00:43:48 +000011002#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011003
11004/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011005 * xmlParseBalancedChunkMemoryInternal:
11006 * @oldctxt: the existing parsing context
11007 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11008 * @user_data: the user data field for the parser context
11009 * @lst: the return value for the set of parsed nodes
11010 *
11011 *
11012 * Parse a well-balanced chunk of an XML document
11013 * called by the parser
11014 * The allowed sequence for the Well Balanced Chunk is the one defined by
11015 * the content production in the XML grammar:
11016 *
11017 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11018 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011019 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11020 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011021 *
11022 * In case recover is set to 1, the nodelist will not be empty even if
11023 * the parsed chunk is not well balanced.
11024 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011025static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011026xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11027 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11028 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011029 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011030 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011031 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011032 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011033 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011034
11035 if (oldctxt->depth > 40) {
11036 return(XML_ERR_ENTITY_LOOP);
11037 }
11038
11039
11040 if (lst != NULL)
11041 *lst = NULL;
11042 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011043 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011044
11045 size = xmlStrlen(string);
11046
11047 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011048 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011049 if (user_data != NULL)
11050 ctxt->userData = user_data;
11051 else
11052 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011053 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11054 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011055 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11056 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11057 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011058
11059 oldsax = ctxt->sax;
11060 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011061 xmlDetectSAX2(ctxt);
11062
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011063 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011064 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011065 newDoc = xmlNewDoc(BAD_CAST "1.0");
11066 if (newDoc == NULL) {
11067 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011068 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011069 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011070 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011071 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011072 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011073 } else {
11074 ctxt->myDoc = oldctxt->myDoc;
11075 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011076 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000011077 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000011078 BAD_CAST "pseudoroot", NULL);
11079 if (ctxt->myDoc->children == NULL) {
11080 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011081 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011082 xmlFreeParserCtxt(ctxt);
11083 if (newDoc != NULL)
11084 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000011085 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011086 }
11087 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011088 ctxt->instate = XML_PARSER_CONTENT;
11089 ctxt->depth = oldctxt->depth + 1;
11090
Daniel Veillard328f48c2002-11-15 15:24:34 +000011091 ctxt->validate = 0;
11092 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011093 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11094 /*
11095 * ID/IDREF registration will be done in xmlValidateElement below
11096 */
11097 ctxt->loadsubset |= XML_SKIP_IDS;
11098 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011099 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011100 ctxt->attsDefault = oldctxt->attsDefault;
11101 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011102
Daniel Veillard68e9e742002-11-16 15:35:11 +000011103 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011104 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011105 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011106 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011107 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011108 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011109 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011110 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011111 }
11112
11113 if (!ctxt->wellFormed) {
11114 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011115 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011116 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011117 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011118 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011119 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011120 }
11121
William M. Brack7b9154b2003-09-27 19:23:50 +000011122 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011123 xmlNodePtr cur;
11124
11125 /*
11126 * Return the newly created nodeset after unlinking it from
11127 * they pseudo parent.
11128 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011129 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011130 *lst = cur;
11131 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011132#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011133 if (oldctxt->validate && oldctxt->wellFormed &&
11134 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11135 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11136 oldctxt->myDoc, cur);
11137 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011138#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011139 cur->parent = NULL;
11140 cur = cur->next;
11141 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011142 ctxt->myDoc->children->children = NULL;
11143 }
11144 if (ctxt->myDoc != NULL) {
11145 xmlFreeNode(ctxt->myDoc->children);
11146 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011147 }
11148
11149 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011150 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011151 ctxt->attsDefault = NULL;
11152 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011153 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011154 if (newDoc != NULL)
11155 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011156
11157 return(ret);
11158}
11159
Daniel Veillard81273902003-09-30 00:43:48 +000011160#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011161/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011162 * xmlParseBalancedChunkMemoryRecover:
11163 * @doc: the document the chunk pertains to
11164 * @sax: the SAX handler bloc (possibly NULL)
11165 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11166 * @depth: Used for loop detection, use 0
11167 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11168 * @lst: the return value for the set of parsed nodes
11169 * @recover: return nodes even if the data is broken (use 0)
11170 *
11171 *
11172 * Parse a well-balanced chunk of an XML document
11173 * called by the parser
11174 * The allowed sequence for the Well Balanced Chunk is the one defined by
11175 * the content production in the XML grammar:
11176 *
11177 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11178 *
11179 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11180 * the parser error code otherwise
11181 *
11182 * In case recover is set to 1, the nodelist will not be empty even if
11183 * the parsed chunk is not well balanced.
11184 */
11185int
11186xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11187 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11188 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011189 xmlParserCtxtPtr ctxt;
11190 xmlDocPtr newDoc;
11191 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011192 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011193 int size;
11194 int ret = 0;
11195
11196 if (depth > 40) {
11197 return(XML_ERR_ENTITY_LOOP);
11198 }
11199
11200
Daniel Veillardcda96922001-08-21 10:56:31 +000011201 if (lst != NULL)
11202 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011203 if (string == NULL)
11204 return(-1);
11205
11206 size = xmlStrlen(string);
11207
11208 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11209 if (ctxt == NULL) return(-1);
11210 ctxt->userData = ctxt;
11211 if (sax != NULL) {
11212 oldsax = ctxt->sax;
11213 ctxt->sax = sax;
11214 if (user_data != NULL)
11215 ctxt->userData = user_data;
11216 }
11217 newDoc = xmlNewDoc(BAD_CAST "1.0");
11218 if (newDoc == NULL) {
11219 xmlFreeParserCtxt(ctxt);
11220 return(-1);
11221 }
11222 if (doc != NULL) {
11223 newDoc->intSubset = doc->intSubset;
11224 newDoc->extSubset = doc->extSubset;
11225 }
11226 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11227 if (newDoc->children == NULL) {
11228 if (sax != NULL)
11229 ctxt->sax = oldsax;
11230 xmlFreeParserCtxt(ctxt);
11231 newDoc->intSubset = NULL;
11232 newDoc->extSubset = NULL;
11233 xmlFreeDoc(newDoc);
11234 return(-1);
11235 }
11236 nodePush(ctxt, newDoc->children);
11237 if (doc == NULL) {
11238 ctxt->myDoc = newDoc;
11239 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011240 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011241 newDoc->children->doc = doc;
11242 }
11243 ctxt->instate = XML_PARSER_CONTENT;
11244 ctxt->depth = depth;
11245
11246 /*
11247 * Doing validity checking on chunk doesn't make sense
11248 */
11249 ctxt->validate = 0;
11250 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011251 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011252
Daniel Veillardb39bc392002-10-26 19:29:51 +000011253 if ( doc != NULL ){
11254 content = doc->children;
11255 doc->children = NULL;
11256 xmlParseContent(ctxt);
11257 doc->children = content;
11258 }
11259 else {
11260 xmlParseContent(ctxt);
11261 }
Owen Taylor3473f882001-02-23 17:55:21 +000011262 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011263 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011265 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011266 }
11267 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011268 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011269 }
11270
11271 if (!ctxt->wellFormed) {
11272 if (ctxt->errNo == 0)
11273 ret = 1;
11274 else
11275 ret = ctxt->errNo;
11276 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011277 ret = 0;
11278 }
11279
11280 if (lst != NULL && (ret == 0 || recover == 1)) {
11281 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011282
11283 /*
11284 * Return the newly created nodeset after unlinking it from
11285 * they pseudo parent.
11286 */
11287 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011288 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011289 while (cur != NULL) {
11290 cur->parent = NULL;
11291 cur = cur->next;
11292 }
11293 newDoc->children->children = NULL;
11294 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011295
Owen Taylor3473f882001-02-23 17:55:21 +000011296 if (sax != NULL)
11297 ctxt->sax = oldsax;
11298 xmlFreeParserCtxt(ctxt);
11299 newDoc->intSubset = NULL;
11300 newDoc->extSubset = NULL;
11301 xmlFreeDoc(newDoc);
11302
11303 return(ret);
11304}
11305
11306/**
11307 * xmlSAXParseEntity:
11308 * @sax: the SAX handler block
11309 * @filename: the filename
11310 *
11311 * parse an XML external entity out of context and build a tree.
11312 * It use the given SAX function block to handle the parsing callback.
11313 * If sax is NULL, fallback to the default DOM tree building routines.
11314 *
11315 * [78] extParsedEnt ::= TextDecl? content
11316 *
11317 * This correspond to a "Well Balanced" chunk
11318 *
11319 * Returns the resulting document tree
11320 */
11321
11322xmlDocPtr
11323xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11324 xmlDocPtr ret;
11325 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011326
11327 ctxt = xmlCreateFileParserCtxt(filename);
11328 if (ctxt == NULL) {
11329 return(NULL);
11330 }
11331 if (sax != NULL) {
11332 if (ctxt->sax != NULL)
11333 xmlFree(ctxt->sax);
11334 ctxt->sax = sax;
11335 ctxt->userData = NULL;
11336 }
11337
Owen Taylor3473f882001-02-23 17:55:21 +000011338 xmlParseExtParsedEnt(ctxt);
11339
11340 if (ctxt->wellFormed)
11341 ret = ctxt->myDoc;
11342 else {
11343 ret = NULL;
11344 xmlFreeDoc(ctxt->myDoc);
11345 ctxt->myDoc = NULL;
11346 }
11347 if (sax != NULL)
11348 ctxt->sax = NULL;
11349 xmlFreeParserCtxt(ctxt);
11350
11351 return(ret);
11352}
11353
11354/**
11355 * xmlParseEntity:
11356 * @filename: the filename
11357 *
11358 * parse an XML external entity out of context and build a tree.
11359 *
11360 * [78] extParsedEnt ::= TextDecl? content
11361 *
11362 * This correspond to a "Well Balanced" chunk
11363 *
11364 * Returns the resulting document tree
11365 */
11366
11367xmlDocPtr
11368xmlParseEntity(const char *filename) {
11369 return(xmlSAXParseEntity(NULL, filename));
11370}
Daniel Veillard81273902003-09-30 00:43:48 +000011371#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011372
11373/**
11374 * xmlCreateEntityParserCtxt:
11375 * @URL: the entity URL
11376 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011377 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011378 *
11379 * Create a parser context for an external entity
11380 * Automatic support for ZLIB/Compress compressed document is provided
11381 * by default if found at compile-time.
11382 *
11383 * Returns the new parser context or NULL
11384 */
11385xmlParserCtxtPtr
11386xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11387 const xmlChar *base) {
11388 xmlParserCtxtPtr ctxt;
11389 xmlParserInputPtr inputStream;
11390 char *directory = NULL;
11391 xmlChar *uri;
11392
11393 ctxt = xmlNewParserCtxt();
11394 if (ctxt == NULL) {
11395 return(NULL);
11396 }
11397
11398 uri = xmlBuildURI(URL, base);
11399
11400 if (uri == NULL) {
11401 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11402 if (inputStream == NULL) {
11403 xmlFreeParserCtxt(ctxt);
11404 return(NULL);
11405 }
11406
11407 inputPush(ctxt, inputStream);
11408
11409 if ((ctxt->directory == NULL) && (directory == NULL))
11410 directory = xmlParserGetDirectory((char *)URL);
11411 if ((ctxt->directory == NULL) && (directory != NULL))
11412 ctxt->directory = directory;
11413 } else {
11414 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11415 if (inputStream == NULL) {
11416 xmlFree(uri);
11417 xmlFreeParserCtxt(ctxt);
11418 return(NULL);
11419 }
11420
11421 inputPush(ctxt, inputStream);
11422
11423 if ((ctxt->directory == NULL) && (directory == NULL))
11424 directory = xmlParserGetDirectory((char *)uri);
11425 if ((ctxt->directory == NULL) && (directory != NULL))
11426 ctxt->directory = directory;
11427 xmlFree(uri);
11428 }
Owen Taylor3473f882001-02-23 17:55:21 +000011429 return(ctxt);
11430}
11431
11432/************************************************************************
11433 * *
11434 * Front ends when parsing from a file *
11435 * *
11436 ************************************************************************/
11437
11438/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011439 * xmlCreateURLParserCtxt:
11440 * @filename: the filename or URL
11441 * @options: a combination of xmlParserOption(s)
Owen Taylor3473f882001-02-23 17:55:21 +000011442 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011443 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011444 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011445 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011446 *
11447 * Returns the new parser context or NULL
11448 */
11449xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011450xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011451{
11452 xmlParserCtxtPtr ctxt;
11453 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011454 char *directory = NULL;
11455
Owen Taylor3473f882001-02-23 17:55:21 +000011456 ctxt = xmlNewParserCtxt();
11457 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011458 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011459 return(NULL);
11460 }
11461
Daniel Veillard61b93382003-11-03 14:28:31 +000011462 if (options != 0)
11463 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011464
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011465 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011466 if (inputStream == NULL) {
11467 xmlFreeParserCtxt(ctxt);
11468 return(NULL);
11469 }
11470
Owen Taylor3473f882001-02-23 17:55:21 +000011471 inputPush(ctxt, inputStream);
11472 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011473 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011474 if ((ctxt->directory == NULL) && (directory != NULL))
11475 ctxt->directory = directory;
11476
11477 return(ctxt);
11478}
11479
Daniel Veillard61b93382003-11-03 14:28:31 +000011480/**
11481 * xmlCreateFileParserCtxt:
11482 * @filename: the filename
11483 *
11484 * Create a parser context for a file content.
11485 * Automatic support for ZLIB/Compress compressed document is provided
11486 * by default if found at compile-time.
11487 *
11488 * Returns the new parser context or NULL
11489 */
11490xmlParserCtxtPtr
11491xmlCreateFileParserCtxt(const char *filename)
11492{
11493 return(xmlCreateURLParserCtxt(filename, 0));
11494}
11495
Daniel Veillard81273902003-09-30 00:43:48 +000011496#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011497/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011498 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011499 * @sax: the SAX handler block
11500 * @filename: the filename
11501 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11502 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011503 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011504 *
11505 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11506 * compressed document is provided by default if found at compile-time.
11507 * It use the given SAX function block to handle the parsing callback.
11508 * If sax is NULL, fallback to the default DOM tree building routines.
11509 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011510 * User data (void *) is stored within the parser context in the
11511 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011512 *
Owen Taylor3473f882001-02-23 17:55:21 +000011513 * Returns the resulting document tree
11514 */
11515
11516xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011517xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11518 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011519 xmlDocPtr ret;
11520 xmlParserCtxtPtr ctxt;
11521 char *directory = NULL;
11522
Daniel Veillard635ef722001-10-29 11:48:19 +000011523 xmlInitParser();
11524
Owen Taylor3473f882001-02-23 17:55:21 +000011525 ctxt = xmlCreateFileParserCtxt(filename);
11526 if (ctxt == NULL) {
11527 return(NULL);
11528 }
11529 if (sax != NULL) {
11530 if (ctxt->sax != NULL)
11531 xmlFree(ctxt->sax);
11532 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011533 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011534 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011535 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011536 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011537 }
Owen Taylor3473f882001-02-23 17:55:21 +000011538
11539 if ((ctxt->directory == NULL) && (directory == NULL))
11540 directory = xmlParserGetDirectory(filename);
11541 if ((ctxt->directory == NULL) && (directory != NULL))
11542 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11543
Daniel Veillarddad3f682002-11-17 16:47:27 +000011544 ctxt->recovery = recovery;
11545
Owen Taylor3473f882001-02-23 17:55:21 +000011546 xmlParseDocument(ctxt);
11547
William M. Brackc07329e2003-09-08 01:57:30 +000011548 if ((ctxt->wellFormed) || recovery) {
11549 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011550 if (ret != NULL) {
11551 if (ctxt->input->buf->compressed > 0)
11552 ret->compression = 9;
11553 else
11554 ret->compression = ctxt->input->buf->compressed;
11555 }
William M. Brackc07329e2003-09-08 01:57:30 +000011556 }
Owen Taylor3473f882001-02-23 17:55:21 +000011557 else {
11558 ret = NULL;
11559 xmlFreeDoc(ctxt->myDoc);
11560 ctxt->myDoc = NULL;
11561 }
11562 if (sax != NULL)
11563 ctxt->sax = NULL;
11564 xmlFreeParserCtxt(ctxt);
11565
11566 return(ret);
11567}
11568
11569/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011570 * xmlSAXParseFile:
11571 * @sax: the SAX handler block
11572 * @filename: the filename
11573 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11574 * documents
11575 *
11576 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11577 * compressed document is provided by default if found at compile-time.
11578 * It use the given SAX function block to handle the parsing callback.
11579 * If sax is NULL, fallback to the default DOM tree building routines.
11580 *
11581 * Returns the resulting document tree
11582 */
11583
11584xmlDocPtr
11585xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11586 int recovery) {
11587 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11588}
11589
11590/**
Owen Taylor3473f882001-02-23 17:55:21 +000011591 * xmlRecoverDoc:
11592 * @cur: a pointer to an array of xmlChar
11593 *
11594 * parse an XML in-memory document and build a tree.
11595 * In the case the document is not Well Formed, a tree is built anyway
11596 *
11597 * Returns the resulting document tree
11598 */
11599
11600xmlDocPtr
11601xmlRecoverDoc(xmlChar *cur) {
11602 return(xmlSAXParseDoc(NULL, cur, 1));
11603}
11604
11605/**
11606 * xmlParseFile:
11607 * @filename: the filename
11608 *
11609 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11610 * compressed document is provided by default if found at compile-time.
11611 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011612 * Returns the resulting document tree if the file was wellformed,
11613 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011614 */
11615
11616xmlDocPtr
11617xmlParseFile(const char *filename) {
11618 return(xmlSAXParseFile(NULL, filename, 0));
11619}
11620
11621/**
11622 * xmlRecoverFile:
11623 * @filename: the filename
11624 *
11625 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11626 * compressed document is provided by default if found at compile-time.
11627 * In the case the document is not Well Formed, a tree is built anyway
11628 *
11629 * Returns the resulting document tree
11630 */
11631
11632xmlDocPtr
11633xmlRecoverFile(const char *filename) {
11634 return(xmlSAXParseFile(NULL, filename, 1));
11635}
11636
11637
11638/**
11639 * xmlSetupParserForBuffer:
11640 * @ctxt: an XML parser context
11641 * @buffer: a xmlChar * buffer
11642 * @filename: a file name
11643 *
11644 * Setup the parser context to parse a new buffer; Clears any prior
11645 * contents from the parser context. The buffer parameter must not be
11646 * NULL, but the filename parameter can be
11647 */
11648void
11649xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11650 const char* filename)
11651{
11652 xmlParserInputPtr input;
11653
11654 input = xmlNewInputStream(ctxt);
11655 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011656 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011657 xmlFree(ctxt);
11658 return;
11659 }
11660
11661 xmlClearParserCtxt(ctxt);
11662 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011663 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011664 input->base = buffer;
11665 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011666 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011667 inputPush(ctxt, input);
11668}
11669
11670/**
11671 * xmlSAXUserParseFile:
11672 * @sax: a SAX handler
11673 * @user_data: The user data returned on SAX callbacks
11674 * @filename: a file name
11675 *
11676 * parse an XML file and call the given SAX handler routines.
11677 * Automatic support for ZLIB/Compress compressed document is provided
11678 *
11679 * Returns 0 in case of success or a error number otherwise
11680 */
11681int
11682xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11683 const char *filename) {
11684 int ret = 0;
11685 xmlParserCtxtPtr ctxt;
11686
11687 ctxt = xmlCreateFileParserCtxt(filename);
11688 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011689#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011690 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011691#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011692 xmlFree(ctxt->sax);
11693 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011694 xmlDetectSAX2(ctxt);
11695
Owen Taylor3473f882001-02-23 17:55:21 +000011696 if (user_data != NULL)
11697 ctxt->userData = user_data;
11698
11699 xmlParseDocument(ctxt);
11700
11701 if (ctxt->wellFormed)
11702 ret = 0;
11703 else {
11704 if (ctxt->errNo != 0)
11705 ret = ctxt->errNo;
11706 else
11707 ret = -1;
11708 }
11709 if (sax != NULL)
11710 ctxt->sax = NULL;
11711 xmlFreeParserCtxt(ctxt);
11712
11713 return ret;
11714}
Daniel Veillard81273902003-09-30 00:43:48 +000011715#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011716
11717/************************************************************************
11718 * *
11719 * Front ends when parsing from memory *
11720 * *
11721 ************************************************************************/
11722
11723/**
11724 * xmlCreateMemoryParserCtxt:
11725 * @buffer: a pointer to a char array
11726 * @size: the size of the array
11727 *
11728 * Create a parser context for an XML in-memory document.
11729 *
11730 * Returns the new parser context or NULL
11731 */
11732xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011733xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011734 xmlParserCtxtPtr ctxt;
11735 xmlParserInputPtr input;
11736 xmlParserInputBufferPtr buf;
11737
11738 if (buffer == NULL)
11739 return(NULL);
11740 if (size <= 0)
11741 return(NULL);
11742
11743 ctxt = xmlNewParserCtxt();
11744 if (ctxt == NULL)
11745 return(NULL);
11746
Daniel Veillard53350552003-09-18 13:35:51 +000011747 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011748 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011749 if (buf == NULL) {
11750 xmlFreeParserCtxt(ctxt);
11751 return(NULL);
11752 }
Owen Taylor3473f882001-02-23 17:55:21 +000011753
11754 input = xmlNewInputStream(ctxt);
11755 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011756 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011757 xmlFreeParserCtxt(ctxt);
11758 return(NULL);
11759 }
11760
11761 input->filename = NULL;
11762 input->buf = buf;
11763 input->base = input->buf->buffer->content;
11764 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011765 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011766
11767 inputPush(ctxt, input);
11768 return(ctxt);
11769}
11770
Daniel Veillard81273902003-09-30 00:43:48 +000011771#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011772/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011773 * xmlSAXParseMemoryWithData:
11774 * @sax: the SAX handler block
11775 * @buffer: an pointer to a char array
11776 * @size: the size of the array
11777 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11778 * documents
11779 * @data: the userdata
11780 *
11781 * parse an XML in-memory block and use the given SAX function block
11782 * to handle the parsing callback. If sax is NULL, fallback to the default
11783 * DOM tree building routines.
11784 *
11785 * User data (void *) is stored within the parser context in the
11786 * context's _private member, so it is available nearly everywhere in libxml
11787 *
11788 * Returns the resulting document tree
11789 */
11790
11791xmlDocPtr
11792xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11793 int size, int recovery, void *data) {
11794 xmlDocPtr ret;
11795 xmlParserCtxtPtr ctxt;
11796
11797 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11798 if (ctxt == NULL) return(NULL);
11799 if (sax != NULL) {
11800 if (ctxt->sax != NULL)
11801 xmlFree(ctxt->sax);
11802 ctxt->sax = sax;
11803 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011804 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011805 if (data!=NULL) {
11806 ctxt->_private=data;
11807 }
11808
Daniel Veillardadba5f12003-04-04 16:09:01 +000011809 ctxt->recovery = recovery;
11810
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011811 xmlParseDocument(ctxt);
11812
11813 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11814 else {
11815 ret = NULL;
11816 xmlFreeDoc(ctxt->myDoc);
11817 ctxt->myDoc = NULL;
11818 }
11819 if (sax != NULL)
11820 ctxt->sax = NULL;
11821 xmlFreeParserCtxt(ctxt);
11822
11823 return(ret);
11824}
11825
11826/**
Owen Taylor3473f882001-02-23 17:55:21 +000011827 * xmlSAXParseMemory:
11828 * @sax: the SAX handler block
11829 * @buffer: an pointer to a char array
11830 * @size: the size of the array
11831 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11832 * documents
11833 *
11834 * parse an XML in-memory block and use the given SAX function block
11835 * to handle the parsing callback. If sax is NULL, fallback to the default
11836 * DOM tree building routines.
11837 *
11838 * Returns the resulting document tree
11839 */
11840xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011841xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11842 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011843 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011844}
11845
11846/**
11847 * xmlParseMemory:
11848 * @buffer: an pointer to a char array
11849 * @size: the size of the array
11850 *
11851 * parse an XML in-memory block and build a tree.
11852 *
11853 * Returns the resulting document tree
11854 */
11855
Daniel Veillard50822cb2001-07-26 20:05:51 +000011856xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011857 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11858}
11859
11860/**
11861 * xmlRecoverMemory:
11862 * @buffer: an pointer to a char array
11863 * @size: the size of the array
11864 *
11865 * parse an XML in-memory block and build a tree.
11866 * In the case the document is not Well Formed, a tree is built anyway
11867 *
11868 * Returns the resulting document tree
11869 */
11870
Daniel Veillard50822cb2001-07-26 20:05:51 +000011871xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011872 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11873}
11874
11875/**
11876 * xmlSAXUserParseMemory:
11877 * @sax: a SAX handler
11878 * @user_data: The user data returned on SAX callbacks
11879 * @buffer: an in-memory XML document input
11880 * @size: the length of the XML document in bytes
11881 *
11882 * A better SAX parsing routine.
11883 * parse an XML in-memory buffer and call the given SAX handler routines.
11884 *
11885 * Returns 0 in case of success or a error number otherwise
11886 */
11887int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011888 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011889 int ret = 0;
11890 xmlParserCtxtPtr ctxt;
11891 xmlSAXHandlerPtr oldsax = NULL;
11892
Daniel Veillard9e923512002-08-14 08:48:52 +000011893 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011894 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11895 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011896 oldsax = ctxt->sax;
11897 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011898 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011899 if (user_data != NULL)
11900 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011901
11902 xmlParseDocument(ctxt);
11903
11904 if (ctxt->wellFormed)
11905 ret = 0;
11906 else {
11907 if (ctxt->errNo != 0)
11908 ret = ctxt->errNo;
11909 else
11910 ret = -1;
11911 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011912 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011913 xmlFreeParserCtxt(ctxt);
11914
11915 return ret;
11916}
Daniel Veillard81273902003-09-30 00:43:48 +000011917#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011918
11919/**
11920 * xmlCreateDocParserCtxt:
11921 * @cur: a pointer to an array of xmlChar
11922 *
11923 * Creates a parser context for an XML in-memory document.
11924 *
11925 * Returns the new parser context or NULL
11926 */
11927xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011928xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011929 int len;
11930
11931 if (cur == NULL)
11932 return(NULL);
11933 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011934 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011935}
11936
Daniel Veillard81273902003-09-30 00:43:48 +000011937#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011938/**
11939 * xmlSAXParseDoc:
11940 * @sax: the SAX handler block
11941 * @cur: a pointer to an array of xmlChar
11942 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11943 * documents
11944 *
11945 * parse an XML in-memory document and build a tree.
11946 * It use the given SAX function block to handle the parsing callback.
11947 * If sax is NULL, fallback to the default DOM tree building routines.
11948 *
11949 * Returns the resulting document tree
11950 */
11951
11952xmlDocPtr
11953xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11954 xmlDocPtr ret;
11955 xmlParserCtxtPtr ctxt;
11956
11957 if (cur == NULL) return(NULL);
11958
11959
11960 ctxt = xmlCreateDocParserCtxt(cur);
11961 if (ctxt == NULL) return(NULL);
11962 if (sax != NULL) {
11963 ctxt->sax = sax;
11964 ctxt->userData = NULL;
11965 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011966 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011967
11968 xmlParseDocument(ctxt);
11969 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11970 else {
11971 ret = NULL;
11972 xmlFreeDoc(ctxt->myDoc);
11973 ctxt->myDoc = NULL;
11974 }
11975 if (sax != NULL)
11976 ctxt->sax = NULL;
11977 xmlFreeParserCtxt(ctxt);
11978
11979 return(ret);
11980}
11981
11982/**
11983 * xmlParseDoc:
11984 * @cur: a pointer to an array of xmlChar
11985 *
11986 * parse an XML in-memory document and build a tree.
11987 *
11988 * Returns the resulting document tree
11989 */
11990
11991xmlDocPtr
11992xmlParseDoc(xmlChar *cur) {
11993 return(xmlSAXParseDoc(NULL, cur, 0));
11994}
Daniel Veillard81273902003-09-30 00:43:48 +000011995#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011996
Daniel Veillard81273902003-09-30 00:43:48 +000011997#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011998/************************************************************************
11999 * *
12000 * Specific function to keep track of entities references *
12001 * and used by the XSLT debugger *
12002 * *
12003 ************************************************************************/
12004
12005static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12006
12007/**
12008 * xmlAddEntityReference:
12009 * @ent : A valid entity
12010 * @firstNode : A valid first node for children of entity
12011 * @lastNode : A valid last node of children entity
12012 *
12013 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12014 */
12015static void
12016xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12017 xmlNodePtr lastNode)
12018{
12019 if (xmlEntityRefFunc != NULL) {
12020 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12021 }
12022}
12023
12024
12025/**
12026 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012027 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012028 *
12029 * Set the function to call call back when a xml reference has been made
12030 */
12031void
12032xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12033{
12034 xmlEntityRefFunc = func;
12035}
Daniel Veillard81273902003-09-30 00:43:48 +000012036#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012037
12038/************************************************************************
12039 * *
12040 * Miscellaneous *
12041 * *
12042 ************************************************************************/
12043
12044#ifdef LIBXML_XPATH_ENABLED
12045#include <libxml/xpath.h>
12046#endif
12047
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012048extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012049static int xmlParserInitialized = 0;
12050
12051/**
12052 * xmlInitParser:
12053 *
12054 * Initialization function for the XML parser.
12055 * This is not reentrant. Call once before processing in case of
12056 * use in multithreaded programs.
12057 */
12058
12059void
12060xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012061 if (xmlParserInitialized != 0)
12062 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012063
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012064 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12065 (xmlGenericError == NULL))
12066 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012067 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012068 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012069 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012070 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012071 xmlDefaultSAXHandlerInit();
12072 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012073#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012074 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012075#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012076#ifdef LIBXML_HTML_ENABLED
12077 htmlInitAutoClose();
12078 htmlDefaultSAXHandlerInit();
12079#endif
12080#ifdef LIBXML_XPATH_ENABLED
12081 xmlXPathInit();
12082#endif
12083 xmlParserInitialized = 1;
12084}
12085
12086/**
12087 * xmlCleanupParser:
12088 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012089 * Cleanup function for the XML library. It tries to reclaim all
12090 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012091 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012092 * function should not prevent reusing the library but one should
12093 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012094 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012095 */
12096
12097void
12098xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012099 if (!xmlParserInitialized)
12100 return;
12101
Owen Taylor3473f882001-02-23 17:55:21 +000012102 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012103#ifdef LIBXML_CATALOG_ENABLED
12104 xmlCatalogCleanup();
12105#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012106 xmlCleanupInputCallbacks();
12107#ifdef LIBXML_OUTPUT_ENABLED
12108 xmlCleanupOutputCallbacks();
12109#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012110 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012111 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012112 xmlCleanupThreads(); /* must be last if called not from the main thread */
Daniel Veillardd0463562001-10-13 09:15:48 +000012113 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012114}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012115
12116/************************************************************************
12117 * *
12118 * New set (2.6.0) of simpler and more flexible APIs *
12119 * *
12120 ************************************************************************/
12121
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string unless it is owned by the "dict" dictionary visible in
 * the current scope. A NULL string is ignored, and with a NULL "dict"
 * every non-NULL string is freed.
 *
 * The expansion is a bare "if" statement which already carries its
 * terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    if (((str) != NULL) &&                                          \
        ((dict == NULL) ||                                          \
         (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))         \
        xmlFree((char *)(str));
12133
12134/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012135 * xmlCtxtReset:
12136 * @ctxt: an XML parser context
12137 *
12138 * Reset a parser context
12139 */
12140void
12141xmlCtxtReset(xmlParserCtxtPtr ctxt)
12142{
12143 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012144 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012145
12146 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12147 xmlFreeInputStream(input);
12148 }
12149 ctxt->inputNr = 0;
12150 ctxt->input = NULL;
12151
12152 ctxt->spaceNr = 0;
12153 ctxt->spaceTab[0] = -1;
12154 ctxt->space = &ctxt->spaceTab[0];
12155
12156
12157 ctxt->nodeNr = 0;
12158 ctxt->node = NULL;
12159
12160 ctxt->nameNr = 0;
12161 ctxt->name = NULL;
12162
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012163 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012164 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012165 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012166 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012167 DICT_FREE(ctxt->directory);
12168 ctxt->directory = NULL;
12169 DICT_FREE(ctxt->extSubURI);
12170 ctxt->extSubURI = NULL;
12171 DICT_FREE(ctxt->extSubSystem);
12172 ctxt->extSubSystem = NULL;
12173 if (ctxt->myDoc != NULL)
12174 xmlFreeDoc(ctxt->myDoc);
12175 ctxt->myDoc = NULL;
12176
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012177 ctxt->standalone = -1;
12178 ctxt->hasExternalSubset = 0;
12179 ctxt->hasPErefs = 0;
12180 ctxt->html = 0;
12181 ctxt->external = 0;
12182 ctxt->instate = XML_PARSER_START;
12183 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012184
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012185 ctxt->wellFormed = 1;
12186 ctxt->nsWellFormed = 1;
12187 ctxt->valid = 1;
12188 ctxt->vctxt.userData = ctxt;
12189 ctxt->vctxt.error = xmlParserValidityError;
12190 ctxt->vctxt.warning = xmlParserValidityWarning;
12191 ctxt->record_info = 0;
12192 ctxt->nbChars = 0;
12193 ctxt->checkIndex = 0;
12194 ctxt->inSubset = 0;
12195 ctxt->errNo = XML_ERR_OK;
12196 ctxt->depth = 0;
12197 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12198 ctxt->catalogs = NULL;
12199 xmlInitNodeInfoSeq(&ctxt->node_seq);
12200
12201 if (ctxt->attsDefault != NULL) {
12202 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12203 ctxt->attsDefault = NULL;
12204 }
12205 if (ctxt->attsSpecial != NULL) {
12206 xmlHashFree(ctxt->attsSpecial, NULL);
12207 ctxt->attsSpecial = NULL;
12208 }
12209
Daniel Veillard4432df22003-09-28 18:58:27 +000012210#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012211 if (ctxt->catalogs != NULL)
12212 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012213#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012214 if (ctxt->lastError.code != XML_ERR_OK)
12215 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012216}
12217
12218/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012219 * xmlCtxtResetPush:
12220 * @ctxt: an XML parser context
12221 * @chunk: a pointer to an array of chars
12222 * @size: number of chars in the array
12223 * @filename: an optional file name or URI
12224 * @encoding: the document encoding, or NULL
12225 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012226 * Reset a push parser context
12227 *
12228 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012229 */
12230int
12231xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12232 int size, const char *filename, const char *encoding)
12233{
12234 xmlParserInputPtr inputStream;
12235 xmlParserInputBufferPtr buf;
12236 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12237
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012238 if (ctxt == NULL)
12239 return(1);
12240
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012241 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12242 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12243
12244 buf = xmlAllocParserInputBuffer(enc);
12245 if (buf == NULL)
12246 return(1);
12247
12248 if (ctxt == NULL) {
12249 xmlFreeParserInputBuffer(buf);
12250 return(1);
12251 }
12252
12253 xmlCtxtReset(ctxt);
12254
12255 if (ctxt->pushTab == NULL) {
12256 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12257 sizeof(xmlChar *));
12258 if (ctxt->pushTab == NULL) {
12259 xmlErrMemory(ctxt, NULL);
12260 xmlFreeParserInputBuffer(buf);
12261 return(1);
12262 }
12263 }
12264
12265 if (filename == NULL) {
12266 ctxt->directory = NULL;
12267 } else {
12268 ctxt->directory = xmlParserGetDirectory(filename);
12269 }
12270
12271 inputStream = xmlNewInputStream(ctxt);
12272 if (inputStream == NULL) {
12273 xmlFreeParserInputBuffer(buf);
12274 return(1);
12275 }
12276
12277 if (filename == NULL)
12278 inputStream->filename = NULL;
12279 else
12280 inputStream->filename = (char *)
12281 xmlCanonicPath((const xmlChar *) filename);
12282 inputStream->buf = buf;
12283 inputStream->base = inputStream->buf->buffer->content;
12284 inputStream->cur = inputStream->buf->buffer->content;
12285 inputStream->end =
12286 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12287
12288 inputPush(ctxt, inputStream);
12289
12290 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12291 (ctxt->input->buf != NULL)) {
12292 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12293 int cur = ctxt->input->cur - ctxt->input->base;
12294
12295 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12296
12297 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12298 ctxt->input->cur = ctxt->input->base + cur;
12299 ctxt->input->end =
12300 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12301 use];
12302#ifdef DEBUG_PUSH
12303 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12304#endif
12305 }
12306
12307 if (encoding != NULL) {
12308 xmlCharEncodingHandlerPtr hdlr;
12309
12310 hdlr = xmlFindCharEncodingHandler(encoding);
12311 if (hdlr != NULL) {
12312 xmlSwitchToEncoding(ctxt, hdlr);
12313 } else {
12314 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12315 "Unsupported encoding %s\n", BAD_CAST encoding);
12316 }
12317 } else if (enc != XML_CHAR_ENCODING_NONE) {
12318 xmlSwitchEncoding(ctxt, enc);
12319 }
12320
12321 return(0);
12322}
12323
12324/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012325 * xmlCtxtUseOptions:
12326 * @ctxt: an XML parser context
12327 * @options: a combination of xmlParserOption(s)
12328 *
12329 * Applies the options to the parser context
12330 *
12331 * Returns 0 in case of success, the set of unknown or unimplemented options
12332 * in case of error.
12333 */
12334int
12335xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12336{
12337 if (options & XML_PARSE_RECOVER) {
12338 ctxt->recovery = 1;
12339 options -= XML_PARSE_RECOVER;
12340 } else
12341 ctxt->recovery = 0;
12342 if (options & XML_PARSE_DTDLOAD) {
12343 ctxt->loadsubset = XML_DETECT_IDS;
12344 options -= XML_PARSE_DTDLOAD;
12345 } else
12346 ctxt->loadsubset = 0;
12347 if (options & XML_PARSE_DTDATTR) {
12348 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12349 options -= XML_PARSE_DTDATTR;
12350 }
12351 if (options & XML_PARSE_NOENT) {
12352 ctxt->replaceEntities = 1;
12353 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12354 options -= XML_PARSE_NOENT;
12355 } else
12356 ctxt->replaceEntities = 0;
12357 if (options & XML_PARSE_NOWARNING) {
12358 ctxt->sax->warning = NULL;
12359 options -= XML_PARSE_NOWARNING;
12360 }
12361 if (options & XML_PARSE_NOERROR) {
12362 ctxt->sax->error = NULL;
12363 ctxt->sax->fatalError = NULL;
12364 options -= XML_PARSE_NOERROR;
12365 }
12366 if (options & XML_PARSE_PEDANTIC) {
12367 ctxt->pedantic = 1;
12368 options -= XML_PARSE_PEDANTIC;
12369 } else
12370 ctxt->pedantic = 0;
12371 if (options & XML_PARSE_NOBLANKS) {
12372 ctxt->keepBlanks = 0;
12373 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12374 options -= XML_PARSE_NOBLANKS;
12375 } else
12376 ctxt->keepBlanks = 1;
12377 if (options & XML_PARSE_DTDVALID) {
12378 ctxt->validate = 1;
12379 if (options & XML_PARSE_NOWARNING)
12380 ctxt->vctxt.warning = NULL;
12381 if (options & XML_PARSE_NOERROR)
12382 ctxt->vctxt.error = NULL;
12383 options -= XML_PARSE_DTDVALID;
12384 } else
12385 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012386#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012387 if (options & XML_PARSE_SAX1) {
12388 ctxt->sax->startElement = xmlSAX2StartElement;
12389 ctxt->sax->endElement = xmlSAX2EndElement;
12390 ctxt->sax->startElementNs = NULL;
12391 ctxt->sax->endElementNs = NULL;
12392 ctxt->sax->initialized = 1;
12393 options -= XML_PARSE_SAX1;
12394 }
Daniel Veillard81273902003-09-30 00:43:48 +000012395#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012396 if (options & XML_PARSE_NODICT) {
12397 ctxt->dictNames = 0;
12398 options -= XML_PARSE_NODICT;
12399 } else {
12400 ctxt->dictNames = 1;
12401 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012402 if (options & XML_PARSE_NOCDATA) {
12403 ctxt->sax->cdataBlock = NULL;
12404 options -= XML_PARSE_NOCDATA;
12405 }
12406 if (options & XML_PARSE_NSCLEAN) {
12407 ctxt->options |= XML_PARSE_NSCLEAN;
12408 options -= XML_PARSE_NSCLEAN;
12409 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012410 if (options & XML_PARSE_NONET) {
12411 ctxt->options |= XML_PARSE_NONET;
12412 options -= XML_PARSE_NONET;
12413 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012414 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012415 return (options);
12416}
12417
12418/**
12419 * xmlDoRead:
12420 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012421 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012422 * @encoding: the document encoding, or NULL
12423 * @options: a combination of xmlParserOption(s)
12424 * @reuse: keep the context for reuse
12425 *
12426 * Common front-end for the xmlRead functions
12427 *
12428 * Returns the resulting document tree or NULL
12429 */
12430static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012431xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12432 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012433{
12434 xmlDocPtr ret;
12435
12436 xmlCtxtUseOptions(ctxt, options);
12437 if (encoding != NULL) {
12438 xmlCharEncodingHandlerPtr hdlr;
12439
12440 hdlr = xmlFindCharEncodingHandler(encoding);
12441 if (hdlr != NULL)
12442 xmlSwitchToEncoding(ctxt, hdlr);
12443 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012444 if ((URL != NULL) && (ctxt->input != NULL) &&
12445 (ctxt->input->filename == NULL))
12446 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012447 xmlParseDocument(ctxt);
12448 if ((ctxt->wellFormed) || ctxt->recovery)
12449 ret = ctxt->myDoc;
12450 else {
12451 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012452 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012453 if ((ctxt->dictNames) &&
12454 (ctxt->myDoc->dict == ctxt->dict))
12455 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012456 xmlFreeDoc(ctxt->myDoc);
12457 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012458 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012459 ctxt->myDoc = NULL;
12460 if (!reuse) {
12461 if ((ctxt->dictNames) &&
12462 (ret != NULL) &&
12463 (ret->dict == ctxt->dict))
12464 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012465 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012466 } else {
12467 /* Must duplicate the reference to the dictionary */
12468 if ((ctxt->dictNames) &&
12469 (ret != NULL) &&
12470 (ret->dict == ctxt->dict))
12471 xmlDictReference(ctxt->dict);
12472 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012473
12474 return (ret);
12475}
12476
12477/**
12478 * xmlReadDoc:
12479 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012480 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012481 * @encoding: the document encoding, or NULL
12482 * @options: a combination of xmlParserOption(s)
12483 *
12484 * parse an XML in-memory document and build a tree.
12485 *
12486 * Returns the resulting document tree
12487 */
12488xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012489xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012490{
12491 xmlParserCtxtPtr ctxt;
12492
12493 if (cur == NULL)
12494 return (NULL);
12495
12496 ctxt = xmlCreateDocParserCtxt(cur);
12497 if (ctxt == NULL)
12498 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012499 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012500}
12501
12502/**
12503 * xmlReadFile:
12504 * @filename: a file or URL
12505 * @encoding: the document encoding, or NULL
12506 * @options: a combination of xmlParserOption(s)
12507 *
12508 * parse an XML file from the filesystem or the network.
12509 *
12510 * Returns the resulting document tree
12511 */
12512xmlDocPtr
12513xmlReadFile(const char *filename, const char *encoding, int options)
12514{
12515 xmlParserCtxtPtr ctxt;
12516
Daniel Veillard61b93382003-11-03 14:28:31 +000012517 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012518 if (ctxt == NULL)
12519 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012520 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012521}
12522
12523/**
12524 * xmlReadMemory:
12525 * @buffer: a pointer to a char array
12526 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012527 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012528 * @encoding: the document encoding, or NULL
12529 * @options: a combination of xmlParserOption(s)
12530 *
12531 * parse an XML in-memory document and build a tree.
12532 *
12533 * Returns the resulting document tree
12534 */
12535xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012536xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012537{
12538 xmlParserCtxtPtr ctxt;
12539
12540 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12541 if (ctxt == NULL)
12542 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012543 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012544}
12545
12546/**
12547 * xmlReadFd:
12548 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012549 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012550 * @encoding: the document encoding, or NULL
12551 * @options: a combination of xmlParserOption(s)
12552 *
12553 * parse an XML from a file descriptor and build a tree.
12554 *
12555 * Returns the resulting document tree
12556 */
12557xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012558xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012559{
12560 xmlParserCtxtPtr ctxt;
12561 xmlParserInputBufferPtr input;
12562 xmlParserInputPtr stream;
12563
12564 if (fd < 0)
12565 return (NULL);
12566
12567 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12568 if (input == NULL)
12569 return (NULL);
12570 ctxt = xmlNewParserCtxt();
12571 if (ctxt == NULL) {
12572 xmlFreeParserInputBuffer(input);
12573 return (NULL);
12574 }
12575 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12576 if (stream == NULL) {
12577 xmlFreeParserInputBuffer(input);
12578 xmlFreeParserCtxt(ctxt);
12579 return (NULL);
12580 }
12581 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012582 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012583}
12584
12585/**
12586 * xmlReadIO:
12587 * @ioread: an I/O read function
12588 * @ioclose: an I/O close function
12589 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012590 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012591 * @encoding: the document encoding, or NULL
12592 * @options: a combination of xmlParserOption(s)
12593 *
12594 * parse an XML document from I/O functions and source and build a tree.
12595 *
12596 * Returns the resulting document tree
12597 */
12598xmlDocPtr
12599xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012600 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012601{
12602 xmlParserCtxtPtr ctxt;
12603 xmlParserInputBufferPtr input;
12604 xmlParserInputPtr stream;
12605
12606 if (ioread == NULL)
12607 return (NULL);
12608
12609 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12610 XML_CHAR_ENCODING_NONE);
12611 if (input == NULL)
12612 return (NULL);
12613 ctxt = xmlNewParserCtxt();
12614 if (ctxt == NULL) {
12615 xmlFreeParserInputBuffer(input);
12616 return (NULL);
12617 }
12618 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12619 if (stream == NULL) {
12620 xmlFreeParserInputBuffer(input);
12621 xmlFreeParserCtxt(ctxt);
12622 return (NULL);
12623 }
12624 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012625 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012626}
12627
12628/**
12629 * xmlCtxtReadDoc:
12630 * @ctxt: an XML parser context
12631 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012632 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012633 * @encoding: the document encoding, or NULL
12634 * @options: a combination of xmlParserOption(s)
12635 *
12636 * parse an XML in-memory document and build a tree.
12637 * This reuses the existing @ctxt parser context
12638 *
12639 * Returns the resulting document tree
12640 */
12641xmlDocPtr
12642xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012643 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012644{
12645 xmlParserInputPtr stream;
12646
12647 if (cur == NULL)
12648 return (NULL);
12649 if (ctxt == NULL)
12650 return (NULL);
12651
12652 xmlCtxtReset(ctxt);
12653
12654 stream = xmlNewStringInputStream(ctxt, cur);
12655 if (stream == NULL) {
12656 return (NULL);
12657 }
12658 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012659 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012660}
12661
12662/**
12663 * xmlCtxtReadFile:
12664 * @ctxt: an XML parser context
12665 * @filename: a file or URL
12666 * @encoding: the document encoding, or NULL
12667 * @options: a combination of xmlParserOption(s)
12668 *
12669 * parse an XML file from the filesystem or the network.
12670 * This reuses the existing @ctxt parser context
12671 *
12672 * Returns the resulting document tree
12673 */
12674xmlDocPtr
12675xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12676 const char *encoding, int options)
12677{
12678 xmlParserInputPtr stream;
12679
12680 if (filename == NULL)
12681 return (NULL);
12682 if (ctxt == NULL)
12683 return (NULL);
12684
12685 xmlCtxtReset(ctxt);
12686
12687 stream = xmlNewInputFromFile(ctxt, filename);
12688 if (stream == NULL) {
12689 return (NULL);
12690 }
12691 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012692 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012693}
12694
12695/**
12696 * xmlCtxtReadMemory:
12697 * @ctxt: an XML parser context
12698 * @buffer: a pointer to a char array
12699 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012700 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012701 * @encoding: the document encoding, or NULL
12702 * @options: a combination of xmlParserOption(s)
12703 *
12704 * parse an XML in-memory document and build a tree.
12705 * This reuses the existing @ctxt parser context
12706 *
12707 * Returns the resulting document tree
12708 */
12709xmlDocPtr
12710xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012711 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012712{
12713 xmlParserInputBufferPtr input;
12714 xmlParserInputPtr stream;
12715
12716 if (ctxt == NULL)
12717 return (NULL);
12718 if (buffer == NULL)
12719 return (NULL);
12720
12721 xmlCtxtReset(ctxt);
12722
12723 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12724 if (input == NULL) {
12725 return(NULL);
12726 }
12727
12728 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12729 if (stream == NULL) {
12730 xmlFreeParserInputBuffer(input);
12731 return(NULL);
12732 }
12733
12734 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012735 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012736}
12737
12738/**
12739 * xmlCtxtReadFd:
12740 * @ctxt: an XML parser context
12741 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012742 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012743 * @encoding: the document encoding, or NULL
12744 * @options: a combination of xmlParserOption(s)
12745 *
12746 * parse an XML from a file descriptor and build a tree.
12747 * This reuses the existing @ctxt parser context
12748 *
12749 * Returns the resulting document tree
12750 */
12751xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012752xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12753 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012754{
12755 xmlParserInputBufferPtr input;
12756 xmlParserInputPtr stream;
12757
12758 if (fd < 0)
12759 return (NULL);
12760 if (ctxt == NULL)
12761 return (NULL);
12762
12763 xmlCtxtReset(ctxt);
12764
12765
12766 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12767 if (input == NULL)
12768 return (NULL);
12769 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12770 if (stream == NULL) {
12771 xmlFreeParserInputBuffer(input);
12772 return (NULL);
12773 }
12774 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012775 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012776}
12777
12778/**
12779 * xmlCtxtReadIO:
12780 * @ctxt: an XML parser context
12781 * @ioread: an I/O read function
12782 * @ioclose: an I/O close function
12783 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012784 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012785 * @encoding: the document encoding, or NULL
12786 * @options: a combination of xmlParserOption(s)
12787 *
12788 * parse an XML document from I/O functions and source and build a tree.
12789 * This reuses the existing @ctxt parser context
12790 *
12791 * Returns the resulting document tree
12792 */
12793xmlDocPtr
12794xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12795 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012796 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012797 const char *encoding, int options)
12798{
12799 xmlParserInputBufferPtr input;
12800 xmlParserInputPtr stream;
12801
12802 if (ioread == NULL)
12803 return (NULL);
12804 if (ctxt == NULL)
12805 return (NULL);
12806
12807 xmlCtxtReset(ctxt);
12808
12809 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12810 XML_CHAR_ENCODING_NONE);
12811 if (input == NULL)
12812 return (NULL);
12813 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12814 if (stream == NULL) {
12815 xmlFreeParserInputBuffer(input);
12816 return (NULL);
12817 }
12818 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012819 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012820}