blob: c01c9de50efb0bf8b72fcbf03fa024fa9268a5c3 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * accepts to process.  It exists as a safety boundary, not because the
 * parser itself is limited to that depth.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/* Compile-time switch defined for the rest of this file. */
#define SAX2                        1

/* Sizes (in bytes) of the two fixed buffer flavours used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100

/* Marker string identifying a document built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets that the W3C specifications allow.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000608 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
609 (ctxt->str_xml_ns == NULL)) {
610 xmlErrMemory(ctxt, NULL);
611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000612}
613
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614typedef struct _xmlDefAttrs xmlDefAttrs;
615typedef xmlDefAttrs *xmlDefAttrsPtr;
616struct _xmlDefAttrs {
617 int nbAttrs; /* number of defaulted attributes on that element */
618 int maxAttrs; /* the size of the array */
619 const xmlChar *values[4]; /* array of localname/prefix/values */
620};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000621
622/**
623 * xmlAddDefAttrs:
624 * @ctxt: an XML parser context
625 * @fullname: the element fullname
626 * @fullattr: the attribute fullname
627 * @value: the attribute value
628 *
629 * Add a defaulted attribute for an element
630 */
631static void
632xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
633 const xmlChar *fullname,
634 const xmlChar *fullattr,
635 const xmlChar *value) {
636 xmlDefAttrsPtr defaults;
637 int len;
638 const xmlChar *name;
639 const xmlChar *prefix;
640
641 if (ctxt->attsDefault == NULL) {
642 ctxt->attsDefault = xmlHashCreate(10);
643 if (ctxt->attsDefault == NULL)
644 goto mem_error;
645 }
646
647 /*
648 * plit the element name into prefix:localname , the string found
649 * are within the DTD and hen not associated to namespace names.
650 */
651 name = xmlSplitQName3(fullname, &len);
652 if (name == NULL) {
653 name = xmlDictLookup(ctxt->dict, fullname, -1);
654 prefix = NULL;
655 } else {
656 name = xmlDictLookup(ctxt->dict, name, -1);
657 prefix = xmlDictLookup(ctxt->dict, fullname, len);
658 }
659
660 /*
661 * make sure there is some storage
662 */
663 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
664 if (defaults == NULL) {
665 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
666 12 * sizeof(const xmlChar *));
667 if (defaults == NULL)
668 goto mem_error;
669 defaults->maxAttrs = 4;
670 defaults->nbAttrs = 0;
671 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
672 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
673 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
674 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
675 if (defaults == NULL)
676 goto mem_error;
677 defaults->maxAttrs *= 2;
678 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
679 }
680
681 /*
682 * plit the element name into prefix:localname , the string found
683 * are within the DTD and hen not associated to namespace names.
684 */
685 name = xmlSplitQName3(fullattr, &len);
686 if (name == NULL) {
687 name = xmlDictLookup(ctxt->dict, fullattr, -1);
688 prefix = NULL;
689 } else {
690 name = xmlDictLookup(ctxt->dict, name, -1);
691 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
692 }
693
694 defaults->values[4 * defaults->nbAttrs] = name;
695 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
696 /* intern the string and precompute the end */
697 len = xmlStrlen(value);
698 value = xmlDictLookup(ctxt->dict, value, len);
699 defaults->values[4 * defaults->nbAttrs + 2] = value;
700 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
701 defaults->nbAttrs++;
702
703 return;
704
705mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000706 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000707 return;
708}
709
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000710/**
711 * xmlAddSpecialAttr:
712 * @ctxt: an XML parser context
713 * @fullname: the element fullname
714 * @fullattr: the attribute fullname
715 * @type: the attribute type
716 *
717 * Register that this attribute is not CDATA
718 */
719static void
720xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
721 const xmlChar *fullname,
722 const xmlChar *fullattr,
723 int type)
724{
725 if (ctxt->attsSpecial == NULL) {
726 ctxt->attsSpecial = xmlHashCreate(10);
727 if (ctxt->attsSpecial == NULL)
728 goto mem_error;
729 }
730
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000731 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
732 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734
735mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000736 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000737 return;
738}
739
Daniel Veillard4432df22003-09-28 18:58:27 +0000740/**
741 * xmlCheckLanguageID:
742 * @lang: pointer to the string value
743 *
744 * Checks that the value conforms to the LanguageID production:
745 *
746 * NOTE: this is somewhat deprecated, those productions were removed from
747 * the XML Second edition.
748 *
749 * [33] LanguageID ::= Langcode ('-' Subcode)*
750 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
751 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
752 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
753 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
754 * [38] Subcode ::= ([a-z] | [A-Z])+
755 *
756 * Returns 1 if correct 0 otherwise
757 **/
758int
759xmlCheckLanguageID(const xmlChar * lang)
760{
761 const xmlChar *cur = lang;
762
763 if (cur == NULL)
764 return (0);
765 if (((cur[0] == 'i') && (cur[1] == '-')) ||
766 ((cur[0] == 'I') && (cur[1] == '-'))) {
767 /*
768 * IANA code
769 */
770 cur += 2;
771 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
772 ((cur[0] >= 'a') && (cur[0] <= 'z')))
773 cur++;
774 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
775 ((cur[0] == 'X') && (cur[1] == '-'))) {
776 /*
777 * User code
778 */
779 cur += 2;
780 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
781 ((cur[0] >= 'a') && (cur[0] <= 'z')))
782 cur++;
783 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
784 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
785 /*
786 * ISO639
787 */
788 cur++;
789 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
790 ((cur[0] >= 'a') && (cur[0] <= 'z')))
791 cur++;
792 else
793 return (0);
794 } else
795 return (0);
796 while (cur[0] != 0) { /* non input consuming */
797 if (cur[0] != '-')
798 return (0);
799 cur++;
800 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
801 ((cur[0] >= 'a') && (cur[0] <= 'z')))
802 cur++;
803 else
804 return (0);
805 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
806 ((cur[0] >= 'a') && (cur[0] <= 'z')))
807 cur++;
808 }
809 return (1);
810}
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812/************************************************************************
813 * *
814 * Parser stacks related functions and macros *
815 * *
816 ************************************************************************/
817
818xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
819 const xmlChar ** str);
820
Daniel Veillard0fb18932003-09-07 09:14:37 +0000821#ifdef SAX2
822/**
823 * nsPush:
824 * @ctxt: an XML parser context
825 * @prefix: the namespace prefix or NULL
826 * @URL: the namespace name
827 *
828 * Pushes a new parser namespace on top of the ns stack
829 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000830 * Returns -1 in case of error, -2 if the namespace should be discarded
831 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000832 */
833static int
834nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
835{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000836 if (ctxt->options & XML_PARSE_NSCLEAN) {
837 int i;
838 for (i = 0;i < ctxt->nsNr;i += 2) {
839 if (ctxt->nsTab[i] == prefix) {
840 /* in scope */
841 if (ctxt->nsTab[i + 1] == URL)
842 return(-2);
843 /* out of scope keep it */
844 break;
845 }
846 }
847 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000848 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
849 ctxt->nsMax = 10;
850 ctxt->nsNr = 0;
851 ctxt->nsTab = (const xmlChar **)
852 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
853 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000854 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000855 ctxt->nsMax = 0;
856 return (-1);
857 }
858 } else if (ctxt->nsNr >= ctxt->nsMax) {
859 ctxt->nsMax *= 2;
860 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000861 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000862 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
863 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000864 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000865 ctxt->nsMax /= 2;
866 return (-1);
867 }
868 }
869 ctxt->nsTab[ctxt->nsNr++] = prefix;
870 ctxt->nsTab[ctxt->nsNr++] = URL;
871 return (ctxt->nsNr);
872}
873/**
874 * nsPop:
875 * @ctxt: an XML parser context
876 * @nr: the number to pop
877 *
878 * Pops the top @nr parser prefix/namespace from the ns stack
879 *
880 * Returns the number of namespaces removed
881 */
882static int
883nsPop(xmlParserCtxtPtr ctxt, int nr)
884{
885 int i;
886
887 if (ctxt->nsTab == NULL) return(0);
888 if (ctxt->nsNr < nr) {
889 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
890 nr = ctxt->nsNr;
891 }
892 if (ctxt->nsNr <= 0)
893 return (0);
894
895 for (i = 0;i < nr;i++) {
896 ctxt->nsNr--;
897 ctxt->nsTab[ctxt->nsNr] = NULL;
898 }
899 return(nr);
900}
901#endif
902
903static int
904xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
905 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 int maxatts;
908
909 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000910 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000911 atts = (const xmlChar **)
912 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000913 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
916 if (attallocs == NULL) goto mem_error;
917 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000918 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 } else if (nr + 5 > ctxt->maxatts) {
920 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000921 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
922 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000923 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000925 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
926 (maxatts / 5) * sizeof(int));
927 if (attallocs == NULL) goto mem_error;
928 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000929 ctxt->maxatts = maxatts;
930 }
931 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000932mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000933 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000934 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000935}
936
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937/**
938 * inputPush:
939 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000940 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 *
942 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000943 *
944 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000945 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000946extern int
947inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
948{
949 if (ctxt->inputNr >= ctxt->inputMax) {
950 ctxt->inputMax *= 2;
951 ctxt->inputTab =
952 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
953 ctxt->inputMax *
954 sizeof(ctxt->inputTab[0]));
955 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000956 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000957 return (0);
958 }
959 }
960 ctxt->inputTab[ctxt->inputNr] = value;
961 ctxt->input = value;
962 return (ctxt->inputNr++);
963}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000964/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000965 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000966 * @ctxt: an XML parser context
967 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000969 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000970 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972extern xmlParserInputPtr
973inputPop(xmlParserCtxtPtr ctxt)
974{
975 xmlParserInputPtr ret;
976
977 if (ctxt->inputNr <= 0)
978 return (0);
979 ctxt->inputNr--;
980 if (ctxt->inputNr > 0)
981 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
982 else
983 ctxt->input = NULL;
984 ret = ctxt->inputTab[ctxt->inputNr];
985 ctxt->inputTab[ctxt->inputNr] = 0;
986 return (ret);
987}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988/**
989 * nodePush:
990 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000991 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 *
993 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000994 *
995 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000996 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000997extern int
998nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
999{
1000 if (ctxt->nodeNr >= ctxt->nodeMax) {
1001 ctxt->nodeMax *= 2;
1002 ctxt->nodeTab =
1003 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1004 ctxt->nodeMax *
1005 sizeof(ctxt->nodeTab[0]));
1006 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001007 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001008 return (0);
1009 }
1010 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001011 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001012 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001013 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1014 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001015 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001016 return(0);
1017 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001018 ctxt->nodeTab[ctxt->nodeNr] = value;
1019 ctxt->node = value;
1020 return (ctxt->nodeNr++);
1021}
1022/**
1023 * nodePop:
1024 * @ctxt: an XML parser context
1025 *
1026 * Pops the top element node from the node stack
1027 *
1028 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001029 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001030extern xmlNodePtr
1031nodePop(xmlParserCtxtPtr ctxt)
1032{
1033 xmlNodePtr ret;
1034
1035 if (ctxt->nodeNr <= 0)
1036 return (0);
1037 ctxt->nodeNr--;
1038 if (ctxt->nodeNr > 0)
1039 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1040 else
1041 ctxt->node = NULL;
1042 ret = ctxt->nodeTab[ctxt->nodeNr];
1043 ctxt->nodeTab[ctxt->nodeNr] = 0;
1044 return (ret);
1045}
Daniel Veillarda2351322004-06-27 12:08:10 +00001046
1047#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001048/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001049 * nameNsPush:
1050 * @ctxt: an XML parser context
1051 * @value: the element name
1052 * @prefix: the element prefix
1053 * @URI: the element namespace name
1054 *
1055 * Pushes a new element name/prefix/URL on top of the name stack
1056 *
1057 * Returns -1 in case of error, the index in the stack otherwise
1058 */
1059static int
1060nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1061 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1062{
1063 if (ctxt->nameNr >= ctxt->nameMax) {
1064 const xmlChar * *tmp;
1065 void **tmp2;
1066 ctxt->nameMax *= 2;
1067 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1068 ctxt->nameMax *
1069 sizeof(ctxt->nameTab[0]));
1070 if (tmp == NULL) {
1071 ctxt->nameMax /= 2;
1072 goto mem_error;
1073 }
1074 ctxt->nameTab = tmp;
1075 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1076 ctxt->nameMax * 3 *
1077 sizeof(ctxt->pushTab[0]));
1078 if (tmp2 == NULL) {
1079 ctxt->nameMax /= 2;
1080 goto mem_error;
1081 }
1082 ctxt->pushTab = tmp2;
1083 }
1084 ctxt->nameTab[ctxt->nameNr] = value;
1085 ctxt->name = value;
1086 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1087 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001088 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001089 return (ctxt->nameNr++);
1090mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001091 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001092 return (-1);
1093}
1094/**
1095 * nameNsPop:
1096 * @ctxt: an XML parser context
1097 *
1098 * Pops the top element/prefix/URI name from the name stack
1099 *
1100 * Returns the name just removed
1101 */
1102static const xmlChar *
1103nameNsPop(xmlParserCtxtPtr ctxt)
1104{
1105 const xmlChar *ret;
1106
1107 if (ctxt->nameNr <= 0)
1108 return (0);
1109 ctxt->nameNr--;
1110 if (ctxt->nameNr > 0)
1111 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1112 else
1113 ctxt->name = NULL;
1114 ret = ctxt->nameTab[ctxt->nameNr];
1115 ctxt->nameTab[ctxt->nameNr] = NULL;
1116 return (ret);
1117}
Daniel Veillarda2351322004-06-27 12:08:10 +00001118#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001119
1120/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 * namePush:
1122 * @ctxt: an XML parser context
1123 * @value: the element name
1124 *
1125 * Pushes a new element name on top of the name stack
1126 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001127 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001128 */
1129extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001130namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001131{
1132 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001133 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 ctxt->nameMax *
1137 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001138 if (tmp == NULL) {
1139 ctxt->nameMax /= 2;
1140 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143 }
1144 ctxt->nameTab[ctxt->nameNr] = value;
1145 ctxt->name = value;
1146 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001147mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001148 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001150}
1151/**
1152 * namePop:
1153 * @ctxt: an XML parser context
1154 *
1155 * Pops the top element name from the name stack
1156 *
1157 * Returns the name just removed
1158 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001159extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001160namePop(xmlParserCtxtPtr ctxt)
1161{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001162 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001163
1164 if (ctxt->nameNr <= 0)
1165 return (0);
1166 ctxt->nameNr--;
1167 if (ctxt->nameNr > 0)
1168 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1169 else
1170 ctxt->name = NULL;
1171 ret = ctxt->nameTab[ctxt->nameNr];
1172 ctxt->nameTab[ctxt->nameNr] = 0;
1173 return (ret);
1174}
Owen Taylor3473f882001-02-23 17:55:21 +00001175
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001176static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001177 if (ctxt->spaceNr >= ctxt->spaceMax) {
1178 ctxt->spaceMax *= 2;
1179 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1180 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1181 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001182 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001183 return(0);
1184 }
1185 }
1186 ctxt->spaceTab[ctxt->spaceNr] = val;
1187 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1188 return(ctxt->spaceNr++);
1189}
1190
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001191static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001192 int ret;
1193 if (ctxt->spaceNr <= 0) return(0);
1194 ctxt->spaceNr--;
1195 if (ctxt->spaceNr > 0)
1196 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1197 else
1198 ctxt->space = NULL;
1199 ret = ctxt->spaceTab[ctxt->spaceNr];
1200 ctxt->spaceTab[ctxt->spaceNr] = -1;
1201 return(ret);
1202}
1203
1204/*
1205 * Macros for accessing the content. Those should be used only by the parser,
1206 * and not exported.
1207 *
1208 * Dirty macros, i.e. one often need to make assumption on the context to
1209 * use them
1210 *
1211 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1212 * To be used with extreme caution since operations consuming
1213 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser only to compare
 *           to ASCII values, otherwise it would break when running with
 *           UTF-8 encoding.
1218 * RAW same as CUR but in the input buffer, bypass any token
1219 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
1222 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001223 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001226 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1227 *
1228 * NEXT Skip to the next character, this does the proper decoding
1229 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001230 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001231 * CUR_CHAR(l) returns the current unicode character (int), set l
1232 * to the number of xmlChars used for the encoding [0-5].
1233 * CUR_SCHAR same but operate on a string instead of the context
1234 * COPY_BUF copy the current unicode char to the target buffer, increment
1235 * the index
1236 * GROW, SHRINK handling of input buffers
1237 */
1238
/*
 * Raw accessors to the current input buffer. They all expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define CUR_PTR ctxt->input->cur
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
#define NXT(val) ctxt->input->cur[(val)]
1243
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at s are
 * equal to c1 ... cn. The bytes are read as unsigned char.
 * Every argument is parenthesized in the expansion so that expressions
 * (conditionals, pointer arithmetic ...) can be passed safely; s is
 * evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1261
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters, then handle a pending PE reference and
 * pop the input if it is exhausted and cannot be grown.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1269
/*
 * SKIPL(val): same as SKIP but goes one xmlChar at a time so that
 * newlines are accounted for in the line and column counters.
 * val is parenthesized in the loop test so that an expression can be
 * passed; it is evaluated at each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)
1284
Daniel Veillarda880b122003-04-21 21:36:41 +00001285#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001286 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1287 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001288 xmlSHRINK (ctxt);
1289
1290static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1291 xmlParserInputShrink(ctxt->input);
1292 if ((*ctxt->input->cur == 0) &&
1293 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1294 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001295 }
Owen Taylor3473f882001-02-23 17:55:21 +00001296
Daniel Veillarda880b122003-04-21 21:36:41 +00001297#define GROW if ((ctxt->progressive == 0) && \
1298 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001299 xmlGROW (ctxt);
1300
1301static void xmlGROW (xmlParserCtxtPtr ctxt) {
1302 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1303 if ((*ctxt->input->cur == 0) &&
1304 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1305 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001306}
Owen Taylor3473f882001-02-23 17:55:21 +00001307
/* skip the blanks at the current position */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar without any newline accounting,
 * and try to grow the input when its end is reached.
 */
#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->input->col++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1319
/*
 * NEXTL(l): skip the current character which is l xmlChars long, with
 * line and column accounting, then handle a pending PE reference.
 * l is parenthesized in the expansion so that an expression can be passed.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)
1327
/* current character of the context input, its length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* same but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v, which is l xmlChars long,
 * to the buffer b at index i and advance i. The arguments are
 * parenthesized in the expansion; i must be a modifiable lvalue.
 * The expansion is an if/else statement without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001334
1335/**
1336 * xmlSkipBlankChars:
1337 * @ctxt: the XML parser context
1338 *
1339 * skip all blanks character found at that point in the input streams.
1340 * It pops up finished entities in the process if allowable at that point.
1341 *
1342 * Returns the number of space chars skipped
1343 */
1344
1345int
1346xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001347 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001348
1349 /*
1350 * It's Okay to use CUR/NEXT here since all the blanks are on
1351 * the ASCII range.
1352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001353 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1354 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001355 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001356 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001357 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001358 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001359 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001360 if (*cur == '\n') {
1361 ctxt->input->line++; ctxt->input->col = 1;
1362 }
1363 cur++;
1364 res++;
1365 if (*cur == 0) {
1366 ctxt->input->cur = cur;
1367 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1368 cur = ctxt->input->cur;
1369 }
1370 }
1371 ctxt->input->cur = cur;
1372 } else {
1373 int cur;
1374 do {
1375 cur = CUR;
1376 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1377 NEXT;
1378 cur = CUR;
1379 res++;
1380 }
1381 while ((cur == 0) && (ctxt->inputNr > 1) &&
1382 (ctxt->instate != XML_PARSER_COMMENT)) {
1383 xmlPopInput(ctxt);
1384 cur = CUR;
1385 }
1386 /*
1387 * Need to handle support of entities branching here
1388 */
1389 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1390 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1391 }
Owen Taylor3473f882001-02-23 17:55:21 +00001392 return(res);
1393}
1394
1395/************************************************************************
1396 * *
1397 * Commodity functions to handle entities *
1398 * *
1399 ************************************************************************/
1400
1401/**
1402 * xmlPopInput:
1403 * @ctxt: an XML parser context
1404 *
1405 * xmlPopInput: the current input pointed by ctxt->input came to an end
1406 * pop it and return the next char.
1407 *
1408 * Returns the current xmlChar in the parser context
1409 */
1410xmlChar
1411xmlPopInput(xmlParserCtxtPtr ctxt) {
1412 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1413 if (xmlParserDebugEntities)
1414 xmlGenericError(xmlGenericErrorContext,
1415 "Popping input %d\n", ctxt->inputNr);
1416 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001417 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001418 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1419 return(xmlPopInput(ctxt));
1420 return(CUR);
1421}
1422
1423/**
1424 * xmlPushInput:
1425 * @ctxt: an XML parser context
1426 * @input: an XML parser input fragment (entity, XML fragment ...).
1427 *
1428 * xmlPushInput: switch to a new input stream which is stacked on top
1429 * of the previous one(s).
1430 */
1431void
1432xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1433 if (input == NULL) return;
1434
1435 if (xmlParserDebugEntities) {
1436 if ((ctxt->input != NULL) && (ctxt->input->filename))
1437 xmlGenericError(xmlGenericErrorContext,
1438 "%s(%d): ", ctxt->input->filename,
1439 ctxt->input->line);
1440 xmlGenericError(xmlGenericErrorContext,
1441 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1442 }
1443 inputPush(ctxt, input);
1444 GROW;
1445}
1446
1447/**
1448 * xmlParseCharRef:
1449 * @ctxt: an XML parser context
1450 *
1451 * parse Reference declarations
1452 *
1453 * [66] CharRef ::= '&#' [0-9]+ ';' |
1454 * '&#x' [0-9a-fA-F]+ ';'
1455 *
1456 * [ WFC: Legal Character ]
1457 * Characters referred to using character references must match the
1458 * production for Char.
1459 *
1460 * Returns the value parsed (as an int), 0 in case of error
1461 */
1462int
1463xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001464 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001465 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001466 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001467
Owen Taylor3473f882001-02-23 17:55:21 +00001468 /*
1469 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1470 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001471 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001472 (NXT(2) == 'x')) {
1473 SKIP(3);
1474 GROW;
1475 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001476 if (count++ > 20) {
1477 count = 0;
1478 GROW;
1479 }
1480 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001481 val = val * 16 + (CUR - '0');
1482 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1483 val = val * 16 + (CUR - 'a') + 10;
1484 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1485 val = val * 16 + (CUR - 'A') + 10;
1486 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001487 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001488 val = 0;
1489 break;
1490 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001491 if (val > 0x10FFFF)
1492 outofrange = val;
1493
Owen Taylor3473f882001-02-23 17:55:21 +00001494 NEXT;
1495 count++;
1496 }
1497 if (RAW == ';') {
1498 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001499 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001500 ctxt->nbChars ++;
1501 ctxt->input->cur++;
1502 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001503 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001504 SKIP(2);
1505 GROW;
1506 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001507 if (count++ > 20) {
1508 count = 0;
1509 GROW;
1510 }
1511 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001512 val = val * 10 + (CUR - '0');
1513 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001514 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001515 val = 0;
1516 break;
1517 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001518 if (val > 0x10FFFF)
1519 outofrange = val;
1520
Owen Taylor3473f882001-02-23 17:55:21 +00001521 NEXT;
1522 count++;
1523 }
1524 if (RAW == ';') {
1525 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001526 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001527 ctxt->nbChars ++;
1528 ctxt->input->cur++;
1529 }
1530 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001531 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001532 }
1533
1534 /*
1535 * [ WFC: Legal Character ]
1536 * Characters referred to using character references must match the
1537 * production for Char.
1538 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001539 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001540 return(val);
1541 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001542 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1543 "xmlParseCharRef: invalid xmlChar value %d\n",
1544 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001545 }
1546 return(0);
1547}
1548
1549/**
1550 * xmlParseStringCharRef:
1551 * @ctxt: an XML parser context
1552 * @str: a pointer to an index in the string
1553 *
1554 * parse Reference declarations, variant parsing from a string rather
1555 * than an an input flow.
1556 *
1557 * [66] CharRef ::= '&#' [0-9]+ ';' |
1558 * '&#x' [0-9a-fA-F]+ ';'
1559 *
1560 * [ WFC: Legal Character ]
1561 * Characters referred to using character references must match the
1562 * production for Char.
1563 *
1564 * Returns the value parsed (as an int), 0 in case of error, str will be
1565 * updated to the current value of the index
1566 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001567static int
Owen Taylor3473f882001-02-23 17:55:21 +00001568xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1569 const xmlChar *ptr;
1570 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001571 unsigned int val = 0;
1572 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001573
1574 if ((str == NULL) || (*str == NULL)) return(0);
1575 ptr = *str;
1576 cur = *ptr;
1577 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1578 ptr += 3;
1579 cur = *ptr;
1580 while (cur != ';') { /* Non input consuming loop */
1581 if ((cur >= '0') && (cur <= '9'))
1582 val = val * 16 + (cur - '0');
1583 else if ((cur >= 'a') && (cur <= 'f'))
1584 val = val * 16 + (cur - 'a') + 10;
1585 else if ((cur >= 'A') && (cur <= 'F'))
1586 val = val * 16 + (cur - 'A') + 10;
1587 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001588 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001589 val = 0;
1590 break;
1591 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001592 if (val > 0x10FFFF)
1593 outofrange = val;
1594
Owen Taylor3473f882001-02-23 17:55:21 +00001595 ptr++;
1596 cur = *ptr;
1597 }
1598 if (cur == ';')
1599 ptr++;
1600 } else if ((cur == '&') && (ptr[1] == '#')){
1601 ptr += 2;
1602 cur = *ptr;
1603 while (cur != ';') { /* Non input consuming loops */
1604 if ((cur >= '0') && (cur <= '9'))
1605 val = val * 10 + (cur - '0');
1606 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001607 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001608 val = 0;
1609 break;
1610 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001611 if (val > 0x10FFFF)
1612 outofrange = val;
1613
Owen Taylor3473f882001-02-23 17:55:21 +00001614 ptr++;
1615 cur = *ptr;
1616 }
1617 if (cur == ';')
1618 ptr++;
1619 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001620 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001621 return(0);
1622 }
1623 *str = ptr;
1624
1625 /*
1626 * [ WFC: Legal Character ]
1627 * Characters referred to using character references must match the
1628 * production for Char.
1629 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001630 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001631 return(val);
1632 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001633 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1634 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1635 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001636 }
1637 return(0);
1638}
1639
1640/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001641 * xmlNewBlanksWrapperInputStream:
1642 * @ctxt: an XML parser context
1643 * @entity: an Entity pointer
1644 *
1645 * Create a new input stream for wrapping
1646 * blanks around a PEReference
1647 *
1648 * Returns the new input stream or NULL
1649 */
1650
1651static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1652
Daniel Veillardf4862f02002-09-10 11:13:43 +00001653static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001654xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1655 xmlParserInputPtr input;
1656 xmlChar *buffer;
1657 size_t length;
1658 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1660 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001661 return(NULL);
1662 }
1663 if (xmlParserDebugEntities)
1664 xmlGenericError(xmlGenericErrorContext,
1665 "new blanks wrapper for entity: %s\n", entity->name);
1666 input = xmlNewInputStream(ctxt);
1667 if (input == NULL) {
1668 return(NULL);
1669 }
1670 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001671 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001672 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001673 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001674 return(NULL);
1675 }
1676 buffer [0] = ' ';
1677 buffer [1] = '%';
1678 buffer [length-3] = ';';
1679 buffer [length-2] = ' ';
1680 buffer [length-1] = 0;
1681 memcpy(buffer + 2, entity->name, length - 5);
1682 input->free = deallocblankswrapper;
1683 input->base = buffer;
1684 input->cur = buffer;
1685 input->length = length;
1686 input->end = &buffer[length];
1687 return(input);
1688}
1689
1690/**
Owen Taylor3473f882001-02-23 17:55:21 +00001691 * xmlParserHandlePEReference:
1692 * @ctxt: the parser context
1693 *
1694 * [69] PEReference ::= '%' Name ';'
1695 *
1696 * [ WFC: No Recursion ]
1697 * A parsed entity must not contain a recursive
1698 * reference to itself, either directly or indirectly.
1699 *
1700 * [ WFC: Entity Declared ]
1701 * In a document without any DTD, a document with only an internal DTD
1702 * subset which contains no parameter entity references, or a document
1703 * with "standalone='yes'", ... ... The declaration of a parameter
1704 * entity must precede any reference to it...
1705 *
1706 * [ VC: Entity Declared ]
1707 * In a document with an external subset or external parameter entities
1708 * with "standalone='no'", ... ... The declaration of a parameter entity
1709 * must precede any reference to it...
1710 *
1711 * [ WFC: In DTD ]
1712 * Parameter-entity references may only appear in the DTD.
1713 * NOTE: misleading but this is handled.
1714 *
1715 * A PEReference may have been detected in the current input stream
1716 * the handling is done accordingly to
1717 * http://www.w3.org/TR/REC-xml#entproc
1718 * i.e.
1719 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001720 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001721 */
1722void
1723xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001724 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001725 xmlEntityPtr entity = NULL;
1726 xmlParserInputPtr input;
1727
Owen Taylor3473f882001-02-23 17:55:21 +00001728 if (RAW != '%') return;
1729 switch(ctxt->instate) {
1730 case XML_PARSER_CDATA_SECTION:
1731 return;
1732 case XML_PARSER_COMMENT:
1733 return;
1734 case XML_PARSER_START_TAG:
1735 return;
1736 case XML_PARSER_END_TAG:
1737 return;
1738 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001739 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001740 return;
1741 case XML_PARSER_PROLOG:
1742 case XML_PARSER_START:
1743 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001744 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001745 return;
1746 case XML_PARSER_ENTITY_DECL:
1747 case XML_PARSER_CONTENT:
1748 case XML_PARSER_ATTRIBUTE_VALUE:
1749 case XML_PARSER_PI:
1750 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001751 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001752 /* we just ignore it there */
1753 return;
1754 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001755 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001756 return;
1757 case XML_PARSER_ENTITY_VALUE:
1758 /*
1759 * NOTE: in the case of entity values, we don't do the
1760 * substitution here since we need the literal
1761 * entity value to be able to save the internal
1762 * subset of the document.
1763 * This will be handled by xmlStringDecodeEntities
1764 */
1765 return;
1766 case XML_PARSER_DTD:
1767 /*
1768 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1769 * In the internal DTD subset, parameter-entity references
1770 * can occur only where markup declarations can occur, not
1771 * within markup declarations.
1772 * In that case this is handled in xmlParseMarkupDecl
1773 */
1774 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1775 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001776 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001777 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001778 break;
1779 case XML_PARSER_IGNORE:
1780 return;
1781 }
1782
1783 NEXT;
1784 name = xmlParseName(ctxt);
1785 if (xmlParserDebugEntities)
1786 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001787 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001788 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001789 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 } else {
1791 if (RAW == ';') {
1792 NEXT;
1793 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1794 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1795 if (entity == NULL) {
1796
1797 /*
1798 * [ WFC: Entity Declared ]
1799 * In a document without any DTD, a document with only an
1800 * internal DTD subset which contains no parameter entity
1801 * references, or a document with "standalone='yes'", ...
1802 * ... The declaration of a parameter entity must precede
1803 * any reference to it...
1804 */
1805 if ((ctxt->standalone == 1) ||
1806 ((ctxt->hasExternalSubset == 0) &&
1807 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001808 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001809 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 } else {
1811 /*
1812 * [ VC: Entity Declared ]
1813 * In a document with an external subset or external
1814 * parameter entities with "standalone='no'", ...
1815 * ... The declaration of a parameter entity must precede
1816 * any reference to it...
1817 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001818 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1819 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1820 "PEReference: %%%s; not found\n",
1821 name);
1822 } else
1823 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1824 "PEReference: %%%s; not found\n",
1825 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001826 ctxt->valid = 0;
1827 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001828 } else if (ctxt->input->free != deallocblankswrapper) {
1829 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1830 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001831 } else {
1832 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1833 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001834 xmlChar start[4];
1835 xmlCharEncoding enc;
1836
Owen Taylor3473f882001-02-23 17:55:21 +00001837 /*
1838 * handle the extra spaces added before and after
1839 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001840 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001841 */
1842 input = xmlNewEntityInputStream(ctxt, entity);
1843 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001844
1845 /*
1846 * Get the 4 first bytes and decode the charset
1847 * if enc != XML_CHAR_ENCODING_NONE
1848 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001849 * Note that, since we may have some non-UTF8
1850 * encoding (like UTF16, bug 135229), the 'length'
1851 * is not known, but we can calculate based upon
1852 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001853 */
1854 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001855 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001856 start[0] = RAW;
1857 start[1] = NXT(1);
1858 start[2] = NXT(2);
1859 start[3] = NXT(3);
1860 enc = xmlDetectCharEncoding(start, 4);
1861 if (enc != XML_CHAR_ENCODING_NONE) {
1862 xmlSwitchEncoding(ctxt, enc);
1863 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001864 }
1865
Owen Taylor3473f882001-02-23 17:55:21 +00001866 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001867 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1868 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001869 xmlParseTextDecl(ctxt);
1870 }
Owen Taylor3473f882001-02-23 17:55:21 +00001871 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001872 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1873 "PEReference: %s is not a parameter entity\n",
1874 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001875 }
1876 }
1877 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001878 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001879 }
Owen Taylor3473f882001-02-23 17:55:21 +00001880 }
1881}
1882
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer to that many xmlChar.
 * On allocation failure it jumps to the mem_error label with buffer left
 * pointing at the old block (buffer##_size has already been doubled).
 * The expansion site must therefore provide both a buffer##_size variable
 * and a mem_error label.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
1894
1895/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001896 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001897 * @ctxt: the parser context
1898 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001899 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001900 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1901 * @end: an end marker xmlChar, 0 if none
1902 * @end2: an end marker xmlChar, 0 if none
1903 * @end3: an end marker xmlChar, 0 if none
1904 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001905 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001906 *
1907 * [67] Reference ::= EntityRef | CharRef
1908 *
1909 * [69] PEReference ::= '%' Name ';'
1910 *
1911 * Returns A newly allocated string with the substitution done. The caller
1912 * must deallocate it !
1913 */
1914xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1916 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001917 xmlChar *buffer = NULL;
1918 int buffer_size = 0;
1919
1920 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001921 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001922 xmlEntityPtr ent;
1923 int c,l;
1924 int nbchars = 0;
1925
Daniel Veillarde57ec792003-09-10 10:50:59 +00001926 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001927 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001928 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001929
1930 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001931 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001932 return(NULL);
1933 }
1934
1935 /*
1936 * allocate a translation buffer.
1937 */
1938 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001939 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001940 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001941
1942 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001943 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * we are operating on already parsed values.
1945 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001946 if (str < last)
1947 c = CUR_SCHAR(str, l);
1948 else
1949 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001950 while ((c != 0) && (c != end) && /* non input consuming loop */
1951 (c != end2) && (c != end3)) {
1952
1953 if (c == 0) break;
1954 if ((c == '&') && (str[1] == '#')) {
1955 int val = xmlParseStringCharRef(ctxt, &str);
1956 if (val != 0) {
1957 COPY_BUF(0,buffer,nbchars,val);
1958 }
1959 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1960 if (xmlParserDebugEntities)
1961 xmlGenericError(xmlGenericErrorContext,
1962 "String decoding Entity Reference: %.30s\n",
1963 str);
1964 ent = xmlParseStringEntityRef(ctxt, &str);
1965 if ((ent != NULL) &&
1966 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1967 if (ent->content != NULL) {
1968 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1969 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001970 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1971 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001972 }
1973 } else if ((ent != NULL) && (ent->content != NULL)) {
1974 xmlChar *rep;
1975
1976 ctxt->depth++;
1977 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1978 0, 0, 0);
1979 ctxt->depth--;
1980 if (rep != NULL) {
1981 current = rep;
1982 while (*current != 0) { /* non input consuming loop */
1983 buffer[nbchars++] = *current++;
1984 if (nbchars >
1985 buffer_size - XML_PARSER_BUFFER_SIZE) {
1986 growBuffer(buffer);
1987 }
1988 }
1989 xmlFree(rep);
1990 }
1991 } else if (ent != NULL) {
1992 int i = xmlStrlen(ent->name);
1993 const xmlChar *cur = ent->name;
1994
1995 buffer[nbchars++] = '&';
1996 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1997 growBuffer(buffer);
1998 }
1999 for (;i > 0;i--)
2000 buffer[nbchars++] = *cur++;
2001 buffer[nbchars++] = ';';
2002 }
2003 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2004 if (xmlParserDebugEntities)
2005 xmlGenericError(xmlGenericErrorContext,
2006 "String decoding PE Reference: %.30s\n", str);
2007 ent = xmlParseStringPEReference(ctxt, &str);
2008 if (ent != NULL) {
2009 xmlChar *rep;
2010
2011 ctxt->depth++;
2012 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2013 0, 0, 0);
2014 ctxt->depth--;
2015 if (rep != NULL) {
2016 current = rep;
2017 while (*current != 0) { /* non input consuming loop */
2018 buffer[nbchars++] = *current++;
2019 if (nbchars >
2020 buffer_size - XML_PARSER_BUFFER_SIZE) {
2021 growBuffer(buffer);
2022 }
2023 }
2024 xmlFree(rep);
2025 }
2026 }
2027 } else {
2028 COPY_BUF(l,buffer,nbchars,c);
2029 str += l;
2030 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2031 growBuffer(buffer);
2032 }
2033 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002034 if (str < last)
2035 c = CUR_SCHAR(str, l);
2036 else
2037 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002038 }
2039 buffer[nbchars++] = 0;
2040 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002041
2042mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002043 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002044 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002045}
2046
Daniel Veillarde57ec792003-09-10 10:50:59 +00002047/**
2048 * xmlStringDecodeEntities:
2049 * @ctxt: the parser context
2050 * @str: the input string
2051 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2052 * @end: an end marker xmlChar, 0 if none
2053 * @end2: an end marker xmlChar, 0 if none
2054 * @end3: an end marker xmlChar, 0 if none
2055 *
2056 * Takes a entity string content and process to do the adequate substitutions.
2057 *
2058 * [67] Reference ::= EntityRef | CharRef
2059 *
2060 * [69] PEReference ::= '%' Name ';'
2061 *
2062 * Returns A newly allocated string with the substitution done. The caller
2063 * must deallocate it !
2064 */
2065xmlChar *
2066xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2067 xmlChar end, xmlChar end2, xmlChar end3) {
2068 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2069 end, end2, end3));
2070}
Owen Taylor3473f882001-02-23 17:55:21 +00002071
2072/************************************************************************
2073 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002074 * Commodity functions, cleanup needed ? *
2075 * *
2076 ************************************************************************/
2077
2078/**
2079 * areBlanks:
2080 * @ctxt: an XML parser context
2081 * @str: a xmlChar *
2082 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002083 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002084 *
2085 * Is this a sequence of blank chars that one can ignore ?
2086 *
2087 * Returns 1 if ignorable 0 otherwise.
2088 */
2089
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002090static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2091 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002092 int i, ret;
2093 xmlNodePtr lastChild;
2094
Daniel Veillard05c13a22001-09-09 08:38:09 +00002095 /*
2096 * Don't spend time trying to differentiate them, the same callback is
2097 * used !
2098 */
2099 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002100 return(0);
2101
Owen Taylor3473f882001-02-23 17:55:21 +00002102 /*
2103 * Check for xml:space value.
2104 */
2105 if (*(ctxt->space) == 1)
2106 return(0);
2107
2108 /*
2109 * Check that the string is made of blanks
2110 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002111 if (blank_chars == 0) {
2112 for (i = 0;i < len;i++)
2113 if (!(IS_BLANK_CH(str[i]))) return(0);
2114 }
Owen Taylor3473f882001-02-23 17:55:21 +00002115
2116 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002117 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002118 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002119 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002120 if (ctxt->myDoc != NULL) {
2121 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2122 if (ret == 0) return(1);
2123 if (ret == 1) return(0);
2124 }
2125
2126 /*
2127 * Otherwise, heuristic :-\
2128 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002129 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002130 if ((ctxt->node->children == NULL) &&
2131 (RAW == '<') && (NXT(1) == '/')) return(0);
2132
2133 lastChild = xmlGetLastChild(ctxt->node);
2134 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002135 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2136 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002137 } else if (xmlNodeIsText(lastChild))
2138 return(0);
2139 else if ((ctxt->node->children != NULL) &&
2140 (xmlNodeIsText(ctxt->node->children)))
2141 return(0);
2142 return(1);
2143}
2144
Owen Taylor3473f882001-02-23 17:55:21 +00002145/************************************************************************
2146 * *
2147 * Extra stuff for namespace support *
2148 * Relates to http://www.w3.org/TR/WD-xml-names *
2149 * *
2150 ************************************************************************/
2151
2152/**
2153 * xmlSplitQName:
2154 * @ctxt: an XML parser context
2155 * @name: an XML parser context
2156 * @prefix: a xmlChar **
2157 *
2158 * parse an UTF8 encoded XML qualified name string
2159 *
2160 * [NS 5] QName ::= (Prefix ':')? LocalPart
2161 *
2162 * [NS 6] Prefix ::= NCName
2163 *
2164 * [NS 7] LocalPart ::= NCName
2165 *
2166 * Returns the local part, and prefix is updated
2167 * to get the Prefix if any.
2168 */
2169
2170xmlChar *
2171xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2172 xmlChar buf[XML_MAX_NAMELEN + 5];
2173 xmlChar *buffer = NULL;
2174 int len = 0;
2175 int max = XML_MAX_NAMELEN;
2176 xmlChar *ret = NULL;
2177 const xmlChar *cur = name;
2178 int c;
2179
2180 *prefix = NULL;
2181
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002182 if (cur == NULL) return(NULL);
2183
Owen Taylor3473f882001-02-23 17:55:21 +00002184#ifndef XML_XML_NAMESPACE
2185 /* xml: prefix is not really a namespace */
2186 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2187 (cur[2] == 'l') && (cur[3] == ':'))
2188 return(xmlStrdup(name));
2189#endif
2190
Daniel Veillard597bc482003-07-24 16:08:28 +00002191 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002192 if (cur[0] == ':')
2193 return(xmlStrdup(name));
2194
2195 c = *cur++;
2196 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2197 buf[len++] = c;
2198 c = *cur++;
2199 }
2200 if (len >= max) {
2201 /*
2202 * Okay someone managed to make a huge name, so he's ready to pay
2203 * for the processing speed.
2204 */
2205 max = len * 2;
2206
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002207 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002208 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002209 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002210 return(NULL);
2211 }
2212 memcpy(buffer, buf, len);
2213 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2214 if (len + 10 > max) {
2215 max *= 2;
2216 buffer = (xmlChar *) xmlRealloc(buffer,
2217 max * sizeof(xmlChar));
2218 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002219 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002220 return(NULL);
2221 }
2222 }
2223 buffer[len++] = c;
2224 c = *cur++;
2225 }
2226 buffer[len] = 0;
2227 }
2228
Daniel Veillard597bc482003-07-24 16:08:28 +00002229 /* nasty but well=formed
2230 if ((c == ':') && (*cur == 0)) {
2231 return(xmlStrdup(name));
2232 } */
2233
Owen Taylor3473f882001-02-23 17:55:21 +00002234 if (buffer == NULL)
2235 ret = xmlStrndup(buf, len);
2236 else {
2237 ret = buffer;
2238 buffer = NULL;
2239 max = XML_MAX_NAMELEN;
2240 }
2241
2242
2243 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002244 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002245 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002246 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002247 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002248 }
Owen Taylor3473f882001-02-23 17:55:21 +00002249 len = 0;
2250
Daniel Veillardbb284f42002-10-16 18:02:47 +00002251 /*
2252 * Check that the first character is proper to start
2253 * a new name
2254 */
2255 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2256 ((c >= 0x41) && (c <= 0x5A)) ||
2257 (c == '_') || (c == ':'))) {
2258 int l;
2259 int first = CUR_SCHAR(cur, l);
2260
2261 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002262 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002263 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002264 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002265 }
2266 }
2267 cur++;
2268
Owen Taylor3473f882001-02-23 17:55:21 +00002269 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2270 buf[len++] = c;
2271 c = *cur++;
2272 }
2273 if (len >= max) {
2274 /*
2275 * Okay someone managed to make a huge name, so he's ready to pay
2276 * for the processing speed.
2277 */
2278 max = len * 2;
2279
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002280 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002282 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002283 return(NULL);
2284 }
2285 memcpy(buffer, buf, len);
2286 while (c != 0) { /* tested bigname2.xml */
2287 if (len + 10 > max) {
2288 max *= 2;
2289 buffer = (xmlChar *) xmlRealloc(buffer,
2290 max * sizeof(xmlChar));
2291 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 return(NULL);
2294 }
2295 }
2296 buffer[len++] = c;
2297 c = *cur++;
2298 }
2299 buffer[len] = 0;
2300 }
2301
2302 if (buffer == NULL)
2303 ret = xmlStrndup(buf, len);
2304 else {
2305 ret = buffer;
2306 }
2307 }
2308
2309 return(ret);
2310}
2311
2312/************************************************************************
2313 * *
2314 * The parser itself *
2315 * Relates to http://www.w3.org/TR/REC-xml *
2316 * *
2317 ************************************************************************/
2318
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002319static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002320static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002321 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002322
Owen Taylor3473f882001-02-23 17:55:21 +00002323/**
2324 * xmlParseName:
2325 * @ctxt: an XML parser context
2326 *
2327 * parse an XML name.
2328 *
2329 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2330 * CombiningChar | Extender
2331 *
2332 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2333 *
2334 * [6] Names ::= Name (S Name)*
2335 *
2336 * Returns the Name parsed or NULL
2337 */
2338
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002339const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002340xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002341 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002342 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002343 int count = 0;
2344
2345 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002346
2347 /*
2348 * Accelerator for simple ASCII names
2349 */
2350 in = ctxt->input->cur;
2351 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2352 ((*in >= 0x41) && (*in <= 0x5A)) ||
2353 (*in == '_') || (*in == ':')) {
2354 in++;
2355 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2356 ((*in >= 0x41) && (*in <= 0x5A)) ||
2357 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002358 (*in == '_') || (*in == '-') ||
2359 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002360 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002361 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002362 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002363 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002364 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002365 ctxt->nbChars += count;
2366 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002367 if (ret == NULL)
2368 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002369 return(ret);
2370 }
2371 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002372 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002373}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002374
Daniel Veillard46de64e2002-05-29 08:21:33 +00002375/**
2376 * xmlParseNameAndCompare:
2377 * @ctxt: an XML parser context
2378 *
2379 * parse an XML name and compares for match
2380 * (specialized for endtag parsing)
2381 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002382 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2383 * and the name for mismatch
2384 */
2385
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002386static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002387xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002388 register const xmlChar *cmp = other;
2389 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002390 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002391
2392 GROW;
2393
2394 in = ctxt->input->cur;
2395 while (*in != 0 && *in == *cmp) {
2396 ++in;
2397 ++cmp;
2398 }
William M. Brack76e95df2003-10-18 16:20:14 +00002399 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002400 /* success */
2401 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002402 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002403 }
2404 /* failure (or end of input buffer), check with full function */
2405 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002406 /* strings coming from the dictionnary direct compare possible */
2407 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002408 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002409 }
2410 return ret;
2411}
2412
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002413static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002414xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002415 int len = 0, l;
2416 int c;
2417 int count = 0;
2418
2419 /*
2420 * Handler for more complex cases
2421 */
2422 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002423 c = CUR_CHAR(l);
2424 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2425 (!IS_LETTER(c) && (c != '_') &&
2426 (c != ':'))) {
2427 return(NULL);
2428 }
2429
2430 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002431 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002432 (c == '.') || (c == '-') ||
2433 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002434 (IS_COMBINING(c)) ||
2435 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002436 if (count++ > 100) {
2437 count = 0;
2438 GROW;
2439 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002440 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002441 NEXTL(l);
2442 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002443 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002444 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002445}
2446
2447/**
2448 * xmlParseStringName:
2449 * @ctxt: an XML parser context
2450 * @str: a pointer to the string pointer (IN/OUT)
2451 *
2452 * parse an XML name.
2453 *
2454 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2455 * CombiningChar | Extender
2456 *
2457 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2458 *
2459 * [6] Names ::= Name (S Name)*
2460 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002462 * is updated to the current location in the string.
2463 */
2464
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002465static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002466xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2467 xmlChar buf[XML_MAX_NAMELEN + 5];
2468 const xmlChar *cur = *str;
2469 int len = 0, l;
2470 int c;
2471
2472 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002473 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002474 (c != ':')) {
2475 return(NULL);
2476 }
2477
William M. Brack871611b2003-10-18 04:53:14 +00002478 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002479 (c == '.') || (c == '-') ||
2480 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002481 (IS_COMBINING(c)) ||
2482 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002483 COPY_BUF(l,buf,len,c);
2484 cur += l;
2485 c = CUR_SCHAR(cur, l);
2486 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2487 /*
2488 * Okay someone managed to make a huge name, so he's ready to pay
2489 * for the processing speed.
2490 */
2491 xmlChar *buffer;
2492 int max = len * 2;
2493
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002494 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002495 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002496 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 return(NULL);
2498 }
2499 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002500 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002501 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002502 (c == '.') || (c == '-') ||
2503 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002504 (IS_COMBINING(c)) ||
2505 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002506 if (len + 10 > max) {
2507 max *= 2;
2508 buffer = (xmlChar *) xmlRealloc(buffer,
2509 max * sizeof(xmlChar));
2510 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002511 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002512 return(NULL);
2513 }
2514 }
2515 COPY_BUF(l,buffer,len,c);
2516 cur += l;
2517 c = CUR_SCHAR(cur, l);
2518 }
2519 buffer[len] = 0;
2520 *str = cur;
2521 return(buffer);
2522 }
2523 }
2524 *str = cur;
2525 return(xmlStrndup(buf, len));
2526}
2527
2528/**
2529 * xmlParseNmtoken:
2530 * @ctxt: an XML parser context
2531 *
2532 * parse an XML Nmtoken.
2533 *
2534 * [7] Nmtoken ::= (NameChar)+
2535 *
2536 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2537 *
2538 * Returns the Nmtoken parsed or NULL
2539 */
2540
2541xmlChar *
2542xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2543 xmlChar buf[XML_MAX_NAMELEN + 5];
2544 int len = 0, l;
2545 int c;
2546 int count = 0;
2547
2548 GROW;
2549 c = CUR_CHAR(l);
2550
William M. Brack871611b2003-10-18 04:53:14 +00002551 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002552 (c == '.') || (c == '-') ||
2553 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002554 (IS_COMBINING(c)) ||
2555 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002556 if (count++ > 100) {
2557 count = 0;
2558 GROW;
2559 }
2560 COPY_BUF(l,buf,len,c);
2561 NEXTL(l);
2562 c = CUR_CHAR(l);
2563 if (len >= XML_MAX_NAMELEN) {
2564 /*
2565 * Okay someone managed to make a huge token, so he's ready to pay
2566 * for the processing speed.
2567 */
2568 xmlChar *buffer;
2569 int max = len * 2;
2570
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002571 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002572 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002573 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002574 return(NULL);
2575 }
2576 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002577 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002578 (c == '.') || (c == '-') ||
2579 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002580 (IS_COMBINING(c)) ||
2581 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002582 if (count++ > 100) {
2583 count = 0;
2584 GROW;
2585 }
2586 if (len + 10 > max) {
2587 max *= 2;
2588 buffer = (xmlChar *) xmlRealloc(buffer,
2589 max * sizeof(xmlChar));
2590 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002591 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002592 return(NULL);
2593 }
2594 }
2595 COPY_BUF(l,buffer,len,c);
2596 NEXTL(l);
2597 c = CUR_CHAR(l);
2598 }
2599 buffer[len] = 0;
2600 return(buffer);
2601 }
2602 }
2603 if (len == 0)
2604 return(NULL);
2605 return(xmlStrndup(buf, len));
2606}
2607
2608/**
2609 * xmlParseEntityValue:
2610 * @ctxt: an XML parser context
2611 * @orig: if non-NULL store a copy of the original entity value
2612 *
2613 * parse a value for ENTITY declarations
2614 *
2615 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2616 * "'" ([^%&'] | PEReference | Reference)* "'"
2617 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002618 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002619 */
2620
2621xmlChar *
2622xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2623 xmlChar *buf = NULL;
2624 int len = 0;
2625 int size = XML_PARSER_BUFFER_SIZE;
2626 int c, l;
2627 xmlChar stop;
2628 xmlChar *ret = NULL;
2629 const xmlChar *cur = NULL;
2630 xmlParserInputPtr input;
2631
2632 if (RAW == '"') stop = '"';
2633 else if (RAW == '\'') stop = '\'';
2634 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002635 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002636 return(NULL);
2637 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002638 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002639 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002640 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002641 return(NULL);
2642 }
2643
2644 /*
2645 * The content of the entity definition is copied in a buffer.
2646 */
2647
2648 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2649 input = ctxt->input;
2650 GROW;
2651 NEXT;
2652 c = CUR_CHAR(l);
2653 /*
2654 * NOTE: 4.4.5 Included in Literal
2655 * When a parameter entity reference appears in a literal entity
2656 * value, ... a single or double quote character in the replacement
2657 * text is always treated as a normal data character and will not
2658 * terminate the literal.
2659 * In practice it means we stop the loop only when back at parsing
2660 * the initial entity and the quote is found
2661 */
William M. Brack871611b2003-10-18 04:53:14 +00002662 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002663 (ctxt->input != input))) {
2664 if (len + 5 >= size) {
2665 size *= 2;
2666 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2667 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002668 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002669 return(NULL);
2670 }
2671 }
2672 COPY_BUF(l,buf,len,c);
2673 NEXTL(l);
2674 /*
2675 * Pop-up of finished entities.
2676 */
2677 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2678 xmlPopInput(ctxt);
2679
2680 GROW;
2681 c = CUR_CHAR(l);
2682 if (c == 0) {
2683 GROW;
2684 c = CUR_CHAR(l);
2685 }
2686 }
2687 buf[len] = 0;
2688
2689 /*
2690 * Raise problem w.r.t. '&' and '%' being used in non-entities
2691 * reference constructs. Note Charref will be handled in
2692 * xmlStringDecodeEntities()
2693 */
2694 cur = buf;
2695 while (*cur != 0) { /* non input consuming */
2696 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2697 xmlChar *name;
2698 xmlChar tmp = *cur;
2699
2700 cur++;
2701 name = xmlParseStringName(ctxt, &cur);
2702 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002703 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002704 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002705 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002706 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002707 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2708 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002709 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002710 }
2711 if (name != NULL)
2712 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002713 if (*cur == 0)
2714 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002715 }
2716 cur++;
2717 }
2718
2719 /*
2720 * Then PEReference entities are substituted.
2721 */
2722 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002723 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 xmlFree(buf);
2725 } else {
2726 NEXT;
2727 /*
2728 * NOTE: 4.4.7 Bypassed
2729 * When a general entity reference appears in the EntityValue in
2730 * an entity declaration, it is bypassed and left as is.
2731 * so XML_SUBSTITUTE_REF is not set here.
2732 */
2733 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2734 0, 0, 0);
2735 if (orig != NULL)
2736 *orig = buf;
2737 else
2738 xmlFree(buf);
2739 }
2740
2741 return(ret);
2742}
2743
2744/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002745 * xmlParseAttValueComplex:
2746 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002747 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002748 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002749 *
2750 * parse a value for an attribute, this is the fallback function
2751 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002752 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002753 *
2754 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2755 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002756static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002757xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002758 xmlChar limit = 0;
2759 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002760 int len = 0;
2761 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002762 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002763 xmlChar *current = NULL;
2764 xmlEntityPtr ent;
2765
Owen Taylor3473f882001-02-23 17:55:21 +00002766 if (NXT(0) == '"') {
2767 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2768 limit = '"';
2769 NEXT;
2770 } else if (NXT(0) == '\'') {
2771 limit = '\'';
2772 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2773 NEXT;
2774 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002775 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002776 return(NULL);
2777 }
2778
2779 /*
2780 * allocate a translation buffer.
2781 */
2782 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002783 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002784 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002785
2786 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002787 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002788 */
2789 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002790 while ((NXT(0) != limit) && /* checked */
2791 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002792 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002793 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002794 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002795 if (NXT(1) == '#') {
2796 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002797
Owen Taylor3473f882001-02-23 17:55:21 +00002798 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002799 if (ctxt->replaceEntities) {
2800 if (len > buf_size - 10) {
2801 growBuffer(buf);
2802 }
2803 buf[len++] = '&';
2804 } else {
2805 /*
2806 * The reparsing will be done in xmlStringGetNodeList()
2807 * called by the attribute() function in SAX.c
2808 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002809 if (len > buf_size - 10) {
2810 growBuffer(buf);
2811 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002812 buf[len++] = '&';
2813 buf[len++] = '#';
2814 buf[len++] = '3';
2815 buf[len++] = '8';
2816 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002817 }
2818 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002819 if (len > buf_size - 10) {
2820 growBuffer(buf);
2821 }
Owen Taylor3473f882001-02-23 17:55:21 +00002822 len += xmlCopyChar(0, &buf[len], val);
2823 }
2824 } else {
2825 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002826 if ((ent != NULL) &&
2827 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2828 if (len > buf_size - 10) {
2829 growBuffer(buf);
2830 }
2831 if ((ctxt->replaceEntities == 0) &&
2832 (ent->content[0] == '&')) {
2833 buf[len++] = '&';
2834 buf[len++] = '#';
2835 buf[len++] = '3';
2836 buf[len++] = '8';
2837 buf[len++] = ';';
2838 } else {
2839 buf[len++] = ent->content[0];
2840 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002841 } else if ((ent != NULL) &&
2842 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002843 xmlChar *rep;
2844
2845 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2846 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002847 XML_SUBSTITUTE_REF,
2848 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002849 if (rep != NULL) {
2850 current = rep;
2851 while (*current != 0) { /* non input consuming */
2852 buf[len++] = *current++;
2853 if (len > buf_size - 10) {
2854 growBuffer(buf);
2855 }
2856 }
2857 xmlFree(rep);
2858 }
2859 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002860 if (len > buf_size - 10) {
2861 growBuffer(buf);
2862 }
Owen Taylor3473f882001-02-23 17:55:21 +00002863 if (ent->content != NULL)
2864 buf[len++] = ent->content[0];
2865 }
2866 } else if (ent != NULL) {
2867 int i = xmlStrlen(ent->name);
2868 const xmlChar *cur = ent->name;
2869
2870 /*
2871 * This may look absurd but is needed to detect
2872 * entities problems
2873 */
2874 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2875 (ent->content != NULL)) {
2876 xmlChar *rep;
2877 rep = xmlStringDecodeEntities(ctxt, ent->content,
2878 XML_SUBSTITUTE_REF, 0, 0, 0);
2879 if (rep != NULL)
2880 xmlFree(rep);
2881 }
2882
2883 /*
2884 * Just output the reference
2885 */
2886 buf[len++] = '&';
2887 if (len > buf_size - i - 10) {
2888 growBuffer(buf);
2889 }
2890 for (;i > 0;i--)
2891 buf[len++] = *cur++;
2892 buf[len++] = ';';
2893 }
2894 }
2895 } else {
2896 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002897 if ((len != 0) || (!normalize)) {
2898 if ((!normalize) || (!in_space)) {
2899 COPY_BUF(l,buf,len,0x20);
2900 if (len > buf_size - 10) {
2901 growBuffer(buf);
2902 }
2903 }
2904 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002905 }
2906 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002907 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002908 COPY_BUF(l,buf,len,c);
2909 if (len > buf_size - 10) {
2910 growBuffer(buf);
2911 }
2912 }
2913 NEXTL(l);
2914 }
2915 GROW;
2916 c = CUR_CHAR(l);
2917 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002918 if ((in_space) && (normalize)) {
2919 while (buf[len - 1] == 0x20) len--;
2920 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002921 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002923 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002924 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002925 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2926 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002927 } else
2928 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002929 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002930 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002931
2932mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002933 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002934 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002935}
2936
2937/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002938 * xmlParseAttValue:
2939 * @ctxt: an XML parser context
2940 *
2941 * parse a value for an attribute
2942 * Note: the parser won't do substitution of entities here, this
2943 * will be handled later in xmlStringGetNodeList
2944 *
2945 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2946 * "'" ([^<&'] | Reference)* "'"
2947 *
2948 * 3.3.3 Attribute-Value Normalization:
2949 * Before the value of an attribute is passed to the application or
2950 * checked for validity, the XML processor must normalize it as follows:
2951 * - a character reference is processed by appending the referenced
2952 * character to the attribute value
2953 * - an entity reference is processed by recursively processing the
2954 * replacement text of the entity
2955 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2956 * appending #x20 to the normalized value, except that only a single
2957 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2958 * parsed entity or the literal entity value of an internal parsed entity
2959 * - other characters are processed by appending them to the normalized value
2960 * If the declared value is not CDATA, then the XML processor must further
2961 * process the normalized attribute value by discarding any leading and
2962 * trailing space (#x20) characters, and by replacing sequences of space
2963 * (#x20) characters by a single space (#x20) character.
2964 * All attributes for which no declaration has been read should be treated
2965 * by a non-validating parser as if declared CDATA.
2966 *
2967 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2968 */
2969
2970
2971xmlChar *
2972xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002973 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002974}
2975
2976/**
Owen Taylor3473f882001-02-23 17:55:21 +00002977 * xmlParseSystemLiteral:
2978 * @ctxt: an XML parser context
2979 *
2980 * parse an XML Literal
2981 *
2982 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2983 *
2984 * Returns the SystemLiteral parsed or NULL
2985 */
2986
2987xmlChar *
2988xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2989 xmlChar *buf = NULL;
2990 int len = 0;
2991 int size = XML_PARSER_BUFFER_SIZE;
2992 int cur, l;
2993 xmlChar stop;
2994 int state = ctxt->instate;
2995 int count = 0;
2996
2997 SHRINK;
2998 if (RAW == '"') {
2999 NEXT;
3000 stop = '"';
3001 } else if (RAW == '\'') {
3002 NEXT;
3003 stop = '\'';
3004 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003005 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003006 return(NULL);
3007 }
3008
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003009 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003010 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003011 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003012 return(NULL);
3013 }
3014 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3015 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003016 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (len + 5 >= size) {
3018 size *= 2;
3019 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3020 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003021 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003022 ctxt->instate = (xmlParserInputState) state;
3023 return(NULL);
3024 }
3025 }
3026 count++;
3027 if (count > 50) {
3028 GROW;
3029 count = 0;
3030 }
3031 COPY_BUF(l,buf,len,cur);
3032 NEXTL(l);
3033 cur = CUR_CHAR(l);
3034 if (cur == 0) {
3035 GROW;
3036 SHRINK;
3037 cur = CUR_CHAR(l);
3038 }
3039 }
3040 buf[len] = 0;
3041 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003042 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003043 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003044 } else {
3045 NEXT;
3046 }
3047 return(buf);
3048}
3049
3050/**
3051 * xmlParsePubidLiteral:
3052 * @ctxt: an XML parser context
3053 *
3054 * parse an XML public literal
3055 *
3056 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3057 *
3058 * Returns the PubidLiteral parsed or NULL.
3059 */
3060
3061xmlChar *
3062xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3063 xmlChar *buf = NULL;
3064 int len = 0;
3065 int size = XML_PARSER_BUFFER_SIZE;
3066 xmlChar cur;
3067 xmlChar stop;
3068 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003069 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003070
3071 SHRINK;
3072 if (RAW == '"') {
3073 NEXT;
3074 stop = '"';
3075 } else if (RAW == '\'') {
3076 NEXT;
3077 stop = '\'';
3078 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003079 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003080 return(NULL);
3081 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003082 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003083 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003084 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003085 return(NULL);
3086 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003087 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003088 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003089 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003090 if (len + 1 >= size) {
3091 size *= 2;
3092 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3093 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003094 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003095 return(NULL);
3096 }
3097 }
3098 buf[len++] = cur;
3099 count++;
3100 if (count > 50) {
3101 GROW;
3102 count = 0;
3103 }
3104 NEXT;
3105 cur = CUR;
3106 if (cur == 0) {
3107 GROW;
3108 SHRINK;
3109 cur = CUR;
3110 }
3111 }
3112 buf[len] = 0;
3113 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003114 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003115 } else {
3116 NEXT;
3117 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003118 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003119 return(buf);
3120}
3121
Daniel Veillard48b2f892001-02-25 16:11:03 +00003122void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003123/**
3124 * xmlParseCharData:
3125 * @ctxt: an XML parser context
3126 * @cdata: int indicating whether we are within a CDATA section
3127 *
3128 * parse a CharData section.
3129 * if we are within a CDATA section ']]>' marks an end of section.
3130 *
3131 * The right angle bracket (>) may be represented using the string "&gt;",
3132 * and must, for compatibility, be escaped using "&gt;" or a character
3133 * reference when it appears in the string "]]>" in content, when that
3134 * string is not marking the end of a CDATA section.
3135 *
3136 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3137 */
3138
3139void
3140xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003141 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003142 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003143 int line = ctxt->input->line;
3144 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003145
3146 SHRINK;
3147 GROW;
3148 /*
3149 * Accelerated common case where input don't need to be
3150 * modified before passing it to the handler.
3151 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003152 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003153 in = ctxt->input->cur;
3154 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003155get_more_space:
3156 while (*in == 0x20) in++;
3157 if (*in == 0xA) {
3158 ctxt->input->line++;
3159 in++;
3160 while (*in == 0xA) {
3161 ctxt->input->line++;
3162 in++;
3163 }
3164 goto get_more_space;
3165 }
3166 if (*in == '<') {
3167 nbchar = in - ctxt->input->cur;
3168 if (nbchar > 0) {
3169 const xmlChar *tmp = ctxt->input->cur;
3170 ctxt->input->cur = in;
3171
3172 if (ctxt->sax->ignorableWhitespace !=
3173 ctxt->sax->characters) {
3174 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3175 ctxt->sax->ignorableWhitespace(ctxt->userData,
3176 tmp, nbchar);
3177 } else if (ctxt->sax->characters != NULL)
3178 ctxt->sax->characters(ctxt->userData,
3179 tmp, nbchar);
3180 } else if (ctxt->sax->characters != NULL) {
3181 ctxt->sax->characters(ctxt->userData,
3182 tmp, nbchar);
3183 }
3184 }
3185 return;
3186 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003187get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003188 while (((*in > ']') && (*in <= 0x7F)) ||
3189 ((*in > '&') && (*in < '<')) ||
3190 ((*in > '<') && (*in < ']')) ||
3191 ((*in >= 0x20) && (*in < '&')) ||
3192 (*in == 0x09))
3193 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003194 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003195 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003196 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003197 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003198 ctxt->input->line++;
3199 in++;
3200 }
3201 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003202 }
3203 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003204 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003205 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003206 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003207 return;
3208 }
3209 in++;
3210 goto get_more;
3211 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003212 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003213 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003214 if ((ctxt->sax->ignorableWhitespace !=
3215 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003216 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003217 const xmlChar *tmp = ctxt->input->cur;
3218 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003219
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003220 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003221 ctxt->sax->ignorableWhitespace(ctxt->userData,
3222 tmp, nbchar);
3223 } else if (ctxt->sax->characters != NULL)
3224 ctxt->sax->characters(ctxt->userData,
3225 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003226 line = ctxt->input->line;
3227 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003228 } else {
3229 if (ctxt->sax->characters != NULL)
3230 ctxt->sax->characters(ctxt->userData,
3231 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003232 line = ctxt->input->line;
3233 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003234 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003235 }
3236 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003237 if (*in == 0xD) {
3238 in++;
3239 if (*in == 0xA) {
3240 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003241 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003242 ctxt->input->line++;
3243 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003244 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003245 in--;
3246 }
3247 if (*in == '<') {
3248 return;
3249 }
3250 if (*in == '&') {
3251 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003252 }
3253 SHRINK;
3254 GROW;
3255 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003256 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003257 nbchar = 0;
3258 }
Daniel Veillard50582112001-03-26 22:52:16 +00003259 ctxt->input->line = line;
3260 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003261 xmlParseCharDataComplex(ctxt, cdata);
3262}
3263
Daniel Veillard01c13b52002-12-10 15:19:08 +00003264/**
3265 * xmlParseCharDataComplex:
3266 * @ctxt: an XML parser context
3267 * @cdata: int indicating whether we are within a CDATA section
3268 *
3269 * parse a CharData section.this is the fallback function
3270 * of xmlParseCharData() when the parsing requires handling
3271 * of non-ASCII characters.
3272 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003273void
3274xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003275 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3276 int nbchar = 0;
3277 int cur, l;
3278 int count = 0;
3279
3280 SHRINK;
3281 GROW;
3282 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003283 while ((cur != '<') && /* checked */
3284 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003285 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003286 if ((cur == ']') && (NXT(1) == ']') &&
3287 (NXT(2) == '>')) {
3288 if (cdata) break;
3289 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003290 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003291 }
3292 }
3293 COPY_BUF(l,buf,nbchar,cur);
3294 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003295 buf[nbchar] = 0;
3296
Owen Taylor3473f882001-02-23 17:55:21 +00003297 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003298 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003299 */
3300 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003301 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003302 if (ctxt->sax->ignorableWhitespace != NULL)
3303 ctxt->sax->ignorableWhitespace(ctxt->userData,
3304 buf, nbchar);
3305 } else {
3306 if (ctxt->sax->characters != NULL)
3307 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3308 }
3309 }
3310 nbchar = 0;
3311 }
3312 count++;
3313 if (count > 50) {
3314 GROW;
3315 count = 0;
3316 }
3317 NEXTL(l);
3318 cur = CUR_CHAR(l);
3319 }
3320 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003321 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003322 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003323 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003324 */
3325 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003326 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003327 if (ctxt->sax->ignorableWhitespace != NULL)
3328 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3329 } else {
3330 if (ctxt->sax->characters != NULL)
3331 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3332 }
3333 }
3334 }
3335}
3336
3337/**
3338 * xmlParseExternalID:
3339 * @ctxt: an XML parser context
3340 * @publicID: a xmlChar** receiving PubidLiteral
3341 * @strict: indicate whether we should restrict parsing to only
3342 * production [75], see NOTE below
3343 *
3344 * Parse an External ID or a Public ID
3345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003346 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003347 * 'PUBLIC' S PubidLiteral S SystemLiteral
3348 *
3349 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3350 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3351 *
3352 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3353 *
3354 * Returns the function returns SystemLiteral and in the second
3355 * case publicID receives PubidLiteral, is strict is off
3356 * it is possible to return NULL and have publicID set.
3357 */
3358
3359xmlChar *
3360xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3361 xmlChar *URI = NULL;
3362
3363 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003364
3365 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003366 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003367 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003368 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003369 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3370 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003371 }
3372 SKIP_BLANKS;
3373 URI = xmlParseSystemLiteral(ctxt);
3374 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003377 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003378 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003379 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003380 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003381 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003382 }
3383 SKIP_BLANKS;
3384 *publicID = xmlParsePubidLiteral(ctxt);
3385 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003386 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003387 }
3388 if (strict) {
3389 /*
3390 * We don't handle [83] so "S SystemLiteral" is required.
3391 */
William M. Brack76e95df2003-10-18 16:20:14 +00003392 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003393 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003394 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003395 }
3396 } else {
3397 /*
3398 * We handle [83] so we return immediately, if
3399 * "S SystemLiteral" is not detected. From a purely parsing
3400 * point of view that's a nice mess.
3401 */
3402 const xmlChar *ptr;
3403 GROW;
3404
3405 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003406 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003407
William M. Brack76e95df2003-10-18 16:20:14 +00003408 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003409 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3410 }
3411 SKIP_BLANKS;
3412 URI = xmlParseSystemLiteral(ctxt);
3413 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003414 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003415 }
3416 }
3417 return(URI);
3418}
3419
3420/**
3421 * xmlParseComment:
3422 * @ctxt: an XML parser context
3423 *
3424 * Skip an XML (SGML) comment <!-- .... -->
3425 * The spec says that "For compatibility, the string "--" (double-hyphen)
3426 * must not occur within comments. "
3427 *
3428 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3429 */
3430void
3431xmlParseComment(xmlParserCtxtPtr ctxt) {
3432 xmlChar *buf = NULL;
3433 int len;
3434 int size = XML_PARSER_BUFFER_SIZE;
3435 int q, ql;
3436 int r, rl;
3437 int cur, l;
3438 xmlParserInputState state;
3439 xmlParserInputPtr input = ctxt->input;
3440 int count = 0;
3441
3442 /*
3443 * Check that there is a comment right here.
3444 */
3445 if ((RAW != '<') || (NXT(1) != '!') ||
3446 (NXT(2) != '-') || (NXT(3) != '-')) return;
3447
3448 state = ctxt->instate;
3449 ctxt->instate = XML_PARSER_COMMENT;
3450 SHRINK;
3451 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003452 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003453 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003454 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003455 ctxt->instate = state;
3456 return;
3457 }
3458 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003459 if (q == 0)
3460 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003461 NEXTL(ql);
3462 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003463 if (r == 0)
3464 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003465 NEXTL(rl);
3466 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003467 if (cur == 0)
3468 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003469 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003470 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003471 ((cur != '>') ||
3472 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003473 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003474 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003475 }
3476 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003477 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003478 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003479 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3480 if (new_buf == NULL) {
3481 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003482 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003483 ctxt->instate = state;
3484 return;
3485 }
William M. Bracka3215c72004-07-31 16:24:01 +00003486 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003487 }
3488 COPY_BUF(ql,buf,len,q);
3489 q = r;
3490 ql = rl;
3491 r = cur;
3492 rl = l;
3493
3494 count++;
3495 if (count > 50) {
3496 GROW;
3497 count = 0;
3498 }
3499 NEXTL(l);
3500 cur = CUR_CHAR(l);
3501 if (cur == 0) {
3502 SHRINK;
3503 GROW;
3504 cur = CUR_CHAR(l);
3505 }
3506 }
3507 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003508 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003509 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003510 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003511 xmlFree(buf);
3512 } else {
3513 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003514 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3515 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003516 }
3517 NEXT;
3518 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3519 (!ctxt->disableSAX))
3520 ctxt->sax->comment(ctxt->userData, buf);
3521 xmlFree(buf);
3522 }
3523 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003524 return;
3525not_terminated:
3526 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3527 "Comment not terminated\n", NULL);
3528 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003529}
3530
3531/**
3532 * xmlParsePITarget:
3533 * @ctxt: an XML parser context
3534 *
3535 * parse the name of a PI
3536 *
3537 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3538 *
3539 * Returns the PITarget name or NULL
3540 */
3541
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003542const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003543xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003544 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003545
3546 name = xmlParseName(ctxt);
3547 if ((name != NULL) &&
3548 ((name[0] == 'x') || (name[0] == 'X')) &&
3549 ((name[1] == 'm') || (name[1] == 'M')) &&
3550 ((name[2] == 'l') || (name[2] == 'L'))) {
3551 int i;
3552 if ((name[0] == 'x') && (name[1] == 'm') &&
3553 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003554 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003555 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003556 return(name);
3557 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003558 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003559 return(name);
3560 }
3561 for (i = 0;;i++) {
3562 if (xmlW3CPIs[i] == NULL) break;
3563 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3564 return(name);
3565 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003566 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3567 "xmlParsePITarget: invalid name prefix 'xml'\n",
3568 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 return(name);
3571}
3572
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must consist of a single catalog="..." (or '...')
 * pseudo-attribute, optionally surrounded by blanks. Any other shape
 * raises an XML_WAR_CATALOG_PI warning, except a missing '=' after
 * the "catalog" keyword which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;
    xmlChar *uri = NULL;

    /* [S] "catalog" */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* [S] '=' */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* [S] opening quote, either kind */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the value runs up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri == NULL)
        return;
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
    xmlFree(uri);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3634
Owen Taylor3473f882001-02-23 17:55:21 +00003635/**
3636 * xmlParsePI:
3637 * @ctxt: an XML parser context
3638 *
3639 * parse an XML Processing Instruction.
3640 *
3641 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3642 *
3643 * The processing is transfered to SAX once parsed.
3644 */
3645
3646void
3647xmlParsePI(xmlParserCtxtPtr ctxt) {
3648 xmlChar *buf = NULL;
3649 int len = 0;
3650 int size = XML_PARSER_BUFFER_SIZE;
3651 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003652 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003653 xmlParserInputState state;
3654 int count = 0;
3655
3656 if ((RAW == '<') && (NXT(1) == '?')) {
3657 xmlParserInputPtr input = ctxt->input;
3658 state = ctxt->instate;
3659 ctxt->instate = XML_PARSER_PI;
3660 /*
3661 * this is a Processing Instruction.
3662 */
3663 SKIP(2);
3664 SHRINK;
3665
3666 /*
3667 * Parse the target name and check for special support like
3668 * namespace.
3669 */
3670 target = xmlParsePITarget(ctxt);
3671 if (target != NULL) {
3672 if ((RAW == '?') && (NXT(1) == '>')) {
3673 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003674 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3675 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003676 }
3677 SKIP(2);
3678
3679 /*
3680 * SAX: PI detected.
3681 */
3682 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3683 (ctxt->sax->processingInstruction != NULL))
3684 ctxt->sax->processingInstruction(ctxt->userData,
3685 target, NULL);
3686 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003687 return;
3688 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003689 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003690 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003691 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003692 ctxt->instate = state;
3693 return;
3694 }
3695 cur = CUR;
3696 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003697 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3698 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003699 }
3700 SKIP_BLANKS;
3701 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003702 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003703 ((cur != '?') || (NXT(1) != '>'))) {
3704 if (len + 5 >= size) {
3705 size *= 2;
3706 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3707 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003708 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003709 ctxt->instate = state;
3710 return;
3711 }
3712 }
3713 count++;
3714 if (count > 50) {
3715 GROW;
3716 count = 0;
3717 }
3718 COPY_BUF(l,buf,len,cur);
3719 NEXTL(l);
3720 cur = CUR_CHAR(l);
3721 if (cur == 0) {
3722 SHRINK;
3723 GROW;
3724 cur = CUR_CHAR(l);
3725 }
3726 }
3727 buf[len] = 0;
3728 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003729 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3730 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 } else {
3732 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003733 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3734 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003735 }
3736 SKIP(2);
3737
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003738#ifdef LIBXML_CATALOG_ENABLED
3739 if (((state == XML_PARSER_MISC) ||
3740 (state == XML_PARSER_START)) &&
3741 (xmlStrEqual(target, XML_CATALOG_PI))) {
3742 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3743 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3744 (allow == XML_CATA_ALLOW_ALL))
3745 xmlParseCatalogPI(ctxt, buf);
3746 }
3747#endif
3748
3749
Owen Taylor3473f882001-02-23 17:55:21 +00003750 /*
3751 * SAX: PI detected.
3752 */
3753 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3754 (ctxt->sax->processingInstruction != NULL))
3755 ctxt->sax->processingInstruction(ctxt->userData,
3756 target, buf);
3757 }
3758 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003759 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003760 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 ctxt->instate = state;
3763 }
3764}
3765
3766/**
3767 * xmlParseNotationDecl:
3768 * @ctxt: an XML parser context
3769 *
3770 * parse a notation declaration
3771 *
3772 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3773 *
3774 * Hence there is actually 3 choices:
3775 * 'PUBLIC' S PubidLiteral
3776 * 'PUBLIC' S PubidLiteral S SystemLiteral
3777 * and 'SYSTEM' S SystemLiteral
3778 *
3779 * See the NOTE on xmlParseExternalID().
3780 */
3781
3782void
3783xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003784 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003785 xmlChar *Pubid;
3786 xmlChar *Systemid;
3787
Daniel Veillarda07050d2003-10-19 14:46:32 +00003788 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003789 xmlParserInputPtr input = ctxt->input;
3790 SHRINK;
3791 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003792 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003793 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3794 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003795 return;
3796 }
3797 SKIP_BLANKS;
3798
Daniel Veillard76d66f42001-05-16 21:05:17 +00003799 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003801 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802 return;
3803 }
William M. Brack76e95df2003-10-18 16:20:14 +00003804 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003805 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003806 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003807 return;
3808 }
3809 SKIP_BLANKS;
3810
3811 /*
3812 * Parse the IDs.
3813 */
3814 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3815 SKIP_BLANKS;
3816
3817 if (RAW == '>') {
3818 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003819 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3820 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003821 }
3822 NEXT;
3823 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3824 (ctxt->sax->notationDecl != NULL))
3825 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3826 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003827 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003828 }
Owen Taylor3473f882001-02-23 17:55:21 +00003829 if (Systemid != NULL) xmlFree(Systemid);
3830 if (Pubid != NULL) xmlFree(Pubid);
3831 }
3832}
3833
3834/**
3835 * xmlParseEntityDecl:
3836 * @ctxt: an XML parser context
3837 *
3838 * parse <!ENTITY declarations
3839 *
3840 * [70] EntityDecl ::= GEDecl | PEDecl
3841 *
3842 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3843 *
3844 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3845 *
3846 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3847 *
3848 * [74] PEDef ::= EntityValue | ExternalID
3849 *
3850 * [76] NDataDecl ::= S 'NDATA' S Name
3851 *
3852 * [ VC: Notation Declared ]
3853 * The Name must match the declared name of a notation.
3854 */
3855
3856void
3857xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003858 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003859 xmlChar *value = NULL;
3860 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003861 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003862 int isParameter = 0;
3863 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003864 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003865
3866 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003867 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003868 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003869 SHRINK;
3870 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003871 skipped = SKIP_BLANKS;
3872 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3874 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003875 }
Owen Taylor3473f882001-02-23 17:55:21 +00003876
3877 if (RAW == '%') {
3878 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003879 skipped = SKIP_BLANKS;
3880 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003881 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3882 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003883 }
Owen Taylor3473f882001-02-23 17:55:21 +00003884 isParameter = 1;
3885 }
3886
Daniel Veillard76d66f42001-05-16 21:05:17 +00003887 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003888 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003889 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3890 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003891 return;
3892 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003893 skipped = SKIP_BLANKS;
3894 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003895 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3896 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
Owen Taylor3473f882001-02-23 17:55:21 +00003898
Daniel Veillardf5582f12002-06-11 10:08:16 +00003899 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003900 /*
3901 * handle the various case of definitions...
3902 */
3903 if (isParameter) {
3904 if ((RAW == '"') || (RAW == '\'')) {
3905 value = xmlParseEntityValue(ctxt, &orig);
3906 if (value) {
3907 if ((ctxt->sax != NULL) &&
3908 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3909 ctxt->sax->entityDecl(ctxt->userData, name,
3910 XML_INTERNAL_PARAMETER_ENTITY,
3911 NULL, NULL, value);
3912 }
3913 } else {
3914 URI = xmlParseExternalID(ctxt, &literal, 1);
3915 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003916 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003917 }
3918 if (URI) {
3919 xmlURIPtr uri;
3920
3921 uri = xmlParseURI((const char *) URI);
3922 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003923 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3924 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003925 /*
3926 * This really ought to be a well formedness error
3927 * but the XML Core WG decided otherwise c.f. issue
3928 * E26 of the XML erratas.
3929 */
Owen Taylor3473f882001-02-23 17:55:21 +00003930 } else {
3931 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003932 /*
3933 * Okay this is foolish to block those but not
3934 * invalid URIs.
3935 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003936 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003937 } else {
3938 if ((ctxt->sax != NULL) &&
3939 (!ctxt->disableSAX) &&
3940 (ctxt->sax->entityDecl != NULL))
3941 ctxt->sax->entityDecl(ctxt->userData, name,
3942 XML_EXTERNAL_PARAMETER_ENTITY,
3943 literal, URI, NULL);
3944 }
3945 xmlFreeURI(uri);
3946 }
3947 }
3948 }
3949 } else {
3950 if ((RAW == '"') || (RAW == '\'')) {
3951 value = xmlParseEntityValue(ctxt, &orig);
3952 if ((ctxt->sax != NULL) &&
3953 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3954 ctxt->sax->entityDecl(ctxt->userData, name,
3955 XML_INTERNAL_GENERAL_ENTITY,
3956 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003957 /*
3958 * For expat compatibility in SAX mode.
3959 */
3960 if ((ctxt->myDoc == NULL) ||
3961 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3962 if (ctxt->myDoc == NULL) {
3963 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3964 }
3965 if (ctxt->myDoc->intSubset == NULL)
3966 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3967 BAD_CAST "fake", NULL, NULL);
3968
Daniel Veillard1af9a412003-08-20 22:54:39 +00003969 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3970 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003971 }
Owen Taylor3473f882001-02-23 17:55:21 +00003972 } else {
3973 URI = xmlParseExternalID(ctxt, &literal, 1);
3974 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003975 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003976 }
3977 if (URI) {
3978 xmlURIPtr uri;
3979
3980 uri = xmlParseURI((const char *)URI);
3981 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003982 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3983 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003984 /*
3985 * This really ought to be a well formedness error
3986 * but the XML Core WG decided otherwise c.f. issue
3987 * E26 of the XML erratas.
3988 */
Owen Taylor3473f882001-02-23 17:55:21 +00003989 } else {
3990 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003991 /*
3992 * Okay this is foolish to block those but not
3993 * invalid URIs.
3994 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003995 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003996 }
3997 xmlFreeURI(uri);
3998 }
3999 }
William M. Brack76e95df2003-10-18 16:20:14 +00004000 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004001 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4002 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004003 }
4004 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004005 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004006 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004007 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4009 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004010 }
4011 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004012 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004013 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4014 (ctxt->sax->unparsedEntityDecl != NULL))
4015 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4016 literal, URI, ndata);
4017 } else {
4018 if ((ctxt->sax != NULL) &&
4019 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4020 ctxt->sax->entityDecl(ctxt->userData, name,
4021 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4022 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004023 /*
4024 * For expat compatibility in SAX mode.
4025 * assuming the entity repalcement was asked for
4026 */
4027 if ((ctxt->replaceEntities != 0) &&
4028 ((ctxt->myDoc == NULL) ||
4029 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4030 if (ctxt->myDoc == NULL) {
4031 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4032 }
4033
4034 if (ctxt->myDoc->intSubset == NULL)
4035 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4036 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004037 xmlSAX2EntityDecl(ctxt, name,
4038 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4039 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004040 }
Owen Taylor3473f882001-02-23 17:55:21 +00004041 }
4042 }
4043 }
4044 SKIP_BLANKS;
4045 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004046 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004047 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004048 } else {
4049 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004050 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4051 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004052 }
4053 NEXT;
4054 }
4055 if (orig != NULL) {
4056 /*
4057 * Ugly mechanism to save the raw entity value.
4058 */
4059 xmlEntityPtr cur = NULL;
4060
4061 if (isParameter) {
4062 if ((ctxt->sax != NULL) &&
4063 (ctxt->sax->getParameterEntity != NULL))
4064 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4065 } else {
4066 if ((ctxt->sax != NULL) &&
4067 (ctxt->sax->getEntity != NULL))
4068 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004069 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004070 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004071 }
Owen Taylor3473f882001-02-23 17:55:21 +00004072 }
4073 if (cur != NULL) {
4074 if (cur->orig != NULL)
4075 xmlFree(orig);
4076 else
4077 cur->orig = orig;
4078 } else
4079 xmlFree(orig);
4080 }
Owen Taylor3473f882001-02-23 17:55:21 +00004081 if (value != NULL) xmlFree(value);
4082 if (URI != NULL) xmlFree(URI);
4083 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004084 }
4085}
4086
4087/**
4088 * xmlParseDefaultDecl:
4089 * @ctxt: an XML parser context
4090 * @value: Receive a possible fixed default value for the attribute
4091 *
4092 * Parse an attribute default declaration
4093 *
4094 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4095 *
4096 * [ VC: Required Attribute ]
4097 * if the default declaration is the keyword #REQUIRED, then the
4098 * attribute must be specified for all elements of the type in the
4099 * attribute-list declaration.
4100 *
4101 * [ VC: Attribute Default Legal ]
4102 * The declared default value must meet the lexical constraints of
4103 * the declared attribute type c.f. xmlValidateAttributeDecl()
4104 *
4105 * [ VC: Fixed Attribute Default ]
4106 * if an attribute has a default value declared with the #FIXED
4107 * keyword, instances of that attribute must match the default value.
4108 *
4109 * [ WFC: No < in Attribute Values ]
4110 * handled in xmlParseAttValue()
4111 *
4112 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4113 * or XML_ATTRIBUTE_FIXED.
4114 */
4115
4116int
4117xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4118 int val;
4119 xmlChar *ret;
4120
4121 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004122 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004123 SKIP(9);
4124 return(XML_ATTRIBUTE_REQUIRED);
4125 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004126 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004127 SKIP(8);
4128 return(XML_ATTRIBUTE_IMPLIED);
4129 }
4130 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004131 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004132 SKIP(6);
4133 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004134 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004135 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4136 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004137 }
4138 SKIP_BLANKS;
4139 }
4140 ret = xmlParseAttValue(ctxt);
4141 ctxt->instate = XML_PARSER_DTD;
4142 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004143 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004144 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004145 } else
4146 *value = ret;
4147 return(val);
4148}
4149
4150/**
4151 * xmlParseNotationType:
4152 * @ctxt: an XML parser context
4153 *
4154 * parse an Notation attribute type.
4155 *
4156 * Note: the leading 'NOTATION' S part has already being parsed...
4157 *
4158 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4159 *
4160 * [ VC: Notation Attributes ]
4161 * Values of this type must match one of the notation names included
4162 * in the declaration; all notation names in the declaration must be declared.
4163 *
4164 * Returns: the notation attribute tree built while parsing
4165 */
4166
4167xmlEnumerationPtr
4168xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004169 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004170 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4171
4172 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004173 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004174 return(NULL);
4175 }
4176 SHRINK;
4177 do {
4178 NEXT;
4179 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004180 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004181 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004182 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4183 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004184 return(ret);
4185 }
4186 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004187 if (cur == NULL) return(ret);
4188 if (last == NULL) ret = last = cur;
4189 else {
4190 last->next = cur;
4191 last = cur;
4192 }
4193 SKIP_BLANKS;
4194 } while (RAW == '|');
4195 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004196 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004197 if ((last != NULL) && (last != ret))
4198 xmlFreeEnumeration(last);
4199 return(ret);
4200 }
4201 NEXT;
4202 return(ret);
4203}
4204
4205/**
4206 * xmlParseEnumerationType:
4207 * @ctxt: an XML parser context
4208 *
4209 * parse an Enumeration attribute type.
4210 *
4211 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4212 *
4213 * [ VC: Enumeration ]
4214 * Values of this type must match one of the Nmtoken tokens in
4215 * the declaration
4216 *
4217 * Returns: the enumeration attribute tree built while parsing
4218 */
4219
4220xmlEnumerationPtr
4221xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4222 xmlChar *name;
4223 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4224
4225 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004226 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004227 return(NULL);
4228 }
4229 SHRINK;
4230 do {
4231 NEXT;
4232 SKIP_BLANKS;
4233 name = xmlParseNmtoken(ctxt);
4234 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004235 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004236 return(ret);
4237 }
4238 cur = xmlCreateEnumeration(name);
4239 xmlFree(name);
4240 if (cur == NULL) return(ret);
4241 if (last == NULL) ret = last = cur;
4242 else {
4243 last->next = cur;
4244 last = cur;
4245 }
4246 SKIP_BLANKS;
4247 } while (RAW == '|');
4248 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004249 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004250 return(ret);
4251 }
4252 NEXT;
4253 return(ret);
4254}
4255
4256/**
4257 * xmlParseEnumeratedType:
4258 * @ctxt: an XML parser context
4259 * @tree: the enumeration tree built while parsing
4260 *
4261 * parse an Enumerated attribute type.
4262 *
4263 * [57] EnumeratedType ::= NotationType | Enumeration
4264 *
4265 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4266 *
4267 *
4268 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4269 */
4270
4271int
4272xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004273 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004274 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004275 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004276 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4277 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004278 return(0);
4279 }
4280 SKIP_BLANKS;
4281 *tree = xmlParseNotationType(ctxt);
4282 if (*tree == NULL) return(0);
4283 return(XML_ATTRIBUTE_NOTATION);
4284 }
4285 *tree = xmlParseEnumerationType(ctxt);
4286 if (*tree == NULL) return(0);
4287 return(XML_ATTRIBUTE_ENUMERATION);
4288}
4289
4290/**
4291 * xmlParseAttributeType:
4292 * @ctxt: an XML parser context
4293 * @tree: the enumeration tree built while parsing
4294 *
4295 * parse the Attribute list def for an element
4296 *
4297 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4298 *
4299 * [55] StringType ::= 'CDATA'
4300 *
4301 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4302 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4303 *
4304 * Validity constraints for attribute values syntax are checked in
4305 * xmlValidateAttributeValue()
4306 *
4307 * [ VC: ID ]
4308 * Values of type ID must match the Name production. A name must not
4309 * appear more than once in an XML document as a value of this type;
4310 * i.e., ID values must uniquely identify the elements which bear them.
4311 *
4312 * [ VC: One ID per Element Type ]
4313 * No element type may have more than one ID attribute specified.
4314 *
4315 * [ VC: ID Attribute Default ]
4316 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4317 *
4318 * [ VC: IDREF ]
4319 * Values of type IDREF must match the Name production, and values
4320 * of type IDREFS must match Names; each IDREF Name must match the value
4321 * of an ID attribute on some element in the XML document; i.e. IDREF
4322 * values must match the value of some ID attribute.
4323 *
4324 * [ VC: Entity Name ]
4325 * Values of type ENTITY must match the Name production, values
4326 * of type ENTITIES must match Names; each Entity Name must match the
4327 * name of an unparsed entity declared in the DTD.
4328 *
4329 * [ VC: Name Token ]
4330 * Values of type NMTOKEN must match the Nmtoken production; values
4331 * of type NMTOKENS must match Nmtokens.
4332 *
4333 * Returns the attribute type
4334 */
4335int
4336xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4337 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004338 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004339 SKIP(5);
4340 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004341 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004342 SKIP(6);
4343 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004344 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004345 SKIP(5);
4346 return(XML_ATTRIBUTE_IDREF);
4347 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4348 SKIP(2);
4349 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004350 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004351 SKIP(6);
4352 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004353 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004354 SKIP(8);
4355 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004356 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004357 SKIP(8);
4358 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004359 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004360 SKIP(7);
4361 return(XML_ATTRIBUTE_NMTOKEN);
4362 }
4363 return(xmlParseEnumeratedType(ctxt, tree));
4364}
4365
4366/**
4367 * xmlParseAttributeListDecl:
4368 * @ctxt: an XML parser context
4369 *
4370 * : parse the Attribute list def for an element
4371 *
4372 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4373 *
4374 * [53] AttDef ::= S Name S AttType S DefaultDecl
4375 *
4376 */
4377void
4378xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004379 const xmlChar *elemName;
4380 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004381 xmlEnumerationPtr tree;
4382
Daniel Veillarda07050d2003-10-19 14:46:32 +00004383 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004384 xmlParserInputPtr input = ctxt->input;
4385
4386 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004387 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004388 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004389 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004390 }
4391 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004392 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004393 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004394 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4395 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004396 return;
4397 }
4398 SKIP_BLANKS;
4399 GROW;
4400 while (RAW != '>') {
4401 const xmlChar *check = CUR_PTR;
4402 int type;
4403 int def;
4404 xmlChar *defaultValue = NULL;
4405
4406 GROW;
4407 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004408 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004409 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004410 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4411 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004412 break;
4413 }
4414 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004415 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004417 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004418 if (defaultValue != NULL)
4419 xmlFree(defaultValue);
4420 break;
4421 }
4422 SKIP_BLANKS;
4423
4424 type = xmlParseAttributeType(ctxt, &tree);
4425 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004426 if (defaultValue != NULL)
4427 xmlFree(defaultValue);
4428 break;
4429 }
4430
4431 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004432 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004433 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4434 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004435 if (defaultValue != NULL)
4436 xmlFree(defaultValue);
4437 if (tree != NULL)
4438 xmlFreeEnumeration(tree);
4439 break;
4440 }
4441 SKIP_BLANKS;
4442
4443 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4444 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004445 if (defaultValue != NULL)
4446 xmlFree(defaultValue);
4447 if (tree != NULL)
4448 xmlFreeEnumeration(tree);
4449 break;
4450 }
4451
4452 GROW;
4453 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004454 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004456 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004457 if (defaultValue != NULL)
4458 xmlFree(defaultValue);
4459 if (tree != NULL)
4460 xmlFreeEnumeration(tree);
4461 break;
4462 }
4463 SKIP_BLANKS;
4464 }
4465 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004466 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4467 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004468 if (defaultValue != NULL)
4469 xmlFree(defaultValue);
4470 if (tree != NULL)
4471 xmlFreeEnumeration(tree);
4472 break;
4473 }
4474 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4475 (ctxt->sax->attributeDecl != NULL))
4476 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4477 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004478 else if (tree != NULL)
4479 xmlFreeEnumeration(tree);
4480
4481 if ((ctxt->sax2) && (defaultValue != NULL) &&
4482 (def != XML_ATTRIBUTE_IMPLIED) &&
4483 (def != XML_ATTRIBUTE_REQUIRED)) {
4484 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4485 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004486 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4487 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4488 }
Owen Taylor3473f882001-02-23 17:55:21 +00004489 if (defaultValue != NULL)
4490 xmlFree(defaultValue);
4491 GROW;
4492 }
4493 if (RAW == '>') {
4494 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004495 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4496 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004497 }
4498 NEXT;
4499 }
Owen Taylor3473f882001-02-23 17:55:21 +00004500 }
4501}
4502
4503/**
4504 * xmlParseElementMixedContentDecl:
4505 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004506 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004507 *
4508 * parse the declaration for a Mixed Element content
4509 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4510 *
4511 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4512 * '(' S? '#PCDATA' S? ')'
4513 *
4514 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4515 *
4516 * [ VC: No Duplicate Types ]
4517 * The same name must not appear more than once in a single
4518 * mixed-content declaration.
4519 *
4520 * returns: the list of the xmlElementContentPtr describing the element choices
4521 */
4522xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004523xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004524 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004525 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004526
4527 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004528 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004529 SKIP(7);
4530 SKIP_BLANKS;
4531 SHRINK;
4532 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004533 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004534 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4535"Element content declaration doesn't start and stop in the same entity\n",
4536 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004537 }
Owen Taylor3473f882001-02-23 17:55:21 +00004538 NEXT;
4539 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4540 if (RAW == '*') {
4541 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4542 NEXT;
4543 }
4544 return(ret);
4545 }
4546 if ((RAW == '(') || (RAW == '|')) {
4547 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4548 if (ret == NULL) return(NULL);
4549 }
4550 while (RAW == '|') {
4551 NEXT;
4552 if (elem == NULL) {
4553 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4554 if (ret == NULL) return(NULL);
4555 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004556 if (cur != NULL)
4557 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004558 cur = ret;
4559 } else {
4560 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4561 if (n == NULL) return(NULL);
4562 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004563 if (n->c1 != NULL)
4564 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004565 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004566 if (n != NULL)
4567 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004568 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004569 }
4570 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004571 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004572 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004573 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004574 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004575 xmlFreeElementContent(cur);
4576 return(NULL);
4577 }
4578 SKIP_BLANKS;
4579 GROW;
4580 }
4581 if ((RAW == ')') && (NXT(1) == '*')) {
4582 if (elem != NULL) {
4583 cur->c2 = xmlNewElementContent(elem,
4584 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004585 if (cur->c2 != NULL)
4586 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004587 }
4588 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004589 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004590 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4591"Element content declaration doesn't start and stop in the same entity\n",
4592 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004593 }
Owen Taylor3473f882001-02-23 17:55:21 +00004594 SKIP(2);
4595 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004596 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004597 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004598 return(NULL);
4599 }
4600
4601 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004602 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004603 }
4604 return(ret);
4605}
4606
4607/**
4608 * xmlParseElementChildrenContentDecl:
4609 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004610 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004611 *
 * parse the declaration for an Element children content
4613 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4614 *
4615 *
4616 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4617 *
4618 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4619 *
4620 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4621 *
4622 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4623 *
4624 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4625 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004626 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004627 * opening or closing parentheses in a choice, seq, or Mixed
4628 * construct is contained in the replacement text for a parameter
4629 * entity, both must be contained in the same replacement text. For
4630 * interoperability, if a parameter-entity reference appears in a
4631 * choice, seq, or Mixed construct, its replacement text should not
4632 * be empty, and neither the first nor last non-blank character of
4633 * the replacement text should be a connector (| or ,).
4634 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004635 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004636 * hierarchy.
4637 */
4638xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004639xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004640 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004641 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 xmlChar type = 0;
4643
4644 SKIP_BLANKS;
4645 GROW;
4646 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004647 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004648
Owen Taylor3473f882001-02-23 17:55:21 +00004649 /* Recurse on first child */
4650 NEXT;
4651 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004652 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004653 SKIP_BLANKS;
4654 GROW;
4655 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004656 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004657 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004658 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004659 return(NULL);
4660 }
4661 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004662 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004663 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004664 return(NULL);
4665 }
Owen Taylor3473f882001-02-23 17:55:21 +00004666 GROW;
4667 if (RAW == '?') {
4668 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4669 NEXT;
4670 } else if (RAW == '*') {
4671 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4672 NEXT;
4673 } else if (RAW == '+') {
4674 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4675 NEXT;
4676 } else {
4677 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4678 }
Owen Taylor3473f882001-02-23 17:55:21 +00004679 GROW;
4680 }
4681 SKIP_BLANKS;
4682 SHRINK;
4683 while (RAW != ')') {
4684 /*
4685 * Each loop we parse one separator and one element.
4686 */
4687 if (RAW == ',') {
4688 if (type == 0) type = CUR;
4689
4690 /*
4691 * Detect "Name | Name , Name" error
4692 */
4693 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004694 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004695 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004696 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004697 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004698 xmlFreeElementContent(last);
4699 if (ret != NULL)
4700 xmlFreeElementContent(ret);
4701 return(NULL);
4702 }
4703 NEXT;
4704
4705 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4706 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004707 if ((last != NULL) && (last != ret))
4708 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004709 xmlFreeElementContent(ret);
4710 return(NULL);
4711 }
4712 if (last == NULL) {
4713 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004714 if (ret != NULL)
4715 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004716 ret = cur = op;
4717 } else {
4718 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004719 if (op != NULL)
4720 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004721 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004722 if (last != NULL)
4723 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004724 cur =op;
4725 last = NULL;
4726 }
4727 } else if (RAW == '|') {
4728 if (type == 0) type = CUR;
4729
4730 /*
4731 * Detect "Name , Name | Name" error
4732 */
4733 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004734 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004735 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004736 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004737 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004738 xmlFreeElementContent(last);
4739 if (ret != NULL)
4740 xmlFreeElementContent(ret);
4741 return(NULL);
4742 }
4743 NEXT;
4744
4745 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4746 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004747 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004748 xmlFreeElementContent(last);
4749 if (ret != NULL)
4750 xmlFreeElementContent(ret);
4751 return(NULL);
4752 }
4753 if (last == NULL) {
4754 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004755 if (ret != NULL)
4756 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004757 ret = cur = op;
4758 } else {
4759 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004760 if (op != NULL)
4761 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004762 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004763 if (last != NULL)
4764 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004765 cur =op;
4766 last = NULL;
4767 }
4768 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004769 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004770 if (ret != NULL)
4771 xmlFreeElementContent(ret);
4772 return(NULL);
4773 }
4774 GROW;
4775 SKIP_BLANKS;
4776 GROW;
4777 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004778 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004779 /* Recurse on second child */
4780 NEXT;
4781 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004782 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004783 SKIP_BLANKS;
4784 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004785 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004786 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004787 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004788 if (ret != NULL)
4789 xmlFreeElementContent(ret);
4790 return(NULL);
4791 }
4792 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 if (RAW == '?') {
4794 last->ocur = XML_ELEMENT_CONTENT_OPT;
4795 NEXT;
4796 } else if (RAW == '*') {
4797 last->ocur = XML_ELEMENT_CONTENT_MULT;
4798 NEXT;
4799 } else if (RAW == '+') {
4800 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4801 NEXT;
4802 } else {
4803 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4804 }
4805 }
4806 SKIP_BLANKS;
4807 GROW;
4808 }
4809 if ((cur != NULL) && (last != NULL)) {
4810 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004811 if (last != NULL)
4812 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004813 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004814 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004815 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4816"Element content declaration doesn't start and stop in the same entity\n",
4817 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004818 }
Owen Taylor3473f882001-02-23 17:55:21 +00004819 NEXT;
4820 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004821 if (ret != NULL) {
4822 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4823 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4824 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4825 else
4826 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4827 }
Owen Taylor3473f882001-02-23 17:55:21 +00004828 NEXT;
4829 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004830 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004831 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004832 cur = ret;
4833 /*
4834 * Some normalization:
4835 * (a | b* | c?)* == (a | b | c)*
4836 */
4837 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4838 if ((cur->c1 != NULL) &&
4839 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4840 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4841 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4842 if ((cur->c2 != NULL) &&
4843 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4844 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4845 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4846 cur = cur->c2;
4847 }
4848 }
Owen Taylor3473f882001-02-23 17:55:21 +00004849 NEXT;
4850 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004851 if (ret != NULL) {
4852 int found = 0;
4853
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004854 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4855 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4856 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004857 else
4858 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004859 /*
4860 * Some normalization:
4861 * (a | b*)+ == (a | b)*
4862 * (a | b?)+ == (a | b)*
4863 */
4864 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4865 if ((cur->c1 != NULL) &&
4866 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4867 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4868 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4869 found = 1;
4870 }
4871 if ((cur->c2 != NULL) &&
4872 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4873 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4874 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4875 found = 1;
4876 }
4877 cur = cur->c2;
4878 }
4879 if (found)
4880 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4881 }
Owen Taylor3473f882001-02-23 17:55:21 +00004882 NEXT;
4883 }
4884 return(ret);
4885}
4886
4887/**
4888 * xmlParseElementContentDecl:
4889 * @ctxt: an XML parser context
4890 * @name: the name of the element being defined.
4891 * @result: the Element Content pointer will be stored here if any
4892 *
4893 * parse the declaration for an Element content either Mixed or Children,
4894 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4895 *
4896 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4897 *
4898 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4899 */
4900
4901int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004902xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004903 xmlElementContentPtr *result) {
4904
4905 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004906 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004907 int res;
4908
4909 *result = NULL;
4910
4911 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004912 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004913 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004914 return(-1);
4915 }
4916 NEXT;
4917 GROW;
4918 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004919 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004921 res = XML_ELEMENT_TYPE_MIXED;
4922 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004923 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 res = XML_ELEMENT_TYPE_ELEMENT;
4925 }
Owen Taylor3473f882001-02-23 17:55:21 +00004926 SKIP_BLANKS;
4927 *result = tree;
4928 return(res);
4929}
4930
4931/**
4932 * xmlParseElementDecl:
4933 * @ctxt: an XML parser context
4934 *
4935 * parse an Element declaration.
4936 *
4937 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4938 *
4939 * [ VC: Unique Element Type Declaration ]
4940 * No element type may be declared more than once
4941 *
4942 * Returns the type of the element, or -1 in case of error
4943 */
4944int
4945xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004946 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004947 int ret = -1;
4948 xmlElementContentPtr content = NULL;
4949
4950 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004951 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004952 xmlParserInputPtr input = ctxt->input;
4953
4954 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004955 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004956 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4957 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004958 }
4959 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004960 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004961 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004962 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4963 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004964 return(-1);
4965 }
4966 while ((RAW == 0) && (ctxt->inputNr > 1))
4967 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004968 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004969 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4970 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004971 }
4972 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004973 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004974 SKIP(5);
4975 /*
4976 * Element must always be empty.
4977 */
4978 ret = XML_ELEMENT_TYPE_EMPTY;
4979 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4980 (NXT(2) == 'Y')) {
4981 SKIP(3);
4982 /*
4983 * Element is a generic container.
4984 */
4985 ret = XML_ELEMENT_TYPE_ANY;
4986 } else if (RAW == '(') {
4987 ret = xmlParseElementContentDecl(ctxt, name, &content);
4988 } else {
4989 /*
4990 * [ WFC: PEs in Internal Subset ] error handling.
4991 */
4992 if ((RAW == '%') && (ctxt->external == 0) &&
4993 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004994 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004995 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004996 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004997 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004998 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4999 }
Owen Taylor3473f882001-02-23 17:55:21 +00005000 return(-1);
5001 }
5002
5003 SKIP_BLANKS;
5004 /*
5005 * Pop-up of finished entities.
5006 */
5007 while ((RAW == 0) && (ctxt->inputNr > 1))
5008 xmlPopInput(ctxt);
5009 SKIP_BLANKS;
5010
5011 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005012 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005013 } else {
5014 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005015 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5016 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005017 }
5018
5019 NEXT;
5020 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5021 (ctxt->sax->elementDecl != NULL))
5022 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5023 content);
5024 }
5025 if (content != NULL) {
5026 xmlFreeElementContent(content);
5027 }
Owen Taylor3473f882001-02-23 17:55:21 +00005028 }
5029 return(ret);
5030}
5031
5032/**
Owen Taylor3473f882001-02-23 17:55:21 +00005033 * xmlParseConditionalSections
5034 * @ctxt: an XML parser context
5035 *
5036 * [61] conditionalSect ::= includeSect | ignoreSect
5037 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5038 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5039 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5040 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5041 */
5042
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005043static void
Owen Taylor3473f882001-02-23 17:55:21 +00005044xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5045 SKIP(3);
5046 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005047 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005048 SKIP(7);
5049 SKIP_BLANKS;
5050 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005051 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 } else {
5053 NEXT;
5054 }
5055 if (xmlParserDebugEntities) {
5056 if ((ctxt->input != NULL) && (ctxt->input->filename))
5057 xmlGenericError(xmlGenericErrorContext,
5058 "%s(%d): ", ctxt->input->filename,
5059 ctxt->input->line);
5060 xmlGenericError(xmlGenericErrorContext,
5061 "Entering INCLUDE Conditional Section\n");
5062 }
5063
5064 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5065 (NXT(2) != '>'))) {
5066 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005067 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005068
5069 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5070 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005071 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005072 NEXT;
5073 } else if (RAW == '%') {
5074 xmlParsePEReference(ctxt);
5075 } else
5076 xmlParseMarkupDecl(ctxt);
5077
5078 /*
5079 * Pop-up of finished entities.
5080 */
5081 while ((RAW == 0) && (ctxt->inputNr > 1))
5082 xmlPopInput(ctxt);
5083
Daniel Veillardfdc91562002-07-01 21:52:03 +00005084 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005085 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005086 break;
5087 }
5088 }
5089 if (xmlParserDebugEntities) {
5090 if ((ctxt->input != NULL) && (ctxt->input->filename))
5091 xmlGenericError(xmlGenericErrorContext,
5092 "%s(%d): ", ctxt->input->filename,
5093 ctxt->input->line);
5094 xmlGenericError(xmlGenericErrorContext,
5095 "Leaving INCLUDE Conditional Section\n");
5096 }
5097
Daniel Veillarda07050d2003-10-19 14:46:32 +00005098 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005099 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005100 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005101 int depth = 0;
5102
5103 SKIP(6);
5104 SKIP_BLANKS;
5105 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005106 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005107 } else {
5108 NEXT;
5109 }
5110 if (xmlParserDebugEntities) {
5111 if ((ctxt->input != NULL) && (ctxt->input->filename))
5112 xmlGenericError(xmlGenericErrorContext,
5113 "%s(%d): ", ctxt->input->filename,
5114 ctxt->input->line);
5115 xmlGenericError(xmlGenericErrorContext,
5116 "Entering IGNORE Conditional Section\n");
5117 }
5118
5119 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005120 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005121 * But disable SAX event generating DTD building in the meantime
5122 */
5123 state = ctxt->disableSAX;
5124 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 ctxt->instate = XML_PARSER_IGNORE;
5127
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005128 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005129 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5130 depth++;
5131 SKIP(3);
5132 continue;
5133 }
5134 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5135 if (--depth >= 0) SKIP(3);
5136 continue;
5137 }
5138 NEXT;
5139 continue;
5140 }
5141
5142 ctxt->disableSAX = state;
5143 ctxt->instate = instate;
5144
5145 if (xmlParserDebugEntities) {
5146 if ((ctxt->input != NULL) && (ctxt->input->filename))
5147 xmlGenericError(xmlGenericErrorContext,
5148 "%s(%d): ", ctxt->input->filename,
5149 ctxt->input->line);
5150 xmlGenericError(xmlGenericErrorContext,
5151 "Leaving IGNORE Conditional Section\n");
5152 }
5153
5154 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005155 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 }
5157
5158 if (RAW == 0)
5159 SHRINK;
5160
5161 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005162 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 } else {
5164 SKIP(3);
5165 }
5166}
5167
5168/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005169 * xmlParseMarkupDecl:
5170 * @ctxt: an XML parser context
5171 *
5172 * parse Markup declarations
5173 *
5174 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5175 * NotationDecl | PI | Comment
5176 *
5177 * [ VC: Proper Declaration/PE Nesting ]
5178 * Parameter-entity replacement text must be properly nested with
5179 * markup declarations. That is to say, if either the first character
5180 * or the last character of a markup declaration (markupdecl above) is
5181 * contained in the replacement text for a parameter-entity reference,
5182 * both must be contained in the same replacement text.
5183 *
5184 * [ WFC: PEs in Internal Subset ]
5185 * In the internal DTD subset, parameter-entity references can occur
5186 * only where markup declarations can occur, not within markup declarations.
5187 * (This does not apply to references that occur in external parameter
5188 * entities or to the external subset.)
5189 */
5190void
5191xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5192 GROW;
5193 xmlParseElementDecl(ctxt);
5194 xmlParseAttributeListDecl(ctxt);
5195 xmlParseEntityDecl(ctxt);
5196 xmlParseNotationDecl(ctxt);
5197 xmlParsePI(ctxt);
5198 xmlParseComment(ctxt);
5199 /*
5200 * This is only for internal subset. On external entities,
5201 * the replacement is done before parsing stage
5202 */
5203 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5204 xmlParsePEReference(ctxt);
5205
5206 /*
5207 * Conditional sections are allowed from entities included
5208 * by PE References in the internal subset.
5209 */
5210 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5211 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5212 xmlParseConditionalSections(ctxt);
5213 }
5214 }
5215
5216 ctxt->instate = XML_PARSER_DTD;
5217}
5218
5219/**
5220 * xmlParseTextDecl:
5221 * @ctxt: an XML parser context
5222 *
5223 * parse an XML declaration header for external entities
5224 *
5225 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5226 *
5227 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5228 */
5229
5230void
5231xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5232 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005233 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234
5235 /*
5236 * We know that '<?xml' is here.
5237 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005238 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005239 SKIP(5);
5240 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005241 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005242 return;
5243 }
5244
William M. Brack76e95df2003-10-18 16:20:14 +00005245 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005246 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5247 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005248 }
5249 SKIP_BLANKS;
5250
5251 /*
5252 * We may have the VersionInfo here.
5253 */
5254 version = xmlParseVersionInfo(ctxt);
5255 if (version == NULL)
5256 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005257 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005258 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005259 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5260 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005261 }
5262 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005263 ctxt->input->version = version;
5264
5265 /*
5266 * We must have the encoding declaration
5267 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005268 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005269 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5270 /*
5271 * The XML REC instructs us to stop parsing right here
5272 */
5273 return;
5274 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005275 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5276 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5277 "Missing encoding in text declaration\n");
5278 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005279
5280 SKIP_BLANKS;
5281 if ((RAW == '?') && (NXT(1) == '>')) {
5282 SKIP(2);
5283 } else if (RAW == '>') {
5284 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005285 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005286 NEXT;
5287 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005288 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005289 MOVETO_ENDTAG(CUR_PTR);
5290 NEXT;
5291 }
5292}
5293
5294/**
Owen Taylor3473f882001-02-23 17:55:21 +00005295 * xmlParseExternalSubset:
5296 * @ctxt: an XML parser context
5297 * @ExternalID: the external identifier
5298 * @SystemID: the system identifier (or URL)
5299 *
5300 * parse Markup declarations from an external subset
5301 *
5302 * [30] extSubset ::= textDecl? extSubsetDecl
5303 *
5304 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5305 */
5306void
5307xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5308 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005309 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005310 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005311 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005312 xmlParseTextDecl(ctxt);
5313 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5314 /*
5315 * The XML REC instructs us to stop parsing right here
5316 */
5317 ctxt->instate = XML_PARSER_EOF;
5318 return;
5319 }
5320 }
5321 if (ctxt->myDoc == NULL) {
5322 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5323 }
5324 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5325 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5326
5327 ctxt->instate = XML_PARSER_DTD;
5328 ctxt->external = 1;
5329 while (((RAW == '<') && (NXT(1) == '?')) ||
5330 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005331 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005332 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005333 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005334
5335 GROW;
5336 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5337 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005338 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005339 NEXT;
5340 } else if (RAW == '%') {
5341 xmlParsePEReference(ctxt);
5342 } else
5343 xmlParseMarkupDecl(ctxt);
5344
5345 /*
5346 * Pop-up of finished entities.
5347 */
5348 while ((RAW == 0) && (ctxt->inputNr > 1))
5349 xmlPopInput(ctxt);
5350
Daniel Veillardfdc91562002-07-01 21:52:03 +00005351 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005352 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005353 break;
5354 }
5355 }
5356
5357 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005358 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360
5361}
5362
5363/**
5364 * xmlParseReference:
5365 * @ctxt: an XML parser context
5366 *
5367 * parse and handle entity references in content, depending on the SAX
5368 * interface, this may end-up in a call to character() if this is a
5369 * CharRef, a predefined entity, if there is no reference() callback.
5370 * or if the parser was asked to switch to that mode.
5371 *
5372 * [67] Reference ::= EntityRef | CharRef
5373 */
5374void
5375xmlParseReference(xmlParserCtxtPtr ctxt) {
5376 xmlEntityPtr ent;
5377 xmlChar *val;
5378 if (RAW != '&') return;
5379
5380 if (NXT(1) == '#') {
5381 int i = 0;
5382 xmlChar out[10];
5383 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005384 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005385
5386 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5387 /*
5388 * So we are using non-UTF-8 buffers
5389 * Check that the char fit on 8bits, if not
5390 * generate a CharRef.
5391 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005392 if (value <= 0xFF) {
5393 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005394 out[1] = 0;
5395 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5396 (!ctxt->disableSAX))
5397 ctxt->sax->characters(ctxt->userData, out, 1);
5398 } else {
5399 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005400 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005401 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005402 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005403 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5404 (!ctxt->disableSAX))
5405 ctxt->sax->reference(ctxt->userData, out);
5406 }
5407 } else {
5408 /*
5409 * Just encode the value in UTF-8
5410 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005411 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005412 out[i] = 0;
5413 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5414 (!ctxt->disableSAX))
5415 ctxt->sax->characters(ctxt->userData, out, i);
5416 }
5417 } else {
5418 ent = xmlParseEntityRef(ctxt);
5419 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005420 if (!ctxt->wellFormed)
5421 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005422 if ((ent->name != NULL) &&
5423 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5424 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005425 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005426
5427
5428 /*
5429 * The first reference to the entity trigger a parsing phase
5430 * where the ent->children is filled with the result from
5431 * the parsing.
5432 */
5433 if (ent->children == NULL) {
5434 xmlChar *value;
5435 value = ent->content;
5436
5437 /*
5438 * Check that this entity is well formed
5439 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005440 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005441 (value[1] == 0) && (value[0] == '<') &&
5442 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5443 /*
5444 * DONE: get definite answer on this !!!
5445 * Lots of entity decls are used to declare a single
5446 * char
5447 * <!ENTITY lt "<">
5448 * Which seems to be valid since
5449 * 2.4: The ampersand character (&) and the left angle
5450 * bracket (<) may appear in their literal form only
5451 * when used ... They are also legal within the literal
5452 * entity value of an internal entity declaration;i
5453 * see "4.3.2 Well-Formed Parsed Entities".
5454 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5455 * Looking at the OASIS test suite and James Clark
5456 * tests, this is broken. However the XML REC uses
5457 * it. Is the XML REC not well-formed ????
5458 * This is a hack to avoid this problem
5459 *
5460 * ANSWER: since lt gt amp .. are already defined,
5461 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005462 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005463 * is lousy but acceptable.
5464 */
5465 list = xmlNewDocText(ctxt->myDoc, value);
5466 if (list != NULL) {
5467 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5468 (ent->children == NULL)) {
5469 ent->children = list;
5470 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005471 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 list->parent = (xmlNodePtr) ent;
5473 } else {
5474 xmlFreeNodeList(list);
5475 }
5476 } else if (list != NULL) {
5477 xmlFreeNodeList(list);
5478 }
5479 } else {
5480 /*
5481 * 4.3.2: An internal general parsed entity is well-formed
5482 * if its replacement text matches the production labeled
5483 * content.
5484 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005485
5486 void *user_data;
5487 /*
5488 * This is a bit hackish but this seems the best
5489 * way to make sure both SAX and DOM entity support
5490 * behaves okay.
5491 */
5492 if (ctxt->userData == ctxt)
5493 user_data = NULL;
5494 else
5495 user_data = ctxt->userData;
5496
Owen Taylor3473f882001-02-23 17:55:21 +00005497 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5498 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005499 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5500 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005501 ctxt->depth--;
5502 } else if (ent->etype ==
5503 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5504 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005505 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005506 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005507 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005508 ctxt->depth--;
5509 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005510 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005511 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5512 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005513 }
5514 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005515 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005516 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005517 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005518 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5519 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005520 (ent->children == NULL)) {
5521 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005522 if (ctxt->replaceEntities) {
5523 /*
5524 * Prune it directly in the generated document
5525 * except for single text nodes.
5526 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005527 if (((list->type == XML_TEXT_NODE) &&
5528 (list->next == NULL)) ||
5529 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005530 list->parent = (xmlNodePtr) ent;
5531 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005532 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005533 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005534 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005535 while (list != NULL) {
5536 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005537 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005538 if (list->next == NULL)
5539 ent->last = list;
5540 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005541 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005542 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005543#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005544 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5545 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005546#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005547 }
5548 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005549 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005550 while (list != NULL) {
5551 list->parent = (xmlNodePtr) ent;
5552 if (list->next == NULL)
5553 ent->last = list;
5554 list = list->next;
5555 }
Owen Taylor3473f882001-02-23 17:55:21 +00005556 }
5557 } else {
5558 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005559 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005560 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005561 } else if ((ret != XML_ERR_OK) &&
5562 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 } else if (list != NULL) {
5565 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005566 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005567 }
5568 }
5569 }
5570 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5571 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5572 /*
5573 * Create a node.
5574 */
5575 ctxt->sax->reference(ctxt->userData, ent->name);
5576 return;
5577 } else if (ctxt->replaceEntities) {
5578 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5579 /*
5580 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005581 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005582 * In the first occurrence list contains the replacement.
5583 * progressive == 2 means we are operating on the Reader
5584 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005585 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005586 if (((list == NULL) && (ent->owner == 0)) ||
5587 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005588 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005589
5590 /*
5591 * when operating on a reader, the entities definitions
5592 * are always owning the entities subtree.
5593 if (ctxt->parseMode == XML_PARSE_READER)
5594 ent->owner = 1;
5595 */
5596
Daniel Veillard62f313b2001-07-04 19:49:14 +00005597 cur = ent->children;
5598 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005599 nw = xmlCopyNode(cur, 1);
5600 if (nw != NULL) {
5601 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005602 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005603 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005604 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005605 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005606 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005607 if (cur == ent->last) {
5608 /*
5609 * needed to detect some strange empty
5610 * node cases in the reader tests
5611 */
5612 if ((ctxt->parseMode == XML_PARSE_READER) &&
5613 (nw->type == XML_ELEMENT_NODE) &&
5614 (nw->children == NULL))
5615 nw->extra = 1;
5616
Daniel Veillard62f313b2001-07-04 19:49:14 +00005617 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005618 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005619 cur = cur->next;
5620 }
Daniel Veillard81273902003-09-30 00:43:48 +00005621#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005622 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005623 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005624#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005625 } else if (list == NULL) {
5626 xmlNodePtr nw = NULL, cur, next, last,
5627 firstChild = NULL;
5628 /*
5629 * Copy the entity child list and make it the new
5630 * entity child list. The goal is to make sure any
5631 * ID or REF referenced will be the one from the
5632 * document content and not the entity copy.
5633 */
5634 cur = ent->children;
5635 ent->children = NULL;
5636 last = ent->last;
5637 ent->last = NULL;
5638 while (cur != NULL) {
5639 next = cur->next;
5640 cur->next = NULL;
5641 cur->parent = NULL;
5642 nw = xmlCopyNode(cur, 1);
5643 if (nw != NULL) {
5644 nw->_private = cur->_private;
5645 if (firstChild == NULL){
5646 firstChild = cur;
5647 }
5648 xmlAddChild((xmlNodePtr) ent, nw);
5649 xmlAddChild(ctxt->node, cur);
5650 }
5651 if (cur == last)
5652 break;
5653 cur = next;
5654 }
5655 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005656#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005657 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5658 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005659#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005660 } else {
5661 /*
5662 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005663 * node with a possible previous text one which
5664 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005665 */
5666 if (ent->children->type == XML_TEXT_NODE)
5667 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5668 if ((ent->last != ent->children) &&
5669 (ent->last->type == XML_TEXT_NODE))
5670 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5671 xmlAddChildList(ctxt->node, ent->children);
5672 }
5673
Owen Taylor3473f882001-02-23 17:55:21 +00005674 /*
5675 * This is to avoid a nasty side effect, see
5676 * characters() in SAX.c
5677 */
5678 ctxt->nodemem = 0;
5679 ctxt->nodelen = 0;
5680 return;
5681 } else {
5682 /*
5683 * Probably running in SAX mode
5684 */
5685 xmlParserInputPtr input;
5686
5687 input = xmlNewEntityInputStream(ctxt, ent);
5688 xmlPushInput(ctxt, input);
5689 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5691 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005692 xmlParseTextDecl(ctxt);
5693 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5694 /*
5695 * The XML REC instructs us to stop parsing right here
5696 */
5697 ctxt->instate = XML_PARSER_EOF;
5698 return;
5699 }
5700 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005701 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5702 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005703 }
5704 }
5705 return;
5706 }
5707 }
5708 } else {
5709 val = ent->content;
5710 if (val == NULL) return;
5711 /*
5712 * inline the entity.
5713 */
5714 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5715 (!ctxt->disableSAX))
5716 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5717 }
5718 }
5719}
5720
5721/**
5722 * xmlParseEntityRef:
5723 * @ctxt: an XML parser context
5724 *
5725 * parse ENTITY references declarations
5726 *
5727 * [68] EntityRef ::= '&' Name ';'
5728 *
5729 * [ WFC: Entity Declared ]
5730 * In a document without any DTD, a document with only an internal DTD
5731 * subset which contains no parameter entity references, or a document
5732 * with "standalone='yes'", the Name given in the entity reference
5733 * must match that in an entity declaration, except that well-formed
5734 * documents need not declare any of the following entities: amp, lt,
5735 * gt, apos, quot. The declaration of a parameter entity must precede
5736 * any reference to it. Similarly, the declaration of a general entity
5737 * must precede any reference to it which appears in a default value in an
5738 * attribute-list declaration. Note that if entities are declared in the
5739 * external subset or in external parameter entities, a non-validating
5740 * processor is not obligated to read and process their declarations;
5741 * for such documents, the rule that an entity must be declared is a
5742 * well-formedness constraint only if standalone='yes'.
5743 *
5744 * [ WFC: Parsed Entity ]
5745 * An entity reference must not contain the name of an unparsed entity
5746 *
5747 * Returns the xmlEntityPtr if found, or NULL otherwise.
5748 */
5749xmlEntityPtr
5750xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005751 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005752 xmlEntityPtr ent = NULL;
5753
5754 GROW;
5755
5756 if (RAW == '&') {
5757 NEXT;
5758 name = xmlParseName(ctxt);
5759 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005760 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5761 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005762 } else {
5763 if (RAW == ';') {
5764 NEXT;
5765 /*
5766 * Ask first SAX for entity resolution, otherwise try the
5767 * predefined set.
5768 */
5769 if (ctxt->sax != NULL) {
5770 if (ctxt->sax->getEntity != NULL)
5771 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005772 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005773 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005774 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5775 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005776 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005777 }
Owen Taylor3473f882001-02-23 17:55:21 +00005778 }
5779 /*
5780 * [ WFC: Entity Declared ]
5781 * In a document without any DTD, a document with only an
5782 * internal DTD subset which contains no parameter entity
5783 * references, or a document with "standalone='yes'", the
5784 * Name given in the entity reference must match that in an
5785 * entity declaration, except that well-formed documents
5786 * need not declare any of the following entities: amp, lt,
5787 * gt, apos, quot.
5788 * The declaration of a parameter entity must precede any
5789 * reference to it.
5790 * Similarly, the declaration of a general entity must
5791 * precede any reference to it which appears in a default
5792 * value in an attribute-list declaration. Note that if
5793 * entities are declared in the external subset or in
5794 * external parameter entities, a non-validating processor
5795 * is not obligated to read and process their declarations;
5796 * for such documents, the rule that an entity must be
5797 * declared is a well-formedness constraint only if
5798 * standalone='yes'.
5799 */
5800 if (ent == NULL) {
5801 if ((ctxt->standalone == 1) ||
5802 ((ctxt->hasExternalSubset == 0) &&
5803 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005804 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005805 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005806 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005807 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005808 "Entity '%s' not defined\n", name);
5809 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005810 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005811 }
5812
5813 /*
5814 * [ WFC: Parsed Entity ]
5815 * An entity reference must not contain the name of an
5816 * unparsed entity
5817 */
5818 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005819 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005820 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005821 }
5822
5823 /*
5824 * [ WFC: No External Entity References ]
5825 * Attribute values cannot contain direct or indirect
5826 * entity references to external entities.
5827 */
5828 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5829 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005830 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5831 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005832 }
5833 /*
5834 * [ WFC: No < in Attribute Values ]
5835 * The replacement text of any entity referred to directly or
5836 * indirectly in an attribute value (other than "&lt;") must
5837 * not contain a <.
5838 */
5839 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5840 (ent != NULL) &&
5841 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5842 (ent->content != NULL) &&
5843 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005844 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005845 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 }
5847
5848 /*
5849 * Internal check, no parameter entities here ...
5850 */
5851 else {
5852 switch (ent->etype) {
5853 case XML_INTERNAL_PARAMETER_ENTITY:
5854 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005855 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5856 "Attempt to reference the parameter entity '%s'\n",
5857 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 break;
5859 default:
5860 break;
5861 }
5862 }
5863
5864 /*
5865 * [ WFC: No Recursion ]
5866 * A parsed entity must not contain a recursive reference
5867 * to itself, either directly or indirectly.
5868 * Done somewhere else
5869 */
5870
5871 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005872 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005873 }
Owen Taylor3473f882001-02-23 17:55:21 +00005874 }
5875 }
5876 return(ent);
5877}
5878
5879/**
5880 * xmlParseStringEntityRef:
5881 * @ctxt: an XML parser context
5882 * @str: a pointer to an index in the string
5883 *
5884 * parse ENTITY references declarations, but this version parses it from
5885 * a string value.
5886 *
5887 * [68] EntityRef ::= '&' Name ';'
5888 *
5889 * [ WFC: Entity Declared ]
5890 * In a document without any DTD, a document with only an internal DTD
5891 * subset which contains no parameter entity references, or a document
5892 * with "standalone='yes'", the Name given in the entity reference
5893 * must match that in an entity declaration, except that well-formed
5894 * documents need not declare any of the following entities: amp, lt,
5895 * gt, apos, quot. The declaration of a parameter entity must precede
5896 * any reference to it. Similarly, the declaration of a general entity
5897 * must precede any reference to it which appears in a default value in an
5898 * attribute-list declaration. Note that if entities are declared in the
5899 * external subset or in external parameter entities, a non-validating
5900 * processor is not obligated to read and process their declarations;
5901 * for such documents, the rule that an entity must be declared is a
5902 * well-formedness constraint only if standalone='yes'.
5903 *
5904 * [ WFC: Parsed Entity ]
5905 * An entity reference must not contain the name of an unparsed entity
5906 *
5907 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5908 * is updated to the current location in the string.
5909 */
5910xmlEntityPtr
5911xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5912 xmlChar *name;
5913 const xmlChar *ptr;
5914 xmlChar cur;
5915 xmlEntityPtr ent = NULL;
5916
5917 if ((str == NULL) || (*str == NULL))
5918 return(NULL);
5919 ptr = *str;
5920 cur = *ptr;
5921 if (cur == '&') {
5922 ptr++;
5923 cur = *ptr;
5924 name = xmlParseStringName(ctxt, &ptr);
5925 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005926 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5927 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005928 } else {
5929 if (*ptr == ';') {
5930 ptr++;
5931 /*
5932 * Ask first SAX for entity resolution, otherwise try the
5933 * predefined set.
5934 */
5935 if (ctxt->sax != NULL) {
5936 if (ctxt->sax->getEntity != NULL)
5937 ent = ctxt->sax->getEntity(ctxt->userData, name);
5938 if (ent == NULL)
5939 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005940 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005941 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005942 }
Owen Taylor3473f882001-02-23 17:55:21 +00005943 }
5944 /*
5945 * [ WFC: Entity Declared ]
5946 * In a document without any DTD, a document with only an
5947 * internal DTD subset which contains no parameter entity
5948 * references, or a document with "standalone='yes'", the
5949 * Name given in the entity reference must match that in an
5950 * entity declaration, except that well-formed documents
5951 * need not declare any of the following entities: amp, lt,
5952 * gt, apos, quot.
5953 * The declaration of a parameter entity must precede any
5954 * reference to it.
5955 * Similarly, the declaration of a general entity must
5956 * precede any reference to it which appears in a default
5957 * value in an attribute-list declaration. Note that if
5958 * entities are declared in the external subset or in
5959 * external parameter entities, a non-validating processor
5960 * is not obligated to read and process their declarations;
5961 * for such documents, the rule that an entity must be
5962 * declared is a well-formedness constraint only if
5963 * standalone='yes'.
5964 */
5965 if (ent == NULL) {
5966 if ((ctxt->standalone == 1) ||
5967 ((ctxt->hasExternalSubset == 0) &&
5968 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005970 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005971 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005972 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005973 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005974 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005975 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005976 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005977 }
5978
5979 /*
5980 * [ WFC: Parsed Entity ]
5981 * An entity reference must not contain the name of an
5982 * unparsed entity
5983 */
5984 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005985 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005986 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005987 }
5988
5989 /*
5990 * [ WFC: No External Entity References ]
5991 * Attribute values cannot contain direct or indirect
5992 * entity references to external entities.
5993 */
5994 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5995 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005996 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005997 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
5999 /*
6000 * [ WFC: No < in Attribute Values ]
6001 * The replacement text of any entity referred to directly or
6002 * indirectly in an attribute value (other than "&lt;") must
6003 * not contain a <.
6004 */
6005 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6006 (ent != NULL) &&
6007 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6008 (ent->content != NULL) &&
6009 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006010 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6011 "'<' in entity '%s' is not allowed in attributes values\n",
6012 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006013 }
6014
6015 /*
6016 * Internal check, no parameter entities here ...
6017 */
6018 else {
6019 switch (ent->etype) {
6020 case XML_INTERNAL_PARAMETER_ENTITY:
6021 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006022 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6023 "Attempt to reference the parameter entity '%s'\n",
6024 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006025 break;
6026 default:
6027 break;
6028 }
6029 }
6030
6031 /*
6032 * [ WFC: No Recursion ]
6033 * A parsed entity must not contain a recursive reference
6034 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006035 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006036 */
6037
6038 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006039 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006040 }
6041 xmlFree(name);
6042 }
6043 }
6044 *str = ptr;
6045 return(ent);
6046}
6047
6048/**
6049 * xmlParsePEReference:
6050 * @ctxt: an XML parser context
6051 *
6052 * parse PEReference declarations
6053 * The entity content is handled directly by pushing it's content as
6054 * a new input stream.
6055 *
6056 * [69] PEReference ::= '%' Name ';'
6057 *
6058 * [ WFC: No Recursion ]
6059 * A parsed entity must not contain a recursive
6060 * reference to itself, either directly or indirectly.
6061 *
6062 * [ WFC: Entity Declared ]
6063 * In a document without any DTD, a document with only an internal DTD
6064 * subset which contains no parameter entity references, or a document
6065 * with "standalone='yes'", ... ... The declaration of a parameter
6066 * entity must precede any reference to it...
6067 *
6068 * [ VC: Entity Declared ]
6069 * In a document with an external subset or external parameter entities
6070 * with "standalone='no'", ... ... The declaration of a parameter entity
6071 * must precede any reference to it...
6072 *
6073 * [ WFC: In DTD ]
6074 * Parameter-entity references may only appear in the DTD.
6075 * NOTE: misleading but this is handled.
6076 */
6077void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006078xmlParsePEReference(xmlParserCtxtPtr ctxt)
6079{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006080 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006081 xmlEntityPtr entity = NULL;
6082 xmlParserInputPtr input;
6083
6084 if (RAW == '%') {
6085 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006086 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006087 if (name == NULL) {
6088 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6089 "xmlParsePEReference: no name\n");
6090 } else {
6091 if (RAW == ';') {
6092 NEXT;
6093 if ((ctxt->sax != NULL) &&
6094 (ctxt->sax->getParameterEntity != NULL))
6095 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6096 name);
6097 if (entity == NULL) {
6098 /*
6099 * [ WFC: Entity Declared ]
6100 * In a document without any DTD, a document with only an
6101 * internal DTD subset which contains no parameter entity
6102 * references, or a document with "standalone='yes'", ...
6103 * ... The declaration of a parameter entity must precede
6104 * any reference to it...
6105 */
6106 if ((ctxt->standalone == 1) ||
6107 ((ctxt->hasExternalSubset == 0) &&
6108 (ctxt->hasPErefs == 0))) {
6109 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6110 "PEReference: %%%s; not found\n",
6111 name);
6112 } else {
6113 /*
6114 * [ VC: Entity Declared ]
6115 * In a document with an external subset or external
6116 * parameter entities with "standalone='no'", ...
6117 * ... The declaration of a parameter entity must
6118 * precede any reference to it...
6119 */
6120 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6121 "PEReference: %%%s; not found\n",
6122 name, NULL);
6123 ctxt->valid = 0;
6124 }
6125 } else {
6126 /*
6127 * Internal checking in case the entity quest barfed
6128 */
6129 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6130 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6131 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6132 "Internal: %%%s; is not a parameter entity\n",
6133 name, NULL);
6134 } else if (ctxt->input->free != deallocblankswrapper) {
6135 input =
6136 xmlNewBlanksWrapperInputStream(ctxt, entity);
6137 xmlPushInput(ctxt, input);
6138 } else {
6139 /*
6140 * TODO !!!
6141 * handle the extra spaces added before and after
6142 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6143 */
6144 input = xmlNewEntityInputStream(ctxt, entity);
6145 xmlPushInput(ctxt, input);
6146 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006147 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006148 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006149 xmlParseTextDecl(ctxt);
6150 if (ctxt->errNo ==
6151 XML_ERR_UNSUPPORTED_ENCODING) {
6152 /*
6153 * The XML REC instructs us to stop parsing
6154 * right here
6155 */
6156 ctxt->instate = XML_PARSER_EOF;
6157 return;
6158 }
6159 }
6160 }
6161 }
6162 ctxt->hasPErefs = 1;
6163 } else {
6164 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6165 }
6166 }
Owen Taylor3473f882001-02-23 17:55:21 +00006167 }
6168}
6169
6170/**
6171 * xmlParseStringPEReference:
6172 * @ctxt: an XML parser context
6173 * @str: a pointer to an index in the string
6174 *
6175 * parse PEReference declarations
6176 *
6177 * [69] PEReference ::= '%' Name ';'
6178 *
6179 * [ WFC: No Recursion ]
6180 * A parsed entity must not contain a recursive
6181 * reference to itself, either directly or indirectly.
6182 *
6183 * [ WFC: Entity Declared ]
6184 * In a document without any DTD, a document with only an internal DTD
6185 * subset which contains no parameter entity references, or a document
6186 * with "standalone='yes'", ... ... The declaration of a parameter
6187 * entity must precede any reference to it...
6188 *
6189 * [ VC: Entity Declared ]
6190 * In a document with an external subset or external parameter entities
6191 * with "standalone='no'", ... ... The declaration of a parameter entity
6192 * must precede any reference to it...
6193 *
6194 * [ WFC: In DTD ]
6195 * Parameter-entity references may only appear in the DTD.
6196 * NOTE: misleading but this is handled.
6197 *
6198 * Returns the string of the entity content.
6199 * str is updated to the current value of the index
6200 */
6201xmlEntityPtr
6202xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6203 const xmlChar *ptr;
6204 xmlChar cur;
6205 xmlChar *name;
6206 xmlEntityPtr entity = NULL;
6207
6208 if ((str == NULL) || (*str == NULL)) return(NULL);
6209 ptr = *str;
6210 cur = *ptr;
6211 if (cur == '%') {
6212 ptr++;
6213 cur = *ptr;
6214 name = xmlParseStringName(ctxt, &ptr);
6215 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006216 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6217 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006218 } else {
6219 cur = *ptr;
6220 if (cur == ';') {
6221 ptr++;
6222 cur = *ptr;
6223 if ((ctxt->sax != NULL) &&
6224 (ctxt->sax->getParameterEntity != NULL))
6225 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6226 name);
6227 if (entity == NULL) {
6228 /*
6229 * [ WFC: Entity Declared ]
6230 * In a document without any DTD, a document with only an
6231 * internal DTD subset which contains no parameter entity
6232 * references, or a document with "standalone='yes'", ...
6233 * ... The declaration of a parameter entity must precede
6234 * any reference to it...
6235 */
6236 if ((ctxt->standalone == 1) ||
6237 ((ctxt->hasExternalSubset == 0) &&
6238 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006239 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006240 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006241 } else {
6242 /*
6243 * [ VC: Entity Declared ]
6244 * In a document with an external subset or external
6245 * parameter entities with "standalone='no'", ...
6246 * ... The declaration of a parameter entity must
6247 * precede any reference to it...
6248 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006249 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6250 "PEReference: %%%s; not found\n",
6251 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006252 ctxt->valid = 0;
6253 }
6254 } else {
6255 /*
6256 * Internal checking in case the entity quest barfed
6257 */
6258 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6259 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006260 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6261 "%%%s; is not a parameter entity\n",
6262 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 }
6264 }
6265 ctxt->hasPErefs = 1;
6266 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006267 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 }
6269 xmlFree(name);
6270 }
6271 }
6272 *str = ptr;
6273 return(entity);
6274}
6275
6276/**
6277 * xmlParseDocTypeDecl:
6278 * @ctxt: an XML parser context
6279 *
6280 * parse a DOCTYPE declaration
6281 *
6282 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6283 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6284 *
6285 * [ VC: Root Element Type ]
6286 * The Name in the document type declaration must match the element
6287 * type of the root element.
6288 */
6289
6290void
6291xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006292 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006293 xmlChar *ExternalID = NULL;
6294 xmlChar *URI = NULL;
6295
6296 /*
6297 * We know that '<!DOCTYPE' has been detected.
6298 */
6299 SKIP(9);
6300
6301 SKIP_BLANKS;
6302
6303 /*
6304 * Parse the DOCTYPE name.
6305 */
6306 name = xmlParseName(ctxt);
6307 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006308 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6309 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006310 }
6311 ctxt->intSubName = name;
6312
6313 SKIP_BLANKS;
6314
6315 /*
6316 * Check for SystemID and ExternalID
6317 */
6318 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6319
6320 if ((URI != NULL) || (ExternalID != NULL)) {
6321 ctxt->hasExternalSubset = 1;
6322 }
6323 ctxt->extSubURI = URI;
6324 ctxt->extSubSystem = ExternalID;
6325
6326 SKIP_BLANKS;
6327
6328 /*
6329 * Create and update the internal subset.
6330 */
6331 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6332 (!ctxt->disableSAX))
6333 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6334
6335 /*
6336 * Is there any internal subset declarations ?
6337 * they are handled separately in xmlParseInternalSubset()
6338 */
6339 if (RAW == '[')
6340 return;
6341
6342 /*
6343 * We should be at the end of the DOCTYPE declaration.
6344 */
6345 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006346 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006347 }
6348 NEXT;
6349}
6350
6351/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006352 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006353 * @ctxt: an XML parser context
6354 *
6355 * parse the internal subset declaration
6356 *
6357 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6358 */
6359
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006360static void
Owen Taylor3473f882001-02-23 17:55:21 +00006361xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6362 /*
6363 * Is there any DTD definition ?
6364 */
6365 if (RAW == '[') {
6366 ctxt->instate = XML_PARSER_DTD;
6367 NEXT;
6368 /*
6369 * Parse the succession of Markup declarations and
6370 * PEReferences.
6371 * Subsequence (markupdecl | PEReference | S)*
6372 */
6373 while (RAW != ']') {
6374 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006375 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006376
6377 SKIP_BLANKS;
6378 xmlParseMarkupDecl(ctxt);
6379 xmlParsePEReference(ctxt);
6380
6381 /*
6382 * Pop-up of finished entities.
6383 */
6384 while ((RAW == 0) && (ctxt->inputNr > 1))
6385 xmlPopInput(ctxt);
6386
6387 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006388 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006389 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006390 break;
6391 }
6392 }
6393 if (RAW == ']') {
6394 NEXT;
6395 SKIP_BLANKS;
6396 }
6397 }
6398
6399 /*
6400 * We should be at the end of the DOCTYPE declaration.
6401 */
6402 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006403 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006404 }
6405 NEXT;
6406}
6407
Daniel Veillard81273902003-09-30 00:43:48 +00006408#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006409/**
6410 * xmlParseAttribute:
6411 * @ctxt: an XML parser context
6412 * @value: a xmlChar ** used to store the value of the attribute
6413 *
6414 * parse an attribute
6415 *
6416 * [41] Attribute ::= Name Eq AttValue
6417 *
6418 * [ WFC: No External Entity References ]
6419 * Attribute values cannot contain direct or indirect entity references
6420 * to external entities.
6421 *
6422 * [ WFC: No < in Attribute Values ]
6423 * The replacement text of any entity referred to directly or indirectly in
6424 * an attribute value (other than "&lt;") must not contain a <.
6425 *
6426 * [ VC: Attribute Value Type ]
6427 * The attribute must have been declared; the value must be of the type
6428 * declared for it.
6429 *
6430 * [25] Eq ::= S? '=' S?
6431 *
6432 * With namespace:
6433 *
6434 * [NS 11] Attribute ::= QName Eq AttValue
6435 *
6436 * Also the case QName == xmlns:??? is handled independently as a namespace
6437 * definition.
6438 *
6439 * Returns the attribute name, and the value in *value.
6440 */
6441
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006442const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006443xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006444 const xmlChar *name;
6445 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006446
6447 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006448 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006449 name = xmlParseName(ctxt);
6450 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006452 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006453 return(NULL);
6454 }
6455
6456 /*
6457 * read the value
6458 */
6459 SKIP_BLANKS;
6460 if (RAW == '=') {
6461 NEXT;
6462 SKIP_BLANKS;
6463 val = xmlParseAttValue(ctxt);
6464 ctxt->instate = XML_PARSER_CONTENT;
6465 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006466 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006467 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006468 return(NULL);
6469 }
6470
6471 /*
6472 * Check that xml:lang conforms to the specification
6473 * No more registered as an error, just generate a warning now
6474 * since this was deprecated in XML second edition
6475 */
6476 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6477 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006478 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6479 "Malformed value for xml:lang : %s\n",
6480 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006481 }
6482 }
6483
6484 /*
6485 * Check that xml:space conforms to the specification
6486 */
6487 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6488 if (xmlStrEqual(val, BAD_CAST "default"))
6489 *(ctxt->space) = 0;
6490 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6491 *(ctxt->space) = 1;
6492 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006493 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006494"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006495 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006496 }
6497 }
6498
6499 *value = val;
6500 return(name);
6501}
6502
6503/**
6504 * xmlParseStartTag:
6505 * @ctxt: an XML parser context
6506 *
6507 * parse a start of tag either for rule element or
6508 * EmptyElement. In both case we don't parse the tag closing chars.
6509 *
6510 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6511 *
6512 * [ WFC: Unique Att Spec ]
6513 * No attribute name may appear more than once in the same start-tag or
6514 * empty-element tag.
6515 *
6516 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6517 *
6518 * [ WFC: Unique Att Spec ]
6519 * No attribute name may appear more than once in the same start-tag or
6520 * empty-element tag.
6521 *
6522 * With namespace:
6523 *
6524 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6525 *
6526 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6527 *
6528 * Returns the element name parsed
6529 */
6530
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006531const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006532xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006533 const xmlChar *name;
6534 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006535 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006536 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006537 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006538 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006539 int i;
6540
6541 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006542 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006543
6544 name = xmlParseName(ctxt);
6545 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006546 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006547 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006548 return(NULL);
6549 }
6550
6551 /*
6552 * Now parse the attributes, it ends up with the ending
6553 *
6554 * (S Attribute)* S?
6555 */
6556 SKIP_BLANKS;
6557 GROW;
6558
Daniel Veillard21a0f912001-02-25 19:54:14 +00006559 while ((RAW != '>') &&
6560 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006561 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006562 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006563 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006564
6565 attname = xmlParseAttribute(ctxt, &attvalue);
6566 if ((attname != NULL) && (attvalue != NULL)) {
6567 /*
6568 * [ WFC: Unique Att Spec ]
6569 * No attribute name may appear more than once in the same
6570 * start-tag or empty-element tag.
6571 */
6572 for (i = 0; i < nbatts;i += 2) {
6573 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006574 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006575 xmlFree(attvalue);
6576 goto failed;
6577 }
6578 }
Owen Taylor3473f882001-02-23 17:55:21 +00006579 /*
6580 * Add the pair to atts
6581 */
6582 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006583 maxatts = 22; /* allow for 10 attrs by default */
6584 atts = (const xmlChar **)
6585 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006586 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006587 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006588 if (attvalue != NULL)
6589 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006590 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006591 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006592 ctxt->atts = atts;
6593 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006594 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006595 const xmlChar **n;
6596
Owen Taylor3473f882001-02-23 17:55:21 +00006597 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006598 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006599 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006600 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006601 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006602 if (attvalue != NULL)
6603 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006604 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006605 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006606 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006607 ctxt->atts = atts;
6608 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006609 }
6610 atts[nbatts++] = attname;
6611 atts[nbatts++] = attvalue;
6612 atts[nbatts] = NULL;
6613 atts[nbatts + 1] = NULL;
6614 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006615 if (attvalue != NULL)
6616 xmlFree(attvalue);
6617 }
6618
6619failed:
6620
Daniel Veillard3772de32002-12-17 10:31:45 +00006621 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006622 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6623 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006624 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6626 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006627 }
6628 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006629 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6630 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006631 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6632 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006633 break;
6634 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006635 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006636 GROW;
6637 }
6638
6639 /*
6640 * SAX: Start of Element !
6641 */
6642 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006643 (!ctxt->disableSAX)) {
6644 if (nbatts > 0)
6645 ctxt->sax->startElement(ctxt->userData, name, atts);
6646 else
6647 ctxt->sax->startElement(ctxt->userData, name, NULL);
6648 }
Owen Taylor3473f882001-02-23 17:55:21 +00006649
6650 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006651 /* Free only the content strings */
6652 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006653 if (atts[i] != NULL)
6654 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006655 }
6656 return(name);
6657}
6658
6659/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006660 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006661 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006662 * @line: line of the start tag
6663 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006664 *
6665 * parse an end of tag
6666 *
6667 * [42] ETag ::= '</' Name S? '>'
6668 *
6669 * With namespace
6670 *
6671 * [NS 9] ETag ::= '</' QName S? '>'
6672 */
6673
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006674static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006675xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006676 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006677
6678 GROW;
6679 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006680 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006681 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006682 return;
6683 }
6684 SKIP(2);
6685
Daniel Veillard46de64e2002-05-29 08:21:33 +00006686 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006687
6688 /*
6689 * We should definitely be at the ending "S? '>'" part
6690 */
6691 GROW;
6692 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006693 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006694 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006695 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006696 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006697
6698 /*
6699 * [ WFC: Element Type Match ]
6700 * The Name in an element's end-tag must match the element type in the
6701 * start-tag.
6702 *
6703 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006704 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006705 if (name == NULL) name = BAD_CAST "unparseable";
6706 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006707 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006708 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006709 }
6710
6711 /*
6712 * SAX: End of Tag
6713 */
6714 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6715 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006716 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006717
Daniel Veillarde57ec792003-09-10 10:50:59 +00006718 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006719 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006720 return;
6721}
6722
6723/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006724 * xmlParseEndTag:
6725 * @ctxt: an XML parser context
6726 *
6727 * parse an end of tag
6728 *
6729 * [42] ETag ::= '</' Name S? '>'
6730 *
6731 * With namespace
6732 *
6733 * [NS 9] ETag ::= '</' QName S? '>'
6734 */
6735
6736void
6737xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006738 xmlParseEndTag1(ctxt, 0);
6739}
Daniel Veillard81273902003-09-30 00:43:48 +00006740#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006741
6742/************************************************************************
6743 * *
6744 * SAX 2 specific operations *
6745 * *
6746 ************************************************************************/
6747
6748static const xmlChar *
6749xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6750 int len = 0, l;
6751 int c;
6752 int count = 0;
6753
6754 /*
6755 * Handler for more complex cases
6756 */
6757 GROW;
6758 c = CUR_CHAR(l);
6759 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006760 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006761 return(NULL);
6762 }
6763
6764 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006765 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006766 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006767 (IS_COMBINING(c)) ||
6768 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006769 if (count++ > 100) {
6770 count = 0;
6771 GROW;
6772 }
6773 len += l;
6774 NEXTL(l);
6775 c = CUR_CHAR(l);
6776 }
6777 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6778}
6779
6780/*
6781 * xmlGetNamespace:
6782 * @ctxt: an XML parser context
6783 * @prefix: the prefix to lookup
6784 *
6785 * Lookup the namespace name for the @prefix (which ca be NULL)
6786 * The prefix must come from the @ctxt->dict dictionnary
6787 *
6788 * Returns the namespace name or NULL if not bound
6789 */
6790static const xmlChar *
6791xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6792 int i;
6793
Daniel Veillarde57ec792003-09-10 10:50:59 +00006794 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006795 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006796 if (ctxt->nsTab[i] == prefix) {
6797 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6798 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006799 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006800 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006801 return(NULL);
6802}
6803
6804/**
6805 * xmlParseNCName:
6806 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006807 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006808 *
6809 * parse an XML name.
6810 *
6811 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6812 * CombiningChar | Extender
6813 *
6814 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6815 *
6816 * Returns the Name parsed or NULL
6817 */
6818
6819static const xmlChar *
6820xmlParseNCName(xmlParserCtxtPtr ctxt) {
6821 const xmlChar *in;
6822 const xmlChar *ret;
6823 int count = 0;
6824
6825 /*
6826 * Accelerator for simple ASCII names
6827 */
6828 in = ctxt->input->cur;
6829 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6830 ((*in >= 0x41) && (*in <= 0x5A)) ||
6831 (*in == '_')) {
6832 in++;
6833 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6834 ((*in >= 0x41) && (*in <= 0x5A)) ||
6835 ((*in >= 0x30) && (*in <= 0x39)) ||
6836 (*in == '_') || (*in == '-') ||
6837 (*in == '.'))
6838 in++;
6839 if ((*in > 0) && (*in < 0x80)) {
6840 count = in - ctxt->input->cur;
6841 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6842 ctxt->input->cur = in;
6843 ctxt->nbChars += count;
6844 ctxt->input->col += count;
6845 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006846 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006847 }
6848 return(ret);
6849 }
6850 }
6851 return(xmlParseNCNameComplex(ctxt));
6852}
6853
6854/**
6855 * xmlParseQName:
6856 * @ctxt: an XML parser context
6857 * @prefix: pointer to store the prefix part
6858 *
6859 * parse an XML Namespace QName
6860 *
6861 * [6] QName ::= (Prefix ':')? LocalPart
6862 * [7] Prefix ::= NCName
6863 * [8] LocalPart ::= NCName
6864 *
6865 * Returns the Name parsed or NULL
6866 */
6867
6868static const xmlChar *
6869xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6870 const xmlChar *l, *p;
6871
6872 GROW;
6873
6874 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006875 if (l == NULL) {
6876 if (CUR == ':') {
6877 l = xmlParseName(ctxt);
6878 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006879 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6880 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006881 *prefix = NULL;
6882 return(l);
6883 }
6884 }
6885 return(NULL);
6886 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006887 if (CUR == ':') {
6888 NEXT;
6889 p = l;
6890 l = xmlParseNCName(ctxt);
6891 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006892 xmlChar *tmp;
6893
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006894 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6895 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006896 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6897 p = xmlDictLookup(ctxt->dict, tmp, -1);
6898 if (tmp != NULL) xmlFree(tmp);
6899 *prefix = NULL;
6900 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006901 }
6902 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006903 xmlChar *tmp;
6904
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006905 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6906 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006907 NEXT;
6908 tmp = (xmlChar *) xmlParseName(ctxt);
6909 if (tmp != NULL) {
6910 tmp = xmlBuildQName(tmp, l, NULL, 0);
6911 l = xmlDictLookup(ctxt->dict, tmp, -1);
6912 if (tmp != NULL) xmlFree(tmp);
6913 *prefix = p;
6914 return(l);
6915 }
6916 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6917 l = xmlDictLookup(ctxt->dict, tmp, -1);
6918 if (tmp != NULL) xmlFree(tmp);
6919 *prefix = p;
6920 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006921 }
6922 *prefix = p;
6923 } else
6924 *prefix = NULL;
6925 return(l);
6926}
6927
6928/**
6929 * xmlParseQNameAndCompare:
6930 * @ctxt: an XML parser context
6931 * @name: the localname
6932 * @prefix: the prefix, if any.
6933 *
6934 * parse an XML name and compares for match
6935 * (specialized for endtag parsing)
6936 *
6937 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6938 * and the name for mismatch
6939 */
6940
6941static const xmlChar *
6942xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6943 xmlChar const *prefix) {
6944 const xmlChar *cmp = name;
6945 const xmlChar *in;
6946 const xmlChar *ret;
6947 const xmlChar *prefix2;
6948
6949 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6950
6951 GROW;
6952 in = ctxt->input->cur;
6953
6954 cmp = prefix;
6955 while (*in != 0 && *in == *cmp) {
6956 ++in;
6957 ++cmp;
6958 }
6959 if ((*cmp == 0) && (*in == ':')) {
6960 in++;
6961 cmp = name;
6962 while (*in != 0 && *in == *cmp) {
6963 ++in;
6964 ++cmp;
6965 }
William M. Brack76e95df2003-10-18 16:20:14 +00006966 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006967 /* success */
6968 ctxt->input->cur = in;
6969 return((const xmlChar*) 1);
6970 }
6971 }
6972 /*
6973 * all strings coms from the dictionary, equality can be done directly
6974 */
6975 ret = xmlParseQName (ctxt, &prefix2);
6976 if ((ret == name) && (prefix == prefix2))
6977 return((const xmlChar*) 1);
6978 return ret;
6979}
6980
6981/**
6982 * xmlParseAttValueInternal:
6983 * @ctxt: an XML parser context
6984 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006985 * @alloc: whether the attribute was reallocated as a new string
6986 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006987 *
6988 * parse a value for an attribute.
6989 * NOTE: if no normalization is needed, the routine will return pointers
6990 * directly from the data buffer.
6991 *
6992 * 3.3.3 Attribute-Value Normalization:
6993 * Before the value of an attribute is passed to the application or
6994 * checked for validity, the XML processor must normalize it as follows:
6995 * - a character reference is processed by appending the referenced
6996 * character to the attribute value
6997 * - an entity reference is processed by recursively processing the
6998 * replacement text of the entity
6999 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7000 * appending #x20 to the normalized value, except that only a single
7001 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7002 * parsed entity or the literal entity value of an internal parsed entity
7003 * - other characters are processed by appending them to the normalized value
7004 * If the declared value is not CDATA, then the XML processor must further
7005 * process the normalized attribute value by discarding any leading and
7006 * trailing space (#x20) characters, and by replacing sequences of space
7007 * (#x20) characters by a single space (#x20) character.
7008 * All attributes for which no declaration has been read should be treated
7009 * by a non-validating parser as if declared CDATA.
7010 *
7011 * Returns the AttValue parsed or NULL. The value has to be freed by the
7012 * caller if it was copied, this can be detected by val[*len] == 0.
7013 */
7014
7015static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007016xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7017 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007018{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007019 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007020 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007021 xmlChar *ret = NULL;
7022
7023 GROW;
7024 in = (xmlChar *) CUR_PTR;
7025 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007026 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007027 return (NULL);
7028 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007029 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007030
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007031 /*
7032 * try to handle in this routine the most common case where no
7033 * allocation of a new string is required and where content is
7034 * pure ASCII.
7035 */
7036 limit = *in++;
7037 end = ctxt->input->end;
7038 start = in;
7039 if (in >= end) {
7040 const xmlChar *oldbase = ctxt->input->base;
7041 GROW;
7042 if (oldbase != ctxt->input->base) {
7043 long delta = ctxt->input->base - oldbase;
7044 start = start + delta;
7045 in = in + delta;
7046 }
7047 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007048 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007049 if (normalize) {
7050 /*
7051 * Skip any leading spaces
7052 */
7053 while ((in < end) && (*in != limit) &&
7054 ((*in == 0x20) || (*in == 0x9) ||
7055 (*in == 0xA) || (*in == 0xD))) {
7056 in++;
7057 start = in;
7058 if (in >= end) {
7059 const xmlChar *oldbase = ctxt->input->base;
7060 GROW;
7061 if (oldbase != ctxt->input->base) {
7062 long delta = ctxt->input->base - oldbase;
7063 start = start + delta;
7064 in = in + delta;
7065 }
7066 end = ctxt->input->end;
7067 }
7068 }
7069 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7070 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7071 if ((*in++ == 0x20) && (*in == 0x20)) break;
7072 if (in >= end) {
7073 const xmlChar *oldbase = ctxt->input->base;
7074 GROW;
7075 if (oldbase != ctxt->input->base) {
7076 long delta = ctxt->input->base - oldbase;
7077 start = start + delta;
7078 in = in + delta;
7079 }
7080 end = ctxt->input->end;
7081 }
7082 }
7083 last = in;
7084 /*
7085 * skip the trailing blanks
7086 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007087 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007088 while ((in < end) && (*in != limit) &&
7089 ((*in == 0x20) || (*in == 0x9) ||
7090 (*in == 0xA) || (*in == 0xD))) {
7091 in++;
7092 if (in >= end) {
7093 const xmlChar *oldbase = ctxt->input->base;
7094 GROW;
7095 if (oldbase != ctxt->input->base) {
7096 long delta = ctxt->input->base - oldbase;
7097 start = start + delta;
7098 in = in + delta;
7099 last = last + delta;
7100 }
7101 end = ctxt->input->end;
7102 }
7103 }
7104 if (*in != limit) goto need_complex;
7105 } else {
7106 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7107 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7108 in++;
7109 if (in >= end) {
7110 const xmlChar *oldbase = ctxt->input->base;
7111 GROW;
7112 if (oldbase != ctxt->input->base) {
7113 long delta = ctxt->input->base - oldbase;
7114 start = start + delta;
7115 in = in + delta;
7116 }
7117 end = ctxt->input->end;
7118 }
7119 }
7120 last = in;
7121 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007122 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007123 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007124 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007125 *len = last - start;
7126 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007127 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007128 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007129 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007130 }
7131 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007132 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007133 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007134need_complex:
7135 if (alloc) *alloc = 1;
7136 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007137}
7138
7139/**
7140 * xmlParseAttribute2:
7141 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007142 * @pref: the element prefix
7143 * @elem: the element name
7144 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007145 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007146 * @len: an int * to save the length of the attribute
7147 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007148 *
7149 * parse an attribute in the new SAX2 framework.
7150 *
7151 * Returns the attribute name, and the value in *value, .
7152 */
7153
7154static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007155xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7156 const xmlChar *pref, const xmlChar *elem,
7157 const xmlChar **prefix, xmlChar **value,
7158 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007159 const xmlChar *name;
7160 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007161 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007162
7163 *value = NULL;
7164 GROW;
7165 name = xmlParseQName(ctxt, prefix);
7166 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007167 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7168 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007169 return(NULL);
7170 }
7171
7172 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007173 * get the type if needed
7174 */
7175 if (ctxt->attsSpecial != NULL) {
7176 int type;
7177
7178 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7179 pref, elem, *prefix, name);
7180 if (type != 0) normalize = 1;
7181 }
7182
7183 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007184 * read the value
7185 */
7186 SKIP_BLANKS;
7187 if (RAW == '=') {
7188 NEXT;
7189 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007190 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007191 ctxt->instate = XML_PARSER_CONTENT;
7192 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007193 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007194 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007195 return(NULL);
7196 }
7197
7198 /*
7199 * Check that xml:lang conforms to the specification
7200 * No more registered as an error, just generate a warning now
7201 * since this was deprecated in XML second edition
7202 */
7203 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7204 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007205 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7206 "Malformed value for xml:lang : %s\n",
7207 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007208 }
7209 }
7210
7211 /*
7212 * Check that xml:space conforms to the specification
7213 */
7214 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7215 if (xmlStrEqual(val, BAD_CAST "default"))
7216 *(ctxt->space) = 0;
7217 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7218 *(ctxt->space) = 1;
7219 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007220 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007221"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7222 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007223 }
7224 }
7225
7226 *value = val;
7227 return(name);
7228}
7229
7230/**
7231 * xmlParseStartTag2:
7232 * @ctxt: an XML parser context
7233 *
7234 * parse a start of tag either for rule element or
7235 * EmptyElement. In both case we don't parse the tag closing chars.
7236 * This routine is called when running SAX2 parsing
7237 *
7238 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7239 *
7240 * [ WFC: Unique Att Spec ]
7241 * No attribute name may appear more than once in the same start-tag or
7242 * empty-element tag.
7243 *
7244 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7245 *
7246 * [ WFC: Unique Att Spec ]
7247 * No attribute name may appear more than once in the same start-tag or
7248 * empty-element tag.
7249 *
7250 * With namespace:
7251 *
7252 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7253 *
7254 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7255 *
7256 * Returns the element name parsed
7257 */
7258
7259static const xmlChar *
7260xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007261 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007262 const xmlChar *localname;
7263 const xmlChar *prefix;
7264 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007265 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007266 const xmlChar *nsname;
7267 xmlChar *attvalue;
7268 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007269 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007270 int nratts, nbatts, nbdef;
7271 int i, j, nbNs, attval;
7272 const xmlChar *base;
7273 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007274
7275 if (RAW != '<') return(NULL);
7276 NEXT1;
7277
7278 /*
7279 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7280 * point since the attribute values may be stored as pointers to
7281 * the buffer and calling SHRINK would destroy them !
7282 * The Shrinking is only possible once the full set of attribute
7283 * callbacks have been done.
7284 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007285reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007286 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007287 base = ctxt->input->base;
7288 cur = ctxt->input->cur - ctxt->input->base;
7289 nbatts = 0;
7290 nratts = 0;
7291 nbdef = 0;
7292 nbNs = 0;
7293 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007294
7295 localname = xmlParseQName(ctxt, &prefix);
7296 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007297 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7298 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007299 return(NULL);
7300 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007301 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007302
7303 /*
7304 * Now parse the attributes, it ends up with the ending
7305 *
7306 * (S Attribute)* S?
7307 */
7308 SKIP_BLANKS;
7309 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007310 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007311
7312 while ((RAW != '>') &&
7313 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007314 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007315 const xmlChar *q = CUR_PTR;
7316 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007317 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007318
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007319 attname = xmlParseAttribute2(ctxt, prefix, localname,
7320 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007321 if ((attname != NULL) && (attvalue != NULL)) {
7322 if (len < 0) len = xmlStrlen(attvalue);
7323 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007324 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7325 xmlURIPtr uri;
7326
7327 if (*URL != 0) {
7328 uri = xmlParseURI((const char *) URL);
7329 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007330 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7331 "xmlns: %s not a valid URI\n",
7332 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007333 } else {
7334 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007335 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7336 "xmlns: URI %s is not absolute\n",
7337 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007338 }
7339 xmlFreeURI(uri);
7340 }
7341 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007342 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007343 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007344 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007345 for (j = 1;j <= nbNs;j++)
7346 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7347 break;
7348 if (j <= nbNs)
7349 xmlErrAttributeDup(ctxt, NULL, attname);
7350 else
7351 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007352 if (alloc != 0) xmlFree(attvalue);
7353 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007354 continue;
7355 }
7356 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007357 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7358 xmlURIPtr uri;
7359
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007360 if (attname == ctxt->str_xml) {
7361 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007362 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7363 "xml namespace prefix mapped to wrong URI\n",
7364 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007365 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007366 /*
7367 * Do not keep a namespace definition node
7368 */
7369 if (alloc != 0) xmlFree(attvalue);
7370 SKIP_BLANKS;
7371 continue;
7372 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007373 uri = xmlParseURI((const char *) URL);
7374 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007375 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7376 "xmlns:%s: '%s' is not a valid URI\n",
7377 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007378 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007379 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007380 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7381 "xmlns:%s: URI %s is not absolute\n",
7382 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007383 }
7384 xmlFreeURI(uri);
7385 }
7386
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007388 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007390 for (j = 1;j <= nbNs;j++)
7391 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7392 break;
7393 if (j <= nbNs)
7394 xmlErrAttributeDup(ctxt, aprefix, attname);
7395 else
7396 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007397 if (alloc != 0) xmlFree(attvalue);
7398 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007399 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007400 continue;
7401 }
7402
7403 /*
7404 * Add the pair to atts
7405 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007406 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7407 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007408 if (attvalue[len] == 0)
7409 xmlFree(attvalue);
7410 goto failed;
7411 }
7412 maxatts = ctxt->maxatts;
7413 atts = ctxt->atts;
7414 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007415 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007416 atts[nbatts++] = attname;
7417 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007418 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007419 atts[nbatts++] = attvalue;
7420 attvalue += len;
7421 atts[nbatts++] = attvalue;
7422 /*
7423 * tag if some deallocation is needed
7424 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007425 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 } else {
7427 if ((attvalue != NULL) && (attvalue[len] == 0))
7428 xmlFree(attvalue);
7429 }
7430
7431failed:
7432
7433 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007434 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007435 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7436 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007437 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7439 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007440 }
7441 SKIP_BLANKS;
7442 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7443 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007444 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007445 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007446 break;
7447 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007448 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007449 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007450 }
7451
Daniel Veillard0fb18932003-09-07 09:14:37 +00007452 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007453 * The attributes defaulting
7454 */
7455 if (ctxt->attsDefault != NULL) {
7456 xmlDefAttrsPtr defaults;
7457
7458 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7459 if (defaults != NULL) {
7460 for (i = 0;i < defaults->nbAttrs;i++) {
7461 attname = defaults->values[4 * i];
7462 aprefix = defaults->values[4 * i + 1];
7463
7464 /*
7465 * special work for namespaces defaulted defs
7466 */
7467 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7468 /*
7469 * check that it's not a defined namespace
7470 */
7471 for (j = 1;j <= nbNs;j++)
7472 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7473 break;
7474 if (j <= nbNs) continue;
7475
7476 nsname = xmlGetNamespace(ctxt, NULL);
7477 if (nsname != defaults->values[4 * i + 2]) {
7478 if (nsPush(ctxt, NULL,
7479 defaults->values[4 * i + 2]) > 0)
7480 nbNs++;
7481 }
7482 } else if (aprefix == ctxt->str_xmlns) {
7483 /*
7484 * check that it's not a defined namespace
7485 */
7486 for (j = 1;j <= nbNs;j++)
7487 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7488 break;
7489 if (j <= nbNs) continue;
7490
7491 nsname = xmlGetNamespace(ctxt, attname);
7492 if (nsname != defaults->values[2]) {
7493 if (nsPush(ctxt, attname,
7494 defaults->values[4 * i + 2]) > 0)
7495 nbNs++;
7496 }
7497 } else {
7498 /*
7499 * check that it's not a defined attribute
7500 */
7501 for (j = 0;j < nbatts;j+=5) {
7502 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7503 break;
7504 }
7505 if (j < nbatts) continue;
7506
7507 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7508 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007509 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007510 }
7511 maxatts = ctxt->maxatts;
7512 atts = ctxt->atts;
7513 }
7514 atts[nbatts++] = attname;
7515 atts[nbatts++] = aprefix;
7516 if (aprefix == NULL)
7517 atts[nbatts++] = NULL;
7518 else
7519 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7520 atts[nbatts++] = defaults->values[4 * i + 2];
7521 atts[nbatts++] = defaults->values[4 * i + 3];
7522 nbdef++;
7523 }
7524 }
7525 }
7526 }
7527
Daniel Veillarde70c8772003-11-25 07:21:18 +00007528 /*
7529 * The attributes checkings
7530 */
7531 for (i = 0; i < nbatts;i += 5) {
7532 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7533 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7534 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7535 "Namespace prefix %s for %s on %s is not defined\n",
7536 atts[i + 1], atts[i], localname);
7537 }
7538 atts[i + 2] = nsname;
7539 /*
7540 * [ WFC: Unique Att Spec ]
7541 * No attribute name may appear more than once in the same
7542 * start-tag or empty-element tag.
7543 * As extended by the Namespace in XML REC.
7544 */
7545 for (j = 0; j < i;j += 5) {
7546 if (atts[i] == atts[j]) {
7547 if (atts[i+1] == atts[j+1]) {
7548 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7549 break;
7550 }
7551 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7552 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7553 "Namespaced Attribute %s in '%s' redefined\n",
7554 atts[i], nsname, NULL);
7555 break;
7556 }
7557 }
7558 }
7559 }
7560
Daniel Veillarde57ec792003-09-10 10:50:59 +00007561 nsname = xmlGetNamespace(ctxt, prefix);
7562 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007563 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7564 "Namespace prefix %s on %s is not defined\n",
7565 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007566 }
7567 *pref = prefix;
7568 *URI = nsname;
7569
7570 /*
7571 * SAX: Start of Element !
7572 */
7573 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7574 (!ctxt->disableSAX)) {
7575 if (nbNs > 0)
7576 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7577 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7578 nbatts / 5, nbdef, atts);
7579 else
7580 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7581 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7582 }
7583
7584 /*
7585 * Free up attribute allocated strings if needed
7586 */
7587 if (attval != 0) {
7588 for (i = 3,j = 0; j < nratts;i += 5,j++)
7589 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7590 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007591 }
7592
7593 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007594
7595base_changed:
7596 /*
7597 * the attribute strings are valid iif the base didn't changed
7598 */
7599 if (attval != 0) {
7600 for (i = 3,j = 0; j < nratts;i += 5,j++)
7601 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7602 xmlFree((xmlChar *) atts[i]);
7603 }
7604 ctxt->input->cur = ctxt->input->base + cur;
7605 if (ctxt->wellFormed == 1) {
7606 goto reparse;
7607 }
7608 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609}
7610
7611/**
7612 * xmlParseEndTag2:
7613 * @ctxt: an XML parser context
7614 * @line: line of the start tag
7615 * @nsNr: number of namespaces on the start tag
7616 *
7617 * parse an end of tag
7618 *
7619 * [42] ETag ::= '</' Name S? '>'
7620 *
7621 * With namespace
7622 *
7623 * [NS 9] ETag ::= '</' QName S? '>'
7624 */
7625
7626static void
7627xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007628 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007629 const xmlChar *name;
7630
7631 GROW;
7632 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007633 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007634 return;
7635 }
7636 SKIP(2);
7637
Daniel Veillard453e71b2004-04-20 17:44:46 +00007638 if ((tlen > 0) && (strncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007639 if (ctxt->input->cur[tlen] == '>') {
7640 ctxt->input->cur += tlen + 1;
7641 goto done;
7642 }
7643 ctxt->input->cur += tlen;
7644 name = (xmlChar*)1;
7645 } else {
7646 if (prefix == NULL)
7647 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7648 else
7649 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7650 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007651
7652 /*
7653 * We should definitely be at the ending "S? '>'" part
7654 */
7655 GROW;
7656 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007657 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007658 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007659 } else
7660 NEXT1;
7661
7662 /*
7663 * [ WFC: Element Type Match ]
7664 * The Name in an element's end-tag must match the element type in the
7665 * start-tag.
7666 *
7667 */
7668 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007669 if (name == NULL) name = BAD_CAST "unparseable";
7670 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007671 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007672 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007673 }
7674
7675 /*
7676 * SAX: End of Tag
7677 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007678done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007679 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7680 (!ctxt->disableSAX))
7681 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7682
Daniel Veillard0fb18932003-09-07 09:14:37 +00007683 spacePop(ctxt);
7684 if (nsNr != 0)
7685 nsPop(ctxt, nsNr);
7686 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007687}
7688
7689/**
Owen Taylor3473f882001-02-23 17:55:21 +00007690 * xmlParseCDSect:
7691 * @ctxt: an XML parser context
7692 *
7693 * Parse escaped pure raw content.
7694 *
7695 * [18] CDSect ::= CDStart CData CDEnd
7696 *
7697 * [19] CDStart ::= '<![CDATA['
7698 *
7699 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7700 *
7701 * [21] CDEnd ::= ']]>'
7702 */
7703void
7704xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7705 xmlChar *buf = NULL;
7706 int len = 0;
7707 int size = XML_PARSER_BUFFER_SIZE;
7708 int r, rl;
7709 int s, sl;
7710 int cur, l;
7711 int count = 0;
7712
Daniel Veillard8f597c32003-10-06 08:19:27 +00007713 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007714 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007715 SKIP(9);
7716 } else
7717 return;
7718
7719 ctxt->instate = XML_PARSER_CDATA_SECTION;
7720 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007721 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007722 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007723 ctxt->instate = XML_PARSER_CONTENT;
7724 return;
7725 }
7726 NEXTL(rl);
7727 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007728 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007729 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007730 ctxt->instate = XML_PARSER_CONTENT;
7731 return;
7732 }
7733 NEXTL(sl);
7734 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007735 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007736 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007737 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007738 return;
7739 }
William M. Brack871611b2003-10-18 04:53:14 +00007740 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007741 ((r != ']') || (s != ']') || (cur != '>'))) {
7742 if (len + 5 >= size) {
7743 size *= 2;
7744 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7745 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007746 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007747 return;
7748 }
7749 }
7750 COPY_BUF(rl,buf,len,r);
7751 r = s;
7752 rl = sl;
7753 s = cur;
7754 sl = l;
7755 count++;
7756 if (count > 50) {
7757 GROW;
7758 count = 0;
7759 }
7760 NEXTL(l);
7761 cur = CUR_CHAR(l);
7762 }
7763 buf[len] = 0;
7764 ctxt->instate = XML_PARSER_CONTENT;
7765 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007766 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007767 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007768 xmlFree(buf);
7769 return;
7770 }
7771 NEXTL(l);
7772
7773 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007774 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007775 */
7776 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7777 if (ctxt->sax->cdataBlock != NULL)
7778 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007779 else if (ctxt->sax->characters != NULL)
7780 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007781 }
7782 xmlFree(buf);
7783}
7784
7785/**
7786 * xmlParseContent:
7787 * @ctxt: an XML parser context
7788 *
7789 * Parse a content:
7790 *
7791 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7792 */
7793
7794void
7795xmlParseContent(xmlParserCtxtPtr ctxt) {
7796 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007797 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007798 ((RAW != '<') || (NXT(1) != '/'))) {
7799 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007800 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007801 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007802
7803 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007804 * First case : a Processing Instruction.
7805 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007806 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007807 xmlParsePI(ctxt);
7808 }
7809
7810 /*
7811 * Second case : a CDSection
7812 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007813 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007814 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007815 xmlParseCDSect(ctxt);
7816 }
7817
7818 /*
7819 * Third case : a comment
7820 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007821 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007822 (NXT(2) == '-') && (NXT(3) == '-')) {
7823 xmlParseComment(ctxt);
7824 ctxt->instate = XML_PARSER_CONTENT;
7825 }
7826
7827 /*
7828 * Fourth case : a sub-element.
7829 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007830 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007831 xmlParseElement(ctxt);
7832 }
7833
7834 /*
7835 * Fifth case : a reference. If if has not been resolved,
7836 * parsing returns it's Name, create the node
7837 */
7838
Daniel Veillard21a0f912001-02-25 19:54:14 +00007839 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007840 xmlParseReference(ctxt);
7841 }
7842
7843 /*
7844 * Last case, text. Note that References are handled directly.
7845 */
7846 else {
7847 xmlParseCharData(ctxt, 0);
7848 }
7849
7850 GROW;
7851 /*
7852 * Pop-up of finished entities.
7853 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007854 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007855 xmlPopInput(ctxt);
7856 SHRINK;
7857
Daniel Veillardfdc91562002-07-01 21:52:03 +00007858 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007859 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7860 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007861 ctxt->instate = XML_PARSER_EOF;
7862 break;
7863 }
7864 }
7865}
7866
7867/**
7868 * xmlParseElement:
7869 * @ctxt: an XML parser context
7870 *
7871 * parse an XML element, this is highly recursive
7872 *
7873 * [39] element ::= EmptyElemTag | STag content ETag
7874 *
7875 * [ WFC: Element Type Match ]
7876 * The Name in an element's end-tag must match the element type in the
7877 * start-tag.
7878 *
Owen Taylor3473f882001-02-23 17:55:21 +00007879 */
7880
7881void
7882xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007883 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007884 const xmlChar *prefix;
7885 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007886 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007887 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007888 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007889 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007890
7891 /* Capture start position */
7892 if (ctxt->record_info) {
7893 node_info.begin_pos = ctxt->input->consumed +
7894 (CUR_PTR - ctxt->input->base);
7895 node_info.begin_line = ctxt->input->line;
7896 }
7897
7898 if (ctxt->spaceNr == 0)
7899 spacePush(ctxt, -1);
7900 else
7901 spacePush(ctxt, *ctxt->space);
7902
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007903 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007904#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007905 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007906#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007907 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007908#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007909 else
7910 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007911#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007912 if (name == NULL) {
7913 spacePop(ctxt);
7914 return;
7915 }
7916 namePush(ctxt, name);
7917 ret = ctxt->node;
7918
Daniel Veillard4432df22003-09-28 18:58:27 +00007919#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007920 /*
7921 * [ VC: Root Element Type ]
7922 * The Name in the document type declaration must match the element
7923 * type of the root element.
7924 */
7925 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7926 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7927 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007928#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007929
7930 /*
7931 * Check for an Empty Element.
7932 */
7933 if ((RAW == '/') && (NXT(1) == '>')) {
7934 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007935 if (ctxt->sax2) {
7936 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7937 (!ctxt->disableSAX))
7938 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007939#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 } else {
7941 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7942 (!ctxt->disableSAX))
7943 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007944#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007945 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007946 namePop(ctxt);
7947 spacePop(ctxt);
7948 if (nsNr != ctxt->nsNr)
7949 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007950 if ( ret != NULL && ctxt->record_info ) {
7951 node_info.end_pos = ctxt->input->consumed +
7952 (CUR_PTR - ctxt->input->base);
7953 node_info.end_line = ctxt->input->line;
7954 node_info.node = ret;
7955 xmlParserAddNodeInfo(ctxt, &node_info);
7956 }
7957 return;
7958 }
7959 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007960 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007961 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007962 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7963 "Couldn't find end of Start Tag %s line %d\n",
7964 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007965
7966 /*
7967 * end of parsing of this node.
7968 */
7969 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007971 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007972 if (nsNr != ctxt->nsNr)
7973 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007974
7975 /*
7976 * Capture end position and add node
7977 */
7978 if ( ret != NULL && ctxt->record_info ) {
7979 node_info.end_pos = ctxt->input->consumed +
7980 (CUR_PTR - ctxt->input->base);
7981 node_info.end_line = ctxt->input->line;
7982 node_info.node = ret;
7983 xmlParserAddNodeInfo(ctxt, &node_info);
7984 }
7985 return;
7986 }
7987
7988 /*
7989 * Parse the content of the element:
7990 */
7991 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007992 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007993 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007994 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007995 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007996
7997 /*
7998 * end of parsing of this node.
7999 */
8000 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008001 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008002 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008003 if (nsNr != ctxt->nsNr)
8004 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008005 return;
8006 }
8007
8008 /*
8009 * parse the end of tag: '</' should be here.
8010 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008011 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008012 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008013 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008014 }
8015#ifdef LIBXML_SAX1_ENABLED
8016 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008017 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008018#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008019
8020 /*
8021 * Capture end position and add node
8022 */
8023 if ( ret != NULL && ctxt->record_info ) {
8024 node_info.end_pos = ctxt->input->consumed +
8025 (CUR_PTR - ctxt->input->base);
8026 node_info.end_line = ctxt->input->line;
8027 node_info.node = ret;
8028 xmlParserAddNodeInfo(ctxt, &node_info);
8029 }
8030}
8031
8032/**
8033 * xmlParseVersionNum:
8034 * @ctxt: an XML parser context
8035 *
8036 * parse the XML version value.
8037 *
8038 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8039 *
8040 * Returns the string giving the XML version number, or NULL
8041 */
8042xmlChar *
8043xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8044 xmlChar *buf = NULL;
8045 int len = 0;
8046 int size = 10;
8047 xmlChar cur;
8048
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008049 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008050 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008051 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008052 return(NULL);
8053 }
8054 cur = CUR;
8055 while (((cur >= 'a') && (cur <= 'z')) ||
8056 ((cur >= 'A') && (cur <= 'Z')) ||
8057 ((cur >= '0') && (cur <= '9')) ||
8058 (cur == '_') || (cur == '.') ||
8059 (cur == ':') || (cur == '-')) {
8060 if (len + 1 >= size) {
8061 size *= 2;
8062 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8063 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008064 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008065 return(NULL);
8066 }
8067 }
8068 buf[len++] = cur;
8069 NEXT;
8070 cur=CUR;
8071 }
8072 buf[len] = 0;
8073 return(buf);
8074}
8075
8076/**
8077 * xmlParseVersionInfo:
8078 * @ctxt: an XML parser context
8079 *
8080 * parse the XML version.
8081 *
8082 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8083 *
8084 * [25] Eq ::= S? '=' S?
8085 *
8086 * Returns the version string, e.g. "1.0"
8087 */
8088
8089xmlChar *
8090xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8091 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008092
Daniel Veillarda07050d2003-10-19 14:46:32 +00008093 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008094 SKIP(7);
8095 SKIP_BLANKS;
8096 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008097 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008098 return(NULL);
8099 }
8100 NEXT;
8101 SKIP_BLANKS;
8102 if (RAW == '"') {
8103 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008104 version = xmlParseVersionNum(ctxt);
8105 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008106 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008107 } else
8108 NEXT;
8109 } else if (RAW == '\''){
8110 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008111 version = xmlParseVersionNum(ctxt);
8112 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008113 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008114 } else
8115 NEXT;
8116 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008117 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008118 }
8119 }
8120 return(version);
8121}
8122
8123/**
8124 * xmlParseEncName:
8125 * @ctxt: an XML parser context
8126 *
8127 * parse the XML encoding name
8128 *
8129 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8130 *
8131 * Returns the encoding name value or NULL
8132 */
8133xmlChar *
8134xmlParseEncName(xmlParserCtxtPtr ctxt) {
8135 xmlChar *buf = NULL;
8136 int len = 0;
8137 int size = 10;
8138 xmlChar cur;
8139
8140 cur = CUR;
8141 if (((cur >= 'a') && (cur <= 'z')) ||
8142 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008143 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008144 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008145 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008146 return(NULL);
8147 }
8148
8149 buf[len++] = cur;
8150 NEXT;
8151 cur = CUR;
8152 while (((cur >= 'a') && (cur <= 'z')) ||
8153 ((cur >= 'A') && (cur <= 'Z')) ||
8154 ((cur >= '0') && (cur <= '9')) ||
8155 (cur == '.') || (cur == '_') ||
8156 (cur == '-')) {
8157 if (len + 1 >= size) {
8158 size *= 2;
8159 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8160 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008161 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008162 return(NULL);
8163 }
8164 }
8165 buf[len++] = cur;
8166 NEXT;
8167 cur = CUR;
8168 if (cur == 0) {
8169 SHRINK;
8170 GROW;
8171 cur = CUR;
8172 }
8173 }
8174 buf[len] = 0;
8175 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008176 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008177 }
8178 return(buf);
8179}
8180
8181/**
8182 * xmlParseEncodingDecl:
8183 * @ctxt: an XML parser context
8184 *
8185 * parse the XML encoding declaration
8186 *
8187 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8188 *
8189 * this setups the conversion filters.
8190 *
8191 * Returns the encoding value or NULL
8192 */
8193
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008194const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008195xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8196 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008197
8198 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008199 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008200 SKIP(8);
8201 SKIP_BLANKS;
8202 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008203 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008204 return(NULL);
8205 }
8206 NEXT;
8207 SKIP_BLANKS;
8208 if (RAW == '"') {
8209 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008210 encoding = xmlParseEncName(ctxt);
8211 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008212 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008213 } else
8214 NEXT;
8215 } else if (RAW == '\''){
8216 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008217 encoding = xmlParseEncName(ctxt);
8218 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008219 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008220 } else
8221 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008222 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008223 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008224 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008225 /*
8226 * UTF-16 encoding stwich has already taken place at this stage,
8227 * more over the little-endian/big-endian selection is already done
8228 */
8229 if ((encoding != NULL) &&
8230 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8231 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008232 if (ctxt->encoding != NULL)
8233 xmlFree((xmlChar *) ctxt->encoding);
8234 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008235 }
8236 /*
8237 * UTF-8 encoding is handled natively
8238 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008239 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008240 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8241 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008242 if (ctxt->encoding != NULL)
8243 xmlFree((xmlChar *) ctxt->encoding);
8244 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008245 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008246 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008247 xmlCharEncodingHandlerPtr handler;
8248
8249 if (ctxt->input->encoding != NULL)
8250 xmlFree((xmlChar *) ctxt->input->encoding);
8251 ctxt->input->encoding = encoding;
8252
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008253 handler = xmlFindCharEncodingHandler((const char *) encoding);
8254 if (handler != NULL) {
8255 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008256 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008257 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008258 "Unsupported encoding %s\n", encoding);
8259 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 }
8261 }
8262 }
8263 return(encoding);
8264}
8265
8266/**
8267 * xmlParseSDDecl:
8268 * @ctxt: an XML parser context
8269 *
8270 * parse the XML standalone declaration
8271 *
8272 * [32] SDDecl ::= S 'standalone' Eq
8273 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8274 *
8275 * [ VC: Standalone Document Declaration ]
8276 * TODO The standalone document declaration must have the value "no"
8277 * if any external markup declarations contain declarations of:
8278 * - attributes with default values, if elements to which these
8279 * attributes apply appear in the document without specifications
8280 * of values for these attributes, or
8281 * - entities (other than amp, lt, gt, apos, quot), if references
8282 * to those entities appear in the document, or
8283 * - attributes with values subject to normalization, where the
8284 * attribute appears in the document with a value which will change
8285 * as a result of normalization, or
8286 * - element types with element content, if white space occurs directly
8287 * within any instance of those types.
8288 *
8289 * Returns 1 if standalone, 0 otherwise
8290 */
8291
8292int
8293xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8294 int standalone = -1;
8295
8296 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008297 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008298 SKIP(10);
8299 SKIP_BLANKS;
8300 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008301 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008302 return(standalone);
8303 }
8304 NEXT;
8305 SKIP_BLANKS;
8306 if (RAW == '\''){
8307 NEXT;
8308 if ((RAW == 'n') && (NXT(1) == 'o')) {
8309 standalone = 0;
8310 SKIP(2);
8311 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8312 (NXT(2) == 's')) {
8313 standalone = 1;
8314 SKIP(3);
8315 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008316 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008317 }
8318 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008319 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008320 } else
8321 NEXT;
8322 } else if (RAW == '"'){
8323 NEXT;
8324 if ((RAW == 'n') && (NXT(1) == 'o')) {
8325 standalone = 0;
8326 SKIP(2);
8327 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8328 (NXT(2) == 's')) {
8329 standalone = 1;
8330 SKIP(3);
8331 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008332 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008333 }
8334 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008335 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008336 } else
8337 NEXT;
8338 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008339 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008340 }
8341 }
8342 return(standalone);
8343}
8344
8345/**
8346 * xmlParseXMLDecl:
8347 * @ctxt: an XML parser context
8348 *
8349 * parse an XML declaration header
8350 *
8351 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8352 */
8353
8354void
8355xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8356 xmlChar *version;
8357
8358 /*
8359 * We know that '<?xml' is here.
8360 */
8361 SKIP(5);
8362
William M. Brack76e95df2003-10-18 16:20:14 +00008363 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8365 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008366 }
8367 SKIP_BLANKS;
8368
8369 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008370 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008371 */
8372 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008373 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008374 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008375 } else {
8376 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8377 /*
8378 * TODO: Blueberry should be detected here
8379 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008380 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8381 "Unsupported version '%s'\n",
8382 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008383 }
8384 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008385 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008386 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008387 }
Owen Taylor3473f882001-02-23 17:55:21 +00008388
8389 /*
8390 * We may have the encoding declaration
8391 */
William M. Brack76e95df2003-10-18 16:20:14 +00008392 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008393 if ((RAW == '?') && (NXT(1) == '>')) {
8394 SKIP(2);
8395 return;
8396 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008397 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008398 }
8399 xmlParseEncodingDecl(ctxt);
8400 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8401 /*
8402 * The XML REC instructs us to stop parsing right here
8403 */
8404 return;
8405 }
8406
8407 /*
8408 * We may have the standalone status.
8409 */
William M. Brack76e95df2003-10-18 16:20:14 +00008410 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008411 if ((RAW == '?') && (NXT(1) == '>')) {
8412 SKIP(2);
8413 return;
8414 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008416 }
8417 SKIP_BLANKS;
8418 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8419
8420 SKIP_BLANKS;
8421 if ((RAW == '?') && (NXT(1) == '>')) {
8422 SKIP(2);
8423 } else if (RAW == '>') {
8424 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008425 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008426 NEXT;
8427 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008428 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008429 MOVETO_ENDTAG(CUR_PTR);
8430 NEXT;
8431 }
8432}
8433
8434/**
8435 * xmlParseMisc:
8436 * @ctxt: an XML parser context
8437 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008438 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008439 *
8440 * [27] Misc ::= Comment | PI | S
8441 */
8442
8443void
8444xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008445 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008446 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008447 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008448 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008449 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008450 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008451 NEXT;
8452 } else
8453 xmlParseComment(ctxt);
8454 }
8455}
8456
8457/**
8458 * xmlParseDocument:
8459 * @ctxt: an XML parser context
8460 *
8461 * parse an XML document (and build a tree if using the standard SAX
8462 * interface).
8463 *
8464 * [1] document ::= prolog element Misc*
8465 *
8466 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8467 *
8468 * Returns 0, -1 in case of error. the parser context is augmented
8469 * as a result of the parsing.
8470 */
8471
8472int
8473xmlParseDocument(xmlParserCtxtPtr ctxt) {
8474 xmlChar start[4];
8475 xmlCharEncoding enc;
8476
8477 xmlInitParser();
8478
8479 GROW;
8480
8481 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008482 * SAX: detecting the level.
8483 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008484 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008485
8486 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008487 * SAX: beginning of the document processing.
8488 */
8489 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8490 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8491
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008492 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8493 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008494 /*
8495 * Get the 4 first bytes and decode the charset
8496 * if enc != XML_CHAR_ENCODING_NONE
8497 * plug some encoding conversion routines.
8498 */
8499 start[0] = RAW;
8500 start[1] = NXT(1);
8501 start[2] = NXT(2);
8502 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008503 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008504 if (enc != XML_CHAR_ENCODING_NONE) {
8505 xmlSwitchEncoding(ctxt, enc);
8506 }
Owen Taylor3473f882001-02-23 17:55:21 +00008507 }
8508
8509
8510 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008511 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008512 }
8513
8514 /*
8515 * Check for the XMLDecl in the Prolog.
8516 */
8517 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008518 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008519
8520 /*
8521 * Note that we will switch encoding on the fly.
8522 */
8523 xmlParseXMLDecl(ctxt);
8524 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8525 /*
8526 * The XML REC instructs us to stop parsing right here
8527 */
8528 return(-1);
8529 }
8530 ctxt->standalone = ctxt->input->standalone;
8531 SKIP_BLANKS;
8532 } else {
8533 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8534 }
8535 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8536 ctxt->sax->startDocument(ctxt->userData);
8537
8538 /*
8539 * The Misc part of the Prolog
8540 */
8541 GROW;
8542 xmlParseMisc(ctxt);
8543
8544 /*
8545 * Then possibly doc type declaration(s) and more Misc
8546 * (doctypedecl Misc*)?
8547 */
8548 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008549 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008550
8551 ctxt->inSubset = 1;
8552 xmlParseDocTypeDecl(ctxt);
8553 if (RAW == '[') {
8554 ctxt->instate = XML_PARSER_DTD;
8555 xmlParseInternalSubset(ctxt);
8556 }
8557
8558 /*
8559 * Create and update the external subset.
8560 */
8561 ctxt->inSubset = 2;
8562 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8563 (!ctxt->disableSAX))
8564 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8565 ctxt->extSubSystem, ctxt->extSubURI);
8566 ctxt->inSubset = 0;
8567
8568
8569 ctxt->instate = XML_PARSER_PROLOG;
8570 xmlParseMisc(ctxt);
8571 }
8572
8573 /*
8574 * Time to start parsing the tree itself
8575 */
8576 GROW;
8577 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008578 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8579 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008580 } else {
8581 ctxt->instate = XML_PARSER_CONTENT;
8582 xmlParseElement(ctxt);
8583 ctxt->instate = XML_PARSER_EPILOG;
8584
8585
8586 /*
8587 * The Misc part at the end
8588 */
8589 xmlParseMisc(ctxt);
8590
Daniel Veillard561b7f82002-03-20 21:55:57 +00008591 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008592 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008593 }
8594 ctxt->instate = XML_PARSER_EOF;
8595 }
8596
8597 /*
8598 * SAX: end of the document processing.
8599 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008600 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008601 ctxt->sax->endDocument(ctxt->userData);
8602
Daniel Veillard5997aca2002-03-18 18:36:20 +00008603 /*
8604 * Remove locally kept entity definitions if the tree was not built
8605 */
8606 if ((ctxt->myDoc != NULL) &&
8607 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8608 xmlFreeDoc(ctxt->myDoc);
8609 ctxt->myDoc = NULL;
8610 }
8611
Daniel Veillardc7612992002-02-17 22:47:37 +00008612 if (! ctxt->wellFormed) {
8613 ctxt->valid = 0;
8614 return(-1);
8615 }
Owen Taylor3473f882001-02-23 17:55:21 +00008616 return(0);
8617}
8618
8619/**
8620 * xmlParseExtParsedEnt:
8621 * @ctxt: an XML parser context
8622 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008623 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008624 * An external general parsed entity is well-formed if it matches the
8625 * production labeled extParsedEnt.
8626 *
8627 * [78] extParsedEnt ::= TextDecl? content
8628 *
8629 * Returns 0, -1 in case of error. the parser context is augmented
8630 * as a result of the parsing.
8631 */
8632
8633int
8634xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8635 xmlChar start[4];
8636 xmlCharEncoding enc;
8637
8638 xmlDefaultSAXHandlerInit();
8639
Daniel Veillard309f81d2003-09-23 09:02:53 +00008640 xmlDetectSAX2(ctxt);
8641
Owen Taylor3473f882001-02-23 17:55:21 +00008642 GROW;
8643
8644 /*
8645 * SAX: beginning of the document processing.
8646 */
8647 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8648 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8649
8650 /*
8651 * Get the 4 first bytes and decode the charset
8652 * if enc != XML_CHAR_ENCODING_NONE
8653 * plug some encoding conversion routines.
8654 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008655 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8656 start[0] = RAW;
8657 start[1] = NXT(1);
8658 start[2] = NXT(2);
8659 start[3] = NXT(3);
8660 enc = xmlDetectCharEncoding(start, 4);
8661 if (enc != XML_CHAR_ENCODING_NONE) {
8662 xmlSwitchEncoding(ctxt, enc);
8663 }
Owen Taylor3473f882001-02-23 17:55:21 +00008664 }
8665
8666
8667 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008668 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008669 }
8670
8671 /*
8672 * Check for the XMLDecl in the Prolog.
8673 */
8674 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008675 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008676
8677 /*
8678 * Note that we will switch encoding on the fly.
8679 */
8680 xmlParseXMLDecl(ctxt);
8681 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8682 /*
8683 * The XML REC instructs us to stop parsing right here
8684 */
8685 return(-1);
8686 }
8687 SKIP_BLANKS;
8688 } else {
8689 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8690 }
8691 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8692 ctxt->sax->startDocument(ctxt->userData);
8693
8694 /*
8695 * Doing validity checking on chunk doesn't make sense
8696 */
8697 ctxt->instate = XML_PARSER_CONTENT;
8698 ctxt->validate = 0;
8699 ctxt->loadsubset = 0;
8700 ctxt->depth = 0;
8701
8702 xmlParseContent(ctxt);
8703
8704 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008705 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008706 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008707 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008708 }
8709
8710 /*
8711 * SAX: end of the document processing.
8712 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008713 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008714 ctxt->sax->endDocument(ctxt->userData);
8715
8716 if (! ctxt->wellFormed) return(-1);
8717 return(0);
8718}
8719
Daniel Veillard73b013f2003-09-30 12:36:01 +00008720#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008721/************************************************************************
8722 * *
8723 * Progressive parsing interfaces *
8724 * *
8725 ************************************************************************/
8726
8727/**
8728 * xmlParseLookupSequence:
8729 * @ctxt: an XML parser context
8730 * @first: the first char to lookup
8731 * @next: the next char to lookup or zero
8732 * @third: the next char to lookup or zero
8733 *
8734 * Try to find if a sequence (first, next, third) or just (first next) or
8735 * (first) is available in the input stream.
8736 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8737 * to avoid rescanning sequences of bytes, it DOES change the state of the
8738 * parser, do not use liberally.
8739 *
8740 * Returns the index to the current parsing point if the full sequence
8741 * is available, -1 otherwise.
8742 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008743static int
Owen Taylor3473f882001-02-23 17:55:21 +00008744xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8745 xmlChar next, xmlChar third) {
8746 int base, len;
8747 xmlParserInputPtr in;
8748 const xmlChar *buf;
8749
8750 in = ctxt->input;
8751 if (in == NULL) return(-1);
8752 base = in->cur - in->base;
8753 if (base < 0) return(-1);
8754 if (ctxt->checkIndex > base)
8755 base = ctxt->checkIndex;
8756 if (in->buf == NULL) {
8757 buf = in->base;
8758 len = in->length;
8759 } else {
8760 buf = in->buf->buffer->content;
8761 len = in->buf->buffer->use;
8762 }
8763 /* take into account the sequence length */
8764 if (third) len -= 2;
8765 else if (next) len --;
8766 for (;base < len;base++) {
8767 if (buf[base] == first) {
8768 if (third != 0) {
8769 if ((buf[base + 1] != next) ||
8770 (buf[base + 2] != third)) continue;
8771 } else if (next != 0) {
8772 if (buf[base + 1] != next) continue;
8773 }
8774 ctxt->checkIndex = 0;
8775#ifdef DEBUG_PUSH
8776 if (next == 0)
8777 xmlGenericError(xmlGenericErrorContext,
8778 "PP: lookup '%c' found at %d\n",
8779 first, base);
8780 else if (third == 0)
8781 xmlGenericError(xmlGenericErrorContext,
8782 "PP: lookup '%c%c' found at %d\n",
8783 first, next, base);
8784 else
8785 xmlGenericError(xmlGenericErrorContext,
8786 "PP: lookup '%c%c%c' found at %d\n",
8787 first, next, third, base);
8788#endif
8789 return(base - (in->cur - in->base));
8790 }
8791 }
8792 ctxt->checkIndex = base;
8793#ifdef DEBUG_PUSH
8794 if (next == 0)
8795 xmlGenericError(xmlGenericErrorContext,
8796 "PP: lookup '%c' failed\n", first);
8797 else if (third == 0)
8798 xmlGenericError(xmlGenericErrorContext,
8799 "PP: lookup '%c%c' failed\n", first, next);
8800 else
8801 xmlGenericError(xmlGenericErrorContext,
8802 "PP: lookup '%c%c%c' failed\n", first, next, third);
8803#endif
8804 return(-1);
8805}
8806
8807/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008808 * xmlParseGetLasts:
8809 * @ctxt: an XML parser context
8810 * @lastlt: pointer to store the last '<' from the input
8811 * @lastgt: pointer to store the last '>' from the input
8812 *
8813 * Lookup the last < and > in the current chunk
8814 */
8815static void
8816xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8817 const xmlChar **lastgt) {
8818 const xmlChar *tmp;
8819
8820 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8821 xmlGenericError(xmlGenericErrorContext,
8822 "Internal error: xmlParseGetLasts\n");
8823 return;
8824 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008825 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008826 tmp = ctxt->input->end;
8827 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008828 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008829 if (tmp < ctxt->input->base) {
8830 *lastlt = NULL;
8831 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008832 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008833 *lastlt = tmp;
8834 tmp++;
8835 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8836 if (*tmp == '\'') {
8837 tmp++;
8838 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8839 if (tmp < ctxt->input->end) tmp++;
8840 } else if (*tmp == '"') {
8841 tmp++;
8842 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8843 if (tmp < ctxt->input->end) tmp++;
8844 } else
8845 tmp++;
8846 }
8847 if (tmp < ctxt->input->end)
8848 *lastgt = tmp;
8849 else {
8850 tmp = *lastlt;
8851 tmp--;
8852 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8853 if (tmp >= ctxt->input->base)
8854 *lastgt = tmp;
8855 else
8856 *lastgt = NULL;
8857 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008858 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008859 } else {
8860 *lastlt = NULL;
8861 *lastgt = NULL;
8862 }
8863}
8864/**
Owen Taylor3473f882001-02-23 17:55:21 +00008865 * xmlParseTryOrFinish:
8866 * @ctxt: an XML parser context
8867 * @terminate: last chunk indicator
8868 *
8869 * Try to progress on parsing
8870 *
8871 * Returns zero if no parsing was possible
8872 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008873static int
Owen Taylor3473f882001-02-23 17:55:21 +00008874xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8875 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008876 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008877 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008878 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008879
8880#ifdef DEBUG_PUSH
8881 switch (ctxt->instate) {
8882 case XML_PARSER_EOF:
8883 xmlGenericError(xmlGenericErrorContext,
8884 "PP: try EOF\n"); break;
8885 case XML_PARSER_START:
8886 xmlGenericError(xmlGenericErrorContext,
8887 "PP: try START\n"); break;
8888 case XML_PARSER_MISC:
8889 xmlGenericError(xmlGenericErrorContext,
8890 "PP: try MISC\n");break;
8891 case XML_PARSER_COMMENT:
8892 xmlGenericError(xmlGenericErrorContext,
8893 "PP: try COMMENT\n");break;
8894 case XML_PARSER_PROLOG:
8895 xmlGenericError(xmlGenericErrorContext,
8896 "PP: try PROLOG\n");break;
8897 case XML_PARSER_START_TAG:
8898 xmlGenericError(xmlGenericErrorContext,
8899 "PP: try START_TAG\n");break;
8900 case XML_PARSER_CONTENT:
8901 xmlGenericError(xmlGenericErrorContext,
8902 "PP: try CONTENT\n");break;
8903 case XML_PARSER_CDATA_SECTION:
8904 xmlGenericError(xmlGenericErrorContext,
8905 "PP: try CDATA_SECTION\n");break;
8906 case XML_PARSER_END_TAG:
8907 xmlGenericError(xmlGenericErrorContext,
8908 "PP: try END_TAG\n");break;
8909 case XML_PARSER_ENTITY_DECL:
8910 xmlGenericError(xmlGenericErrorContext,
8911 "PP: try ENTITY_DECL\n");break;
8912 case XML_PARSER_ENTITY_VALUE:
8913 xmlGenericError(xmlGenericErrorContext,
8914 "PP: try ENTITY_VALUE\n");break;
8915 case XML_PARSER_ATTRIBUTE_VALUE:
8916 xmlGenericError(xmlGenericErrorContext,
8917 "PP: try ATTRIBUTE_VALUE\n");break;
8918 case XML_PARSER_DTD:
8919 xmlGenericError(xmlGenericErrorContext,
8920 "PP: try DTD\n");break;
8921 case XML_PARSER_EPILOG:
8922 xmlGenericError(xmlGenericErrorContext,
8923 "PP: try EPILOG\n");break;
8924 case XML_PARSER_PI:
8925 xmlGenericError(xmlGenericErrorContext,
8926 "PP: try PI\n");break;
8927 case XML_PARSER_IGNORE:
8928 xmlGenericError(xmlGenericErrorContext,
8929 "PP: try IGNORE\n");break;
8930 }
8931#endif
8932
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008933 if ((ctxt->input != NULL) &&
8934 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008935 xmlSHRINK(ctxt);
8936 ctxt->checkIndex = 0;
8937 }
8938 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008939
Daniel Veillarda880b122003-04-21 21:36:41 +00008940 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008941 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8942 return(0);
8943
8944
Owen Taylor3473f882001-02-23 17:55:21 +00008945 /*
8946 * Pop-up of finished entities.
8947 */
8948 while ((RAW == 0) && (ctxt->inputNr > 1))
8949 xmlPopInput(ctxt);
8950
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008951 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008952 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008953 avail = ctxt->input->length -
8954 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008955 else {
8956 /*
8957 * If we are operating on converted input, try to flush
8958 * remainng chars to avoid them stalling in the non-converted
8959 * buffer.
8960 */
8961 if ((ctxt->input->buf->raw != NULL) &&
8962 (ctxt->input->buf->raw->use > 0)) {
8963 int base = ctxt->input->base -
8964 ctxt->input->buf->buffer->content;
8965 int current = ctxt->input->cur - ctxt->input->base;
8966
8967 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8968 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8969 ctxt->input->cur = ctxt->input->base + current;
8970 ctxt->input->end =
8971 &ctxt->input->buf->buffer->content[
8972 ctxt->input->buf->buffer->use];
8973 }
8974 avail = ctxt->input->buf->buffer->use -
8975 (ctxt->input->cur - ctxt->input->base);
8976 }
Owen Taylor3473f882001-02-23 17:55:21 +00008977 if (avail < 1)
8978 goto done;
8979 switch (ctxt->instate) {
8980 case XML_PARSER_EOF:
8981 /*
8982 * Document parsing is done !
8983 */
8984 goto done;
8985 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008986 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8987 xmlChar start[4];
8988 xmlCharEncoding enc;
8989
8990 /*
8991 * Very first chars read from the document flow.
8992 */
8993 if (avail < 4)
8994 goto done;
8995
8996 /*
8997 * Get the 4 first bytes and decode the charset
8998 * if enc != XML_CHAR_ENCODING_NONE
8999 * plug some encoding conversion routines.
9000 */
9001 start[0] = RAW;
9002 start[1] = NXT(1);
9003 start[2] = NXT(2);
9004 start[3] = NXT(3);
9005 enc = xmlDetectCharEncoding(start, 4);
9006 if (enc != XML_CHAR_ENCODING_NONE) {
9007 xmlSwitchEncoding(ctxt, enc);
9008 }
9009 break;
9010 }
Owen Taylor3473f882001-02-23 17:55:21 +00009011
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009012 if (avail < 2)
9013 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009014 cur = ctxt->input->cur[0];
9015 next = ctxt->input->cur[1];
9016 if (cur == 0) {
9017 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9018 ctxt->sax->setDocumentLocator(ctxt->userData,
9019 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009020 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009021 ctxt->instate = XML_PARSER_EOF;
9022#ifdef DEBUG_PUSH
9023 xmlGenericError(xmlGenericErrorContext,
9024 "PP: entering EOF\n");
9025#endif
9026 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9027 ctxt->sax->endDocument(ctxt->userData);
9028 goto done;
9029 }
9030 if ((cur == '<') && (next == '?')) {
9031 /* PI or XML decl */
9032 if (avail < 5) return(ret);
9033 if ((!terminate) &&
9034 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9035 return(ret);
9036 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9037 ctxt->sax->setDocumentLocator(ctxt->userData,
9038 &xmlDefaultSAXLocator);
9039 if ((ctxt->input->cur[2] == 'x') &&
9040 (ctxt->input->cur[3] == 'm') &&
9041 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009042 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009043 ret += 5;
9044#ifdef DEBUG_PUSH
9045 xmlGenericError(xmlGenericErrorContext,
9046 "PP: Parsing XML Decl\n");
9047#endif
9048 xmlParseXMLDecl(ctxt);
9049 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9050 /*
9051 * The XML REC instructs us to stop parsing right
9052 * here
9053 */
9054 ctxt->instate = XML_PARSER_EOF;
9055 return(0);
9056 }
9057 ctxt->standalone = ctxt->input->standalone;
9058 if ((ctxt->encoding == NULL) &&
9059 (ctxt->input->encoding != NULL))
9060 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9061 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9062 (!ctxt->disableSAX))
9063 ctxt->sax->startDocument(ctxt->userData);
9064 ctxt->instate = XML_PARSER_MISC;
9065#ifdef DEBUG_PUSH
9066 xmlGenericError(xmlGenericErrorContext,
9067 "PP: entering MISC\n");
9068#endif
9069 } else {
9070 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9071 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9072 (!ctxt->disableSAX))
9073 ctxt->sax->startDocument(ctxt->userData);
9074 ctxt->instate = XML_PARSER_MISC;
9075#ifdef DEBUG_PUSH
9076 xmlGenericError(xmlGenericErrorContext,
9077 "PP: entering MISC\n");
9078#endif
9079 }
9080 } else {
9081 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9082 ctxt->sax->setDocumentLocator(ctxt->userData,
9083 &xmlDefaultSAXLocator);
9084 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009085 if (ctxt->version == NULL) {
9086 xmlErrMemory(ctxt, NULL);
9087 break;
9088 }
Owen Taylor3473f882001-02-23 17:55:21 +00009089 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9090 (!ctxt->disableSAX))
9091 ctxt->sax->startDocument(ctxt->userData);
9092 ctxt->instate = XML_PARSER_MISC;
9093#ifdef DEBUG_PUSH
9094 xmlGenericError(xmlGenericErrorContext,
9095 "PP: entering MISC\n");
9096#endif
9097 }
9098 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009099 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009100 const xmlChar *name;
9101 const xmlChar *prefix;
9102 const xmlChar *URI;
9103 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009104
9105 if ((avail < 2) && (ctxt->inputNr == 1))
9106 goto done;
9107 cur = ctxt->input->cur[0];
9108 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009109 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009110 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009111 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9112 ctxt->sax->endDocument(ctxt->userData);
9113 goto done;
9114 }
9115 if (!terminate) {
9116 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009117 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009118 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009119 goto done;
9120 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9121 goto done;
9122 }
9123 }
9124 if (ctxt->spaceNr == 0)
9125 spacePush(ctxt, -1);
9126 else
9127 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009128#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009129 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009130#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009131 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009132#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009133 else
9134 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009135#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009136 if (name == NULL) {
9137 spacePop(ctxt);
9138 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009139 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9140 ctxt->sax->endDocument(ctxt->userData);
9141 goto done;
9142 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009143#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009144 /*
9145 * [ VC: Root Element Type ]
9146 * The Name in the document type declaration must match
9147 * the element type of the root element.
9148 */
9149 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9150 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9151 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009152#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009153
9154 /*
9155 * Check for an Empty Element.
9156 */
9157 if ((RAW == '/') && (NXT(1) == '>')) {
9158 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009159
9160 if (ctxt->sax2) {
9161 if ((ctxt->sax != NULL) &&
9162 (ctxt->sax->endElementNs != NULL) &&
9163 (!ctxt->disableSAX))
9164 ctxt->sax->endElementNs(ctxt->userData, name,
9165 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009166#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009167 } else {
9168 if ((ctxt->sax != NULL) &&
9169 (ctxt->sax->endElement != NULL) &&
9170 (!ctxt->disableSAX))
9171 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009172#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009173 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009174 spacePop(ctxt);
9175 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009176 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009177 } else {
9178 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009179 }
9180 break;
9181 }
9182 if (RAW == '>') {
9183 NEXT;
9184 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009185 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009186 "Couldn't find end of Start Tag %s\n",
9187 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009188 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009189 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009190 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009191 if (ctxt->sax2)
9192 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009193#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009194 else
9195 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009196#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009197
Daniel Veillarda880b122003-04-21 21:36:41 +00009198 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009199 break;
9200 }
9201 case XML_PARSER_CONTENT: {
9202 const xmlChar *test;
9203 unsigned int cons;
9204 if ((avail < 2) && (ctxt->inputNr == 1))
9205 goto done;
9206 cur = ctxt->input->cur[0];
9207 next = ctxt->input->cur[1];
9208
9209 test = CUR_PTR;
9210 cons = ctxt->input->consumed;
9211 if ((cur == '<') && (next == '/')) {
9212 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009213 break;
9214 } else if ((cur == '<') && (next == '?')) {
9215 if ((!terminate) &&
9216 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9217 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009218 xmlParsePI(ctxt);
9219 } else if ((cur == '<') && (next != '!')) {
9220 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009221 break;
9222 } else if ((cur == '<') && (next == '!') &&
9223 (ctxt->input->cur[2] == '-') &&
9224 (ctxt->input->cur[3] == '-')) {
9225 if ((!terminate) &&
9226 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9227 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009228 xmlParseComment(ctxt);
9229 ctxt->instate = XML_PARSER_CONTENT;
9230 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9231 (ctxt->input->cur[2] == '[') &&
9232 (ctxt->input->cur[3] == 'C') &&
9233 (ctxt->input->cur[4] == 'D') &&
9234 (ctxt->input->cur[5] == 'A') &&
9235 (ctxt->input->cur[6] == 'T') &&
9236 (ctxt->input->cur[7] == 'A') &&
9237 (ctxt->input->cur[8] == '[')) {
9238 SKIP(9);
9239 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009240 break;
9241 } else if ((cur == '<') && (next == '!') &&
9242 (avail < 9)) {
9243 goto done;
9244 } else if (cur == '&') {
9245 if ((!terminate) &&
9246 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9247 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009248 xmlParseReference(ctxt);
9249 } else {
9250 /* TODO Avoid the extra copy, handle directly !!! */
9251 /*
9252 * Goal of the following test is:
9253 * - minimize calls to the SAX 'character' callback
9254 * when they are mergeable
9255 * - handle an problem for isBlank when we only parse
9256 * a sequence of blank chars and the next one is
9257 * not available to check against '<' presence.
9258 * - tries to homogenize the differences in SAX
9259 * callbacks between the push and pull versions
9260 * of the parser.
9261 */
9262 if ((ctxt->inputNr == 1) &&
9263 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9264 if (!terminate) {
9265 if (ctxt->progressive) {
9266 if ((lastlt == NULL) ||
9267 (ctxt->input->cur > lastlt))
9268 goto done;
9269 } else if (xmlParseLookupSequence(ctxt,
9270 '<', 0, 0) < 0) {
9271 goto done;
9272 }
9273 }
9274 }
9275 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009276 xmlParseCharData(ctxt, 0);
9277 }
9278 /*
9279 * Pop-up of finished entities.
9280 */
9281 while ((RAW == 0) && (ctxt->inputNr > 1))
9282 xmlPopInput(ctxt);
9283 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009284 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9285 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009286 ctxt->instate = XML_PARSER_EOF;
9287 break;
9288 }
9289 break;
9290 }
9291 case XML_PARSER_END_TAG:
9292 if (avail < 2)
9293 goto done;
9294 if (!terminate) {
9295 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009296 /* > can be found unescaped in attribute values */
9297 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009298 goto done;
9299 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9300 goto done;
9301 }
9302 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009303 if (ctxt->sax2) {
9304 xmlParseEndTag2(ctxt,
9305 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9306 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009307 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009308 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009309 }
9310#ifdef LIBXML_SAX1_ENABLED
9311 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009312 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009313#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009314 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009315 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009316 } else {
9317 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009318 }
9319 break;
9320 case XML_PARSER_CDATA_SECTION: {
9321 /*
9322 * The Push mode need to have the SAX callback for
9323 * cdataBlock merge back contiguous callbacks.
9324 */
9325 int base;
9326
9327 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9328 if (base < 0) {
9329 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9330 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9331 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009332 ctxt->sax->cdataBlock(ctxt->userData,
9333 ctxt->input->cur,
9334 XML_PARSER_BIG_BUFFER_SIZE);
9335 else if (ctxt->sax->characters != NULL)
9336 ctxt->sax->characters(ctxt->userData,
9337 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009338 XML_PARSER_BIG_BUFFER_SIZE);
9339 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009340 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009341 ctxt->checkIndex = 0;
9342 }
9343 goto done;
9344 } else {
9345 if ((ctxt->sax != NULL) && (base > 0) &&
9346 (!ctxt->disableSAX)) {
9347 if (ctxt->sax->cdataBlock != NULL)
9348 ctxt->sax->cdataBlock(ctxt->userData,
9349 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009350 else if (ctxt->sax->characters != NULL)
9351 ctxt->sax->characters(ctxt->userData,
9352 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009353 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009354 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009355 ctxt->checkIndex = 0;
9356 ctxt->instate = XML_PARSER_CONTENT;
9357#ifdef DEBUG_PUSH
9358 xmlGenericError(xmlGenericErrorContext,
9359 "PP: entering CONTENT\n");
9360#endif
9361 }
9362 break;
9363 }
Owen Taylor3473f882001-02-23 17:55:21 +00009364 case XML_PARSER_MISC:
9365 SKIP_BLANKS;
9366 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009367 avail = ctxt->input->length -
9368 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009369 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009370 avail = ctxt->input->buf->buffer->use -
9371 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009372 if (avail < 2)
9373 goto done;
9374 cur = ctxt->input->cur[0];
9375 next = ctxt->input->cur[1];
9376 if ((cur == '<') && (next == '?')) {
9377 if ((!terminate) &&
9378 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9379 goto done;
9380#ifdef DEBUG_PUSH
9381 xmlGenericError(xmlGenericErrorContext,
9382 "PP: Parsing PI\n");
9383#endif
9384 xmlParsePI(ctxt);
9385 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009386 (ctxt->input->cur[2] == '-') &&
9387 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009388 if ((!terminate) &&
9389 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9390 goto done;
9391#ifdef DEBUG_PUSH
9392 xmlGenericError(xmlGenericErrorContext,
9393 "PP: Parsing Comment\n");
9394#endif
9395 xmlParseComment(ctxt);
9396 ctxt->instate = XML_PARSER_MISC;
9397 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009398 (ctxt->input->cur[2] == 'D') &&
9399 (ctxt->input->cur[3] == 'O') &&
9400 (ctxt->input->cur[4] == 'C') &&
9401 (ctxt->input->cur[5] == 'T') &&
9402 (ctxt->input->cur[6] == 'Y') &&
9403 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009404 (ctxt->input->cur[8] == 'E')) {
9405 if ((!terminate) &&
9406 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9407 goto done;
9408#ifdef DEBUG_PUSH
9409 xmlGenericError(xmlGenericErrorContext,
9410 "PP: Parsing internal subset\n");
9411#endif
9412 ctxt->inSubset = 1;
9413 xmlParseDocTypeDecl(ctxt);
9414 if (RAW == '[') {
9415 ctxt->instate = XML_PARSER_DTD;
9416#ifdef DEBUG_PUSH
9417 xmlGenericError(xmlGenericErrorContext,
9418 "PP: entering DTD\n");
9419#endif
9420 } else {
9421 /*
9422 * Create and update the external subset.
9423 */
9424 ctxt->inSubset = 2;
9425 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9426 (ctxt->sax->externalSubset != NULL))
9427 ctxt->sax->externalSubset(ctxt->userData,
9428 ctxt->intSubName, ctxt->extSubSystem,
9429 ctxt->extSubURI);
9430 ctxt->inSubset = 0;
9431 ctxt->instate = XML_PARSER_PROLOG;
9432#ifdef DEBUG_PUSH
9433 xmlGenericError(xmlGenericErrorContext,
9434 "PP: entering PROLOG\n");
9435#endif
9436 }
9437 } else if ((cur == '<') && (next == '!') &&
9438 (avail < 9)) {
9439 goto done;
9440 } else {
9441 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009442 ctxt->progressive = 1;
9443 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009444#ifdef DEBUG_PUSH
9445 xmlGenericError(xmlGenericErrorContext,
9446 "PP: entering START_TAG\n");
9447#endif
9448 }
9449 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009450 case XML_PARSER_PROLOG:
9451 SKIP_BLANKS;
9452 if (ctxt->input->buf == NULL)
9453 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9454 else
9455 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9456 if (avail < 2)
9457 goto done;
9458 cur = ctxt->input->cur[0];
9459 next = ctxt->input->cur[1];
9460 if ((cur == '<') && (next == '?')) {
9461 if ((!terminate) &&
9462 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9463 goto done;
9464#ifdef DEBUG_PUSH
9465 xmlGenericError(xmlGenericErrorContext,
9466 "PP: Parsing PI\n");
9467#endif
9468 xmlParsePI(ctxt);
9469 } else if ((cur == '<') && (next == '!') &&
9470 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9471 if ((!terminate) &&
9472 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9473 goto done;
9474#ifdef DEBUG_PUSH
9475 xmlGenericError(xmlGenericErrorContext,
9476 "PP: Parsing Comment\n");
9477#endif
9478 xmlParseComment(ctxt);
9479 ctxt->instate = XML_PARSER_PROLOG;
9480 } else if ((cur == '<') && (next == '!') &&
9481 (avail < 4)) {
9482 goto done;
9483 } else {
9484 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009485 if (ctxt->progressive == 0)
9486 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009487 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009488#ifdef DEBUG_PUSH
9489 xmlGenericError(xmlGenericErrorContext,
9490 "PP: entering START_TAG\n");
9491#endif
9492 }
9493 break;
9494 case XML_PARSER_EPILOG:
9495 SKIP_BLANKS;
9496 if (ctxt->input->buf == NULL)
9497 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9498 else
9499 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9500 if (avail < 2)
9501 goto done;
9502 cur = ctxt->input->cur[0];
9503 next = ctxt->input->cur[1];
9504 if ((cur == '<') && (next == '?')) {
9505 if ((!terminate) &&
9506 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9507 goto done;
9508#ifdef DEBUG_PUSH
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: Parsing PI\n");
9511#endif
9512 xmlParsePI(ctxt);
9513 ctxt->instate = XML_PARSER_EPILOG;
9514 } else if ((cur == '<') && (next == '!') &&
9515 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9516 if ((!terminate) &&
9517 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9518 goto done;
9519#ifdef DEBUG_PUSH
9520 xmlGenericError(xmlGenericErrorContext,
9521 "PP: Parsing Comment\n");
9522#endif
9523 xmlParseComment(ctxt);
9524 ctxt->instate = XML_PARSER_EPILOG;
9525 } else if ((cur == '<') && (next == '!') &&
9526 (avail < 4)) {
9527 goto done;
9528 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009529 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009530 ctxt->instate = XML_PARSER_EOF;
9531#ifdef DEBUG_PUSH
9532 xmlGenericError(xmlGenericErrorContext,
9533 "PP: entering EOF\n");
9534#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009535 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009536 ctxt->sax->endDocument(ctxt->userData);
9537 goto done;
9538 }
9539 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009540 case XML_PARSER_DTD: {
9541 /*
9542 * Sorry but progressive parsing of the internal subset
9543 * is not expected to be supported. We first check that
9544 * the full content of the internal subset is available and
9545 * the parsing is launched only at that point.
9546 * Internal subset ends up with "']' S? '>'" in an unescaped
9547 * section and not in a ']]>' sequence which are conditional
9548 * sections (whoever argued to keep that crap in XML deserve
9549 * a place in hell !).
9550 */
9551 int base, i;
9552 xmlChar *buf;
9553 xmlChar quote = 0;
9554
9555 base = ctxt->input->cur - ctxt->input->base;
9556 if (base < 0) return(0);
9557 if (ctxt->checkIndex > base)
9558 base = ctxt->checkIndex;
9559 buf = ctxt->input->buf->buffer->content;
9560 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9561 base++) {
9562 if (quote != 0) {
9563 if (buf[base] == quote)
9564 quote = 0;
9565 continue;
9566 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009567 if ((quote == 0) && (buf[base] == '<')) {
9568 int found = 0;
9569 /* special handling of comments */
9570 if (((unsigned int) base + 4 <
9571 ctxt->input->buf->buffer->use) &&
9572 (buf[base + 1] == '!') &&
9573 (buf[base + 2] == '-') &&
9574 (buf[base + 3] == '-')) {
9575 for (;(unsigned int) base + 3 <
9576 ctxt->input->buf->buffer->use; base++) {
9577 if ((buf[base] == '-') &&
9578 (buf[base + 1] == '-') &&
9579 (buf[base + 2] == '>')) {
9580 found = 1;
9581 base += 2;
9582 break;
9583 }
9584 }
9585 if (!found)
9586 break;
9587 continue;
9588 }
9589 }
Owen Taylor3473f882001-02-23 17:55:21 +00009590 if (buf[base] == '"') {
9591 quote = '"';
9592 continue;
9593 }
9594 if (buf[base] == '\'') {
9595 quote = '\'';
9596 continue;
9597 }
9598 if (buf[base] == ']') {
9599 if ((unsigned int) base +1 >=
9600 ctxt->input->buf->buffer->use)
9601 break;
9602 if (buf[base + 1] == ']') {
9603 /* conditional crap, skip both ']' ! */
9604 base++;
9605 continue;
9606 }
9607 for (i = 0;
9608 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9609 i++) {
9610 if (buf[base + i] == '>')
9611 goto found_end_int_subset;
9612 }
9613 break;
9614 }
9615 }
9616 /*
9617 * We didn't found the end of the Internal subset
9618 */
9619 if (quote == 0)
9620 ctxt->checkIndex = base;
9621#ifdef DEBUG_PUSH
9622 if (next == 0)
9623 xmlGenericError(xmlGenericErrorContext,
9624 "PP: lookup of int subset end filed\n");
9625#endif
9626 goto done;
9627
9628found_end_int_subset:
9629 xmlParseInternalSubset(ctxt);
9630 ctxt->inSubset = 2;
9631 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9632 (ctxt->sax->externalSubset != NULL))
9633 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9634 ctxt->extSubSystem, ctxt->extSubURI);
9635 ctxt->inSubset = 0;
9636 ctxt->instate = XML_PARSER_PROLOG;
9637 ctxt->checkIndex = 0;
9638#ifdef DEBUG_PUSH
9639 xmlGenericError(xmlGenericErrorContext,
9640 "PP: entering PROLOG\n");
9641#endif
9642 break;
9643 }
9644 case XML_PARSER_COMMENT:
9645 xmlGenericError(xmlGenericErrorContext,
9646 "PP: internal error, state == COMMENT\n");
9647 ctxt->instate = XML_PARSER_CONTENT;
9648#ifdef DEBUG_PUSH
9649 xmlGenericError(xmlGenericErrorContext,
9650 "PP: entering CONTENT\n");
9651#endif
9652 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009653 case XML_PARSER_IGNORE:
9654 xmlGenericError(xmlGenericErrorContext,
9655 "PP: internal error, state == IGNORE");
9656 ctxt->instate = XML_PARSER_DTD;
9657#ifdef DEBUG_PUSH
9658 xmlGenericError(xmlGenericErrorContext,
9659 "PP: entering DTD\n");
9660#endif
9661 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009662 case XML_PARSER_PI:
9663 xmlGenericError(xmlGenericErrorContext,
9664 "PP: internal error, state == PI\n");
9665 ctxt->instate = XML_PARSER_CONTENT;
9666#ifdef DEBUG_PUSH
9667 xmlGenericError(xmlGenericErrorContext,
9668 "PP: entering CONTENT\n");
9669#endif
9670 break;
9671 case XML_PARSER_ENTITY_DECL:
9672 xmlGenericError(xmlGenericErrorContext,
9673 "PP: internal error, state == ENTITY_DECL\n");
9674 ctxt->instate = XML_PARSER_DTD;
9675#ifdef DEBUG_PUSH
9676 xmlGenericError(xmlGenericErrorContext,
9677 "PP: entering DTD\n");
9678#endif
9679 break;
9680 case XML_PARSER_ENTITY_VALUE:
9681 xmlGenericError(xmlGenericErrorContext,
9682 "PP: internal error, state == ENTITY_VALUE\n");
9683 ctxt->instate = XML_PARSER_CONTENT;
9684#ifdef DEBUG_PUSH
9685 xmlGenericError(xmlGenericErrorContext,
9686 "PP: entering DTD\n");
9687#endif
9688 break;
9689 case XML_PARSER_ATTRIBUTE_VALUE:
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9692 ctxt->instate = XML_PARSER_START_TAG;
9693#ifdef DEBUG_PUSH
9694 xmlGenericError(xmlGenericErrorContext,
9695 "PP: entering START_TAG\n");
9696#endif
9697 break;
9698 case XML_PARSER_SYSTEM_LITERAL:
9699 xmlGenericError(xmlGenericErrorContext,
9700 "PP: internal error, state == SYSTEM_LITERAL\n");
9701 ctxt->instate = XML_PARSER_START_TAG;
9702#ifdef DEBUG_PUSH
9703 xmlGenericError(xmlGenericErrorContext,
9704 "PP: entering START_TAG\n");
9705#endif
9706 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009707 case XML_PARSER_PUBLIC_LITERAL:
9708 xmlGenericError(xmlGenericErrorContext,
9709 "PP: internal error, state == PUBLIC_LITERAL\n");
9710 ctxt->instate = XML_PARSER_START_TAG;
9711#ifdef DEBUG_PUSH
9712 xmlGenericError(xmlGenericErrorContext,
9713 "PP: entering START_TAG\n");
9714#endif
9715 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009716 }
9717 }
9718done:
9719#ifdef DEBUG_PUSH
9720 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9721#endif
9722 return(ret);
9723}
9724
9725/**
Owen Taylor3473f882001-02-23 17:55:21 +00009726 * xmlParseChunk:
9727 * @ctxt: an XML parser context
9728 * @chunk: an char array
9729 * @size: the size in byte of the chunk
9730 * @terminate: last chunk indicator
9731 *
9732 * Parse a Chunk of memory
9733 *
9734 * Returns zero if no error, the xmlParserErrors otherwise.
9735 */
9736int
9737xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9738 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009739 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9740 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009741 if (ctxt->instate == XML_PARSER_START)
9742 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009743 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9744 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9745 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9746 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +00009747 int res;
Owen Taylor3473f882001-02-23 17:55:21 +00009748
William M. Bracka3215c72004-07-31 16:24:01 +00009749 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9750 if (res < 0) {
9751 ctxt->errNo = XML_PARSER_EOF;
9752 ctxt->disableSAX = 1;
9753 return (XML_PARSER_EOF);
9754 }
Owen Taylor3473f882001-02-23 17:55:21 +00009755 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9756 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009757 ctxt->input->end =
9758 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009759#ifdef DEBUG_PUSH
9760 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9761#endif
9762
Owen Taylor3473f882001-02-23 17:55:21 +00009763 } else if (ctxt->instate != XML_PARSER_EOF) {
9764 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9765 xmlParserInputBufferPtr in = ctxt->input->buf;
9766 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9767 (in->raw != NULL)) {
9768 int nbchars;
9769
9770 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9771 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009772 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009773 xmlGenericError(xmlGenericErrorContext,
9774 "xmlParseChunk: encoder error\n");
9775 return(XML_ERR_INVALID_ENCODING);
9776 }
9777 }
9778 }
9779 }
9780 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009781 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9782 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009783 if (terminate) {
9784 /*
9785 * Check for termination
9786 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009787 int avail = 0;
9788 if (ctxt->input->buf == NULL)
9789 avail = ctxt->input->length -
9790 (ctxt->input->cur - ctxt->input->base);
9791 else
9792 avail = ctxt->input->buf->buffer->use -
9793 (ctxt->input->cur - ctxt->input->base);
9794
Owen Taylor3473f882001-02-23 17:55:21 +00009795 if ((ctxt->instate != XML_PARSER_EOF) &&
9796 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009797 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009798 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009799 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009800 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009801 }
Owen Taylor3473f882001-02-23 17:55:21 +00009802 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009803 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009804 ctxt->sax->endDocument(ctxt->userData);
9805 }
9806 ctxt->instate = XML_PARSER_EOF;
9807 }
9808 return((xmlParserErrors) ctxt->errNo);
9809}
9810
9811/************************************************************************
9812 * *
9813 * I/O front end functions to the parser *
9814 * *
9815 ************************************************************************/
9816
9817/**
9818 * xmlStopParser:
9819 * @ctxt: an XML parser context
9820 *
9821 * Blocks further parser processing
9822 */
9823void
9824xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009825 if (ctxt == NULL)
9826 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009827 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009828 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009829 if (ctxt->input != NULL)
9830 ctxt->input->cur = BAD_CAST"";
9831}
9832
9833/**
9834 * xmlCreatePushParserCtxt:
9835 * @sax: a SAX handler
9836 * @user_data: The user data returned on SAX callbacks
9837 * @chunk: a pointer to an array of chars
9838 * @size: number of chars in the array
9839 * @filename: an optional file name or URI
9840 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009841 * Create a parser context for using the XML parser in push mode.
9842 * If @chunk and @size are non-NULL, the data is used to detect
9843 * the encoding. The remaining characters will be parsed so they
9844 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009845 * To allow content encoding detection, @size should be >= 4
9846 * The value of @filename is used for fetching external entities
9847 * and error/warning reports.
9848 *
9849 * Returns the new parser context or NULL
9850 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009851
Owen Taylor3473f882001-02-23 17:55:21 +00009852xmlParserCtxtPtr
9853xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9854 const char *chunk, int size, const char *filename) {
9855 xmlParserCtxtPtr ctxt;
9856 xmlParserInputPtr inputStream;
9857 xmlParserInputBufferPtr buf;
9858 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9859
9860 /*
9861 * plug some encoding conversion routines
9862 */
9863 if ((chunk != NULL) && (size >= 4))
9864 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9865
9866 buf = xmlAllocParserInputBuffer(enc);
9867 if (buf == NULL) return(NULL);
9868
9869 ctxt = xmlNewParserCtxt();
9870 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009871 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009872 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009873 return(NULL);
9874 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009875 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9876 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009877 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009878 xmlFreeParserInputBuffer(buf);
9879 xmlFreeParserCtxt(ctxt);
9880 return(NULL);
9881 }
Owen Taylor3473f882001-02-23 17:55:21 +00009882 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009883#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009884 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009885#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009886 xmlFree(ctxt->sax);
9887 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9888 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009889 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009890 xmlFreeParserInputBuffer(buf);
9891 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009892 return(NULL);
9893 }
9894 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9895 if (user_data != NULL)
9896 ctxt->userData = user_data;
9897 }
9898 if (filename == NULL) {
9899 ctxt->directory = NULL;
9900 } else {
9901 ctxt->directory = xmlParserGetDirectory(filename);
9902 }
9903
9904 inputStream = xmlNewInputStream(ctxt);
9905 if (inputStream == NULL) {
9906 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009907 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009908 return(NULL);
9909 }
9910
9911 if (filename == NULL)
9912 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +00009913 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +00009914 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009915 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +00009916 if (inputStream->filename == NULL) {
9917 xmlFreeParserCtxt(ctxt);
9918 xmlFreeParserInputBuffer(buf);
9919 return(NULL);
9920 }
9921 }
Owen Taylor3473f882001-02-23 17:55:21 +00009922 inputStream->buf = buf;
9923 inputStream->base = inputStream->buf->buffer->content;
9924 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009925 inputStream->end =
9926 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009927
9928 inputPush(ctxt, inputStream);
9929
9930 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9931 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009932 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9933 int cur = ctxt->input->cur - ctxt->input->base;
9934
Owen Taylor3473f882001-02-23 17:55:21 +00009935 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009936
9937 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9938 ctxt->input->cur = ctxt->input->base + cur;
9939 ctxt->input->end =
9940 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009941#ifdef DEBUG_PUSH
9942 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9943#endif
9944 }
9945
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009946 if (enc != XML_CHAR_ENCODING_NONE) {
9947 xmlSwitchEncoding(ctxt, enc);
9948 }
9949
Owen Taylor3473f882001-02-23 17:55:21 +00009950 return(ctxt);
9951}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009952#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009953
9954/**
9955 * xmlCreateIOParserCtxt:
9956 * @sax: a SAX handler
9957 * @user_data: The user data returned on SAX callbacks
9958 * @ioread: an I/O read function
9959 * @ioclose: an I/O close function
9960 * @ioctx: an I/O handler
9961 * @enc: the charset encoding if known
9962 *
9963 * Create a parser context for using the XML parser with an existing
9964 * I/O stream
9965 *
9966 * Returns the new parser context or NULL
9967 */
9968xmlParserCtxtPtr
9969xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9970 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9971 void *ioctx, xmlCharEncoding enc) {
9972 xmlParserCtxtPtr ctxt;
9973 xmlParserInputPtr inputStream;
9974 xmlParserInputBufferPtr buf;
9975
9976 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9977 if (buf == NULL) return(NULL);
9978
9979 ctxt = xmlNewParserCtxt();
9980 if (ctxt == NULL) {
9981 xmlFree(buf);
9982 return(NULL);
9983 }
9984 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009985#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009986 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009987#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009988 xmlFree(ctxt->sax);
9989 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9990 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009991 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009992 xmlFree(ctxt);
9993 return(NULL);
9994 }
9995 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9996 if (user_data != NULL)
9997 ctxt->userData = user_data;
9998 }
9999
10000 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10001 if (inputStream == NULL) {
10002 xmlFreeParserCtxt(ctxt);
10003 return(NULL);
10004 }
10005 inputPush(ctxt, inputStream);
10006
10007 return(ctxt);
10008}
10009
Daniel Veillard4432df22003-09-28 18:58:27 +000010010#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010011/************************************************************************
10012 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010013 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010014 * *
10015 ************************************************************************/
10016
10017/**
10018 * xmlIOParseDTD:
10019 * @sax: the SAX handler block or NULL
10020 * @input: an Input Buffer
10021 * @enc: the charset encoding if known
10022 *
10023 * Load and parse a DTD
10024 *
10025 * Returns the resulting xmlDtdPtr or NULL in case of error.
10026 * @input will be freed at parsing end.
10027 */
10028
10029xmlDtdPtr
10030xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10031 xmlCharEncoding enc) {
10032 xmlDtdPtr ret = NULL;
10033 xmlParserCtxtPtr ctxt;
10034 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010035 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010036
10037 if (input == NULL)
10038 return(NULL);
10039
10040 ctxt = xmlNewParserCtxt();
10041 if (ctxt == NULL) {
10042 return(NULL);
10043 }
10044
10045 /*
10046 * Set-up the SAX context
10047 */
10048 if (sax != NULL) {
10049 if (ctxt->sax != NULL)
10050 xmlFree(ctxt->sax);
10051 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010052 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010053 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010054 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010055
10056 /*
10057 * generate a parser input from the I/O handler
10058 */
10059
Daniel Veillard43caefb2003-12-07 19:32:22 +000010060 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010061 if (pinput == NULL) {
10062 if (sax != NULL) ctxt->sax = NULL;
10063 xmlFreeParserCtxt(ctxt);
10064 return(NULL);
10065 }
10066
10067 /*
10068 * plug some encoding conversion routines here.
10069 */
10070 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010071 if (enc != XML_CHAR_ENCODING_NONE) {
10072 xmlSwitchEncoding(ctxt, enc);
10073 }
Owen Taylor3473f882001-02-23 17:55:21 +000010074
10075 pinput->filename = NULL;
10076 pinput->line = 1;
10077 pinput->col = 1;
10078 pinput->base = ctxt->input->cur;
10079 pinput->cur = ctxt->input->cur;
10080 pinput->free = NULL;
10081
10082 /*
10083 * let's parse that entity knowing it's an external subset.
10084 */
10085 ctxt->inSubset = 2;
10086 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10087 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10088 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010089
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010090 if ((enc == XML_CHAR_ENCODING_NONE) &&
10091 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010092 /*
10093 * Get the 4 first bytes and decode the charset
10094 * if enc != XML_CHAR_ENCODING_NONE
10095 * plug some encoding conversion routines.
10096 */
10097 start[0] = RAW;
10098 start[1] = NXT(1);
10099 start[2] = NXT(2);
10100 start[3] = NXT(3);
10101 enc = xmlDetectCharEncoding(start, 4);
10102 if (enc != XML_CHAR_ENCODING_NONE) {
10103 xmlSwitchEncoding(ctxt, enc);
10104 }
10105 }
10106
Owen Taylor3473f882001-02-23 17:55:21 +000010107 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10108
10109 if (ctxt->myDoc != NULL) {
10110 if (ctxt->wellFormed) {
10111 ret = ctxt->myDoc->extSubset;
10112 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010113 if (ret != NULL) {
10114 xmlNodePtr tmp;
10115
10116 ret->doc = NULL;
10117 tmp = ret->children;
10118 while (tmp != NULL) {
10119 tmp->doc = NULL;
10120 tmp = tmp->next;
10121 }
10122 }
Owen Taylor3473f882001-02-23 17:55:21 +000010123 } else {
10124 ret = NULL;
10125 }
10126 xmlFreeDoc(ctxt->myDoc);
10127 ctxt->myDoc = NULL;
10128 }
10129 if (sax != NULL) ctxt->sax = NULL;
10130 xmlFreeParserCtxt(ctxt);
10131
10132 return(ret);
10133}
10134
10135/**
10136 * xmlSAXParseDTD:
10137 * @sax: the SAX handler block
10138 * @ExternalID: a NAME* containing the External ID of the DTD
10139 * @SystemID: a NAME* containing the URL to the DTD
10140 *
10141 * Load and parse an external subset.
10142 *
10143 * Returns the resulting xmlDtdPtr or NULL in case of error.
10144 */
10145
10146xmlDtdPtr
10147xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10148 const xmlChar *SystemID) {
10149 xmlDtdPtr ret = NULL;
10150 xmlParserCtxtPtr ctxt;
10151 xmlParserInputPtr input = NULL;
10152 xmlCharEncoding enc;
10153
10154 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10155
10156 ctxt = xmlNewParserCtxt();
10157 if (ctxt == NULL) {
10158 return(NULL);
10159 }
10160
10161 /*
10162 * Set-up the SAX context
10163 */
10164 if (sax != NULL) {
10165 if (ctxt->sax != NULL)
10166 xmlFree(ctxt->sax);
10167 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010168 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010169 }
10170
10171 /*
10172 * Ask the Entity resolver to load the damn thing
10173 */
10174
10175 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010176 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010177 if (input == NULL) {
10178 if (sax != NULL) ctxt->sax = NULL;
10179 xmlFreeParserCtxt(ctxt);
10180 return(NULL);
10181 }
10182
10183 /*
10184 * plug some encoding conversion routines here.
10185 */
10186 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010187 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10188 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10189 xmlSwitchEncoding(ctxt, enc);
10190 }
Owen Taylor3473f882001-02-23 17:55:21 +000010191
10192 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010193 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010194 input->line = 1;
10195 input->col = 1;
10196 input->base = ctxt->input->cur;
10197 input->cur = ctxt->input->cur;
10198 input->free = NULL;
10199
10200 /*
10201 * let's parse that entity knowing it's an external subset.
10202 */
10203 ctxt->inSubset = 2;
10204 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10205 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10206 ExternalID, SystemID);
10207 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10208
10209 if (ctxt->myDoc != NULL) {
10210 if (ctxt->wellFormed) {
10211 ret = ctxt->myDoc->extSubset;
10212 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010213 if (ret != NULL) {
10214 xmlNodePtr tmp;
10215
10216 ret->doc = NULL;
10217 tmp = ret->children;
10218 while (tmp != NULL) {
10219 tmp->doc = NULL;
10220 tmp = tmp->next;
10221 }
10222 }
Owen Taylor3473f882001-02-23 17:55:21 +000010223 } else {
10224 ret = NULL;
10225 }
10226 xmlFreeDoc(ctxt->myDoc);
10227 ctxt->myDoc = NULL;
10228 }
10229 if (sax != NULL) ctxt->sax = NULL;
10230 xmlFreeParserCtxt(ctxt);
10231
10232 return(ret);
10233}
10234
Daniel Veillard4432df22003-09-28 18:58:27 +000010235
Owen Taylor3473f882001-02-23 17:55:21 +000010236/**
10237 * xmlParseDTD:
10238 * @ExternalID: a NAME* containing the External ID of the DTD
10239 * @SystemID: a NAME* containing the URL to the DTD
10240 *
10241 * Load and parse an external subset.
10242 *
10243 * Returns the resulting xmlDtdPtr or NULL in case of error.
10244 */
10245
10246xmlDtdPtr
10247xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10248 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10249}
Daniel Veillard4432df22003-09-28 18:58:27 +000010250#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010251
10252/************************************************************************
10253 * *
10254 * Front ends when parsing an Entity *
10255 * *
10256 ************************************************************************/
10257
10258/**
Owen Taylor3473f882001-02-23 17:55:21 +000010259 * xmlParseCtxtExternalEntity:
10260 * @ctx: the existing parsing context
10261 * @URL: the URL for the entity to load
10262 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010263 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010264 *
10265 * Parse an external general entity within an existing parsing context
10266 * An external general parsed entity is well-formed if it matches the
10267 * production labeled extParsedEnt.
10268 *
10269 * [78] extParsedEnt ::= TextDecl? content
10270 *
10271 * Returns 0 if the entity is well formed, -1 in case of args problem and
10272 * the parser error code otherwise
10273 */
10274
10275int
10276xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010277 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010278 xmlParserCtxtPtr ctxt;
10279 xmlDocPtr newDoc;
10280 xmlSAXHandlerPtr oldsax = NULL;
10281 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010282 xmlChar start[4];
10283 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010284
10285 if (ctx->depth > 40) {
10286 return(XML_ERR_ENTITY_LOOP);
10287 }
10288
Daniel Veillardcda96922001-08-21 10:56:31 +000010289 if (lst != NULL)
10290 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010291 if ((URL == NULL) && (ID == NULL))
10292 return(-1);
10293 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10294 return(-1);
10295
10296
10297 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10298 if (ctxt == NULL) return(-1);
10299 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010300 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010301 oldsax = ctxt->sax;
10302 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010303 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010304 newDoc = xmlNewDoc(BAD_CAST "1.0");
10305 if (newDoc == NULL) {
10306 xmlFreeParserCtxt(ctxt);
10307 return(-1);
10308 }
10309 if (ctx->myDoc != NULL) {
10310 newDoc->intSubset = ctx->myDoc->intSubset;
10311 newDoc->extSubset = ctx->myDoc->extSubset;
10312 }
10313 if (ctx->myDoc->URL != NULL) {
10314 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10315 }
10316 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10317 if (newDoc->children == NULL) {
10318 ctxt->sax = oldsax;
10319 xmlFreeParserCtxt(ctxt);
10320 newDoc->intSubset = NULL;
10321 newDoc->extSubset = NULL;
10322 xmlFreeDoc(newDoc);
10323 return(-1);
10324 }
10325 nodePush(ctxt, newDoc->children);
10326 if (ctx->myDoc == NULL) {
10327 ctxt->myDoc = newDoc;
10328 } else {
10329 ctxt->myDoc = ctx->myDoc;
10330 newDoc->children->doc = ctx->myDoc;
10331 }
10332
Daniel Veillard87a764e2001-06-20 17:41:10 +000010333 /*
10334 * Get the 4 first bytes and decode the charset
10335 * if enc != XML_CHAR_ENCODING_NONE
10336 * plug some encoding conversion routines.
10337 */
10338 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010339 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10340 start[0] = RAW;
10341 start[1] = NXT(1);
10342 start[2] = NXT(2);
10343 start[3] = NXT(3);
10344 enc = xmlDetectCharEncoding(start, 4);
10345 if (enc != XML_CHAR_ENCODING_NONE) {
10346 xmlSwitchEncoding(ctxt, enc);
10347 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010348 }
10349
Owen Taylor3473f882001-02-23 17:55:21 +000010350 /*
10351 * Parse a possible text declaration first
10352 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010353 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010354 xmlParseTextDecl(ctxt);
10355 }
10356
10357 /*
10358 * Doing validity checking on chunk doesn't make sense
10359 */
10360 ctxt->instate = XML_PARSER_CONTENT;
10361 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010362 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010363 ctxt->loadsubset = ctx->loadsubset;
10364 ctxt->depth = ctx->depth + 1;
10365 ctxt->replaceEntities = ctx->replaceEntities;
10366 if (ctxt->validate) {
10367 ctxt->vctxt.error = ctx->vctxt.error;
10368 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010369 } else {
10370 ctxt->vctxt.error = NULL;
10371 ctxt->vctxt.warning = NULL;
10372 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010373 ctxt->vctxt.nodeTab = NULL;
10374 ctxt->vctxt.nodeNr = 0;
10375 ctxt->vctxt.nodeMax = 0;
10376 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010377 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10378 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010379 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10380 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10381 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010382 ctxt->dictNames = ctx->dictNames;
10383 ctxt->attsDefault = ctx->attsDefault;
10384 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010385 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010386
10387 xmlParseContent(ctxt);
10388
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010389 ctx->validate = ctxt->validate;
10390 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010391 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010392 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010393 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010394 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010395 }
10396 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010397 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010398 }
10399
10400 if (!ctxt->wellFormed) {
10401 if (ctxt->errNo == 0)
10402 ret = 1;
10403 else
10404 ret = ctxt->errNo;
10405 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010406 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010407 xmlNodePtr cur;
10408
10409 /*
10410 * Return the newly created nodeset after unlinking it from
10411 * they pseudo parent.
10412 */
10413 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010414 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010415 while (cur != NULL) {
10416 cur->parent = NULL;
10417 cur = cur->next;
10418 }
10419 newDoc->children->children = NULL;
10420 }
10421 ret = 0;
10422 }
10423 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010424 ctxt->dict = NULL;
10425 ctxt->attsDefault = NULL;
10426 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010427 xmlFreeParserCtxt(ctxt);
10428 newDoc->intSubset = NULL;
10429 newDoc->extSubset = NULL;
10430 xmlFreeDoc(newDoc);
10431
10432 return(ret);
10433}
10434
10435/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010436 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010437 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010438 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010439 * @sax: the SAX handler bloc (possibly NULL)
10440 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10441 * @depth: Used for loop detection, use 0
10442 * @URL: the URL for the entity to load
10443 * @ID: the System ID for the entity to load
10444 * @list: the return value for the set of parsed nodes
10445 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010446 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010447 *
10448 * Returns 0 if the entity is well formed, -1 in case of args problem and
10449 * the parser error code otherwise
10450 */
10451
Daniel Veillard7d515752003-09-26 19:12:37 +000010452static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010453xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10454 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010455 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010456 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010457 xmlParserCtxtPtr ctxt;
10458 xmlDocPtr newDoc;
10459 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010460 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010461 xmlChar start[4];
10462 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010463
10464 if (depth > 40) {
10465 return(XML_ERR_ENTITY_LOOP);
10466 }
10467
10468
10469
10470 if (list != NULL)
10471 *list = NULL;
10472 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010473 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010474 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010475 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010476
10477
10478 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010479 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010480 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010481 if (oldctxt != NULL) {
10482 ctxt->_private = oldctxt->_private;
10483 ctxt->loadsubset = oldctxt->loadsubset;
10484 ctxt->validate = oldctxt->validate;
10485 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010486 ctxt->record_info = oldctxt->record_info;
10487 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10488 ctxt->node_seq.length = oldctxt->node_seq.length;
10489 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010490 } else {
10491 /*
10492 * Doing validity checking on chunk without context
10493 * doesn't make sense
10494 */
10495 ctxt->_private = NULL;
10496 ctxt->validate = 0;
10497 ctxt->external = 2;
10498 ctxt->loadsubset = 0;
10499 }
Owen Taylor3473f882001-02-23 17:55:21 +000010500 if (sax != NULL) {
10501 oldsax = ctxt->sax;
10502 ctxt->sax = sax;
10503 if (user_data != NULL)
10504 ctxt->userData = user_data;
10505 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010506 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010507 newDoc = xmlNewDoc(BAD_CAST "1.0");
10508 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010509 ctxt->node_seq.maximum = 0;
10510 ctxt->node_seq.length = 0;
10511 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010512 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010513 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010514 }
10515 if (doc != NULL) {
10516 newDoc->intSubset = doc->intSubset;
10517 newDoc->extSubset = doc->extSubset;
10518 }
10519 if (doc->URL != NULL) {
10520 newDoc->URL = xmlStrdup(doc->URL);
10521 }
10522 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10523 if (newDoc->children == NULL) {
10524 if (sax != NULL)
10525 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010526 ctxt->node_seq.maximum = 0;
10527 ctxt->node_seq.length = 0;
10528 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010529 xmlFreeParserCtxt(ctxt);
10530 newDoc->intSubset = NULL;
10531 newDoc->extSubset = NULL;
10532 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010533 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 }
10535 nodePush(ctxt, newDoc->children);
10536 if (doc == NULL) {
10537 ctxt->myDoc = newDoc;
10538 } else {
10539 ctxt->myDoc = doc;
10540 newDoc->children->doc = doc;
10541 }
10542
Daniel Veillard87a764e2001-06-20 17:41:10 +000010543 /*
10544 * Get the 4 first bytes and decode the charset
10545 * if enc != XML_CHAR_ENCODING_NONE
10546 * plug some encoding conversion routines.
10547 */
10548 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010549 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10550 start[0] = RAW;
10551 start[1] = NXT(1);
10552 start[2] = NXT(2);
10553 start[3] = NXT(3);
10554 enc = xmlDetectCharEncoding(start, 4);
10555 if (enc != XML_CHAR_ENCODING_NONE) {
10556 xmlSwitchEncoding(ctxt, enc);
10557 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010558 }
10559
Owen Taylor3473f882001-02-23 17:55:21 +000010560 /*
10561 * Parse a possible text declaration first
10562 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010563 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010564 xmlParseTextDecl(ctxt);
10565 }
10566
Owen Taylor3473f882001-02-23 17:55:21 +000010567 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010568 ctxt->depth = depth;
10569
10570 xmlParseContent(ctxt);
10571
Daniel Veillard561b7f82002-03-20 21:55:57 +000010572 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010573 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010574 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010575 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010576 }
10577 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010578 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010579 }
10580
10581 if (!ctxt->wellFormed) {
10582 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010583 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010584 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010585 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010586 } else {
10587 if (list != NULL) {
10588 xmlNodePtr cur;
10589
10590 /*
10591 * Return the newly created nodeset after unlinking it from
10592 * they pseudo parent.
10593 */
10594 cur = newDoc->children->children;
10595 *list = cur;
10596 while (cur != NULL) {
10597 cur->parent = NULL;
10598 cur = cur->next;
10599 }
10600 newDoc->children->children = NULL;
10601 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010602 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010603 }
10604 if (sax != NULL)
10605 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010606 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10607 oldctxt->node_seq.length = ctxt->node_seq.length;
10608 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010609 ctxt->node_seq.maximum = 0;
10610 ctxt->node_seq.length = 0;
10611 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010612 xmlFreeParserCtxt(ctxt);
10613 newDoc->intSubset = NULL;
10614 newDoc->extSubset = NULL;
10615 xmlFreeDoc(newDoc);
10616
10617 return(ret);
10618}
10619
Daniel Veillard81273902003-09-30 00:43:48 +000010620#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010621/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010622 * xmlParseExternalEntity:
10623 * @doc: the document the chunk pertains to
10624 * @sax: the SAX handler bloc (possibly NULL)
10625 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10626 * @depth: Used for loop detection, use 0
10627 * @URL: the URL for the entity to load
10628 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010629 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010630 *
10631 * Parse an external general entity
10632 * An external general parsed entity is well-formed if it matches the
10633 * production labeled extParsedEnt.
10634 *
10635 * [78] extParsedEnt ::= TextDecl? content
10636 *
10637 * Returns 0 if the entity is well formed, -1 in case of args problem and
10638 * the parser error code otherwise
10639 */
10640
10641int
10642xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010643 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010644 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010645 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010646}
10647
10648/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010649 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010650 * @doc: the document the chunk pertains to
10651 * @sax: the SAX handler bloc (possibly NULL)
10652 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10653 * @depth: Used for loop detection, use 0
10654 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010655 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010656 *
10657 * Parse a well-balanced chunk of an XML document
10658 * called by the parser
10659 * The allowed sequence for the Well Balanced Chunk is the one defined by
10660 * the content production in the XML grammar:
10661 *
10662 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10663 *
10664 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10665 * the parser error code otherwise
10666 */
10667
10668int
10669xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010670 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010671 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10672 depth, string, lst, 0 );
10673}
Daniel Veillard81273902003-09-30 00:43:48 +000010674#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010675
10676/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010677 * xmlParseBalancedChunkMemoryInternal:
10678 * @oldctxt: the existing parsing context
10679 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10680 * @user_data: the user data field for the parser context
10681 * @lst: the return value for the set of parsed nodes
10682 *
10683 *
10684 * Parse a well-balanced chunk of an XML document
10685 * called by the parser
10686 * The allowed sequence for the Well Balanced Chunk is the one defined by
10687 * the content production in the XML grammar:
10688 *
10689 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10690 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010691 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10692 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010693 *
10694 * In case recover is set to 1, the nodelist will not be empty even if
10695 * the parsed chunk is not well balanced.
10696 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010697static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010698xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10699 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10700 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010701 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010702 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010703 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010704 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010705 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010706
10707 if (oldctxt->depth > 40) {
10708 return(XML_ERR_ENTITY_LOOP);
10709 }
10710
10711
10712 if (lst != NULL)
10713 *lst = NULL;
10714 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010715 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010716
10717 size = xmlStrlen(string);
10718
10719 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010720 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010721 if (user_data != NULL)
10722 ctxt->userData = user_data;
10723 else
10724 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010725 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10726 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010727 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10728 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10729 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010730
10731 oldsax = ctxt->sax;
10732 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010733 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010734 ctxt->replaceEntities = oldctxt->replaceEntities;
10735 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010736
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010737 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010738 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010739 newDoc = xmlNewDoc(BAD_CAST "1.0");
10740 if (newDoc == NULL) {
10741 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010742 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010743 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010744 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010745 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010746 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010747 } else {
10748 ctxt->myDoc = oldctxt->myDoc;
10749 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010750 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010751 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010752 BAD_CAST "pseudoroot", NULL);
10753 if (ctxt->myDoc->children == NULL) {
10754 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010755 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010756 xmlFreeParserCtxt(ctxt);
10757 if (newDoc != NULL)
10758 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010759 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010760 }
10761 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010762 ctxt->instate = XML_PARSER_CONTENT;
10763 ctxt->depth = oldctxt->depth + 1;
10764
Daniel Veillard328f48c2002-11-15 15:24:34 +000010765 ctxt->validate = 0;
10766 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010767 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10768 /*
10769 * ID/IDREF registration will be done in xmlValidateElement below
10770 */
10771 ctxt->loadsubset |= XML_SKIP_IDS;
10772 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010773 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010774 ctxt->attsDefault = oldctxt->attsDefault;
10775 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010776
Daniel Veillard68e9e742002-11-16 15:35:11 +000010777 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010778 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010779 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010780 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010781 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010782 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010783 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010784 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010785 }
10786
10787 if (!ctxt->wellFormed) {
10788 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010789 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010790 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010791 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010792 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010793 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010794 }
10795
William M. Brack7b9154b2003-09-27 19:23:50 +000010796 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010797 xmlNodePtr cur;
10798
10799 /*
10800 * Return the newly created nodeset after unlinking it from
10801 * they pseudo parent.
10802 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010803 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010804 *lst = cur;
10805 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010806#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010807 if (oldctxt->validate && oldctxt->wellFormed &&
10808 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10809 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10810 oldctxt->myDoc, cur);
10811 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010812#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010813 cur->parent = NULL;
10814 cur = cur->next;
10815 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010816 ctxt->myDoc->children->children = NULL;
10817 }
10818 if (ctxt->myDoc != NULL) {
10819 xmlFreeNode(ctxt->myDoc->children);
10820 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010821 }
10822
10823 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010824 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010825 ctxt->attsDefault = NULL;
10826 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010827 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010828 if (newDoc != NULL)
10829 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010830
10831 return(ret);
10832}
10833
Daniel Veillard29b17482004-08-16 00:39:03 +000010834/**
10835 * xmlParseInNodeContext:
10836 * @node: the context node
10837 * @data: the input string
10838 * @datalen: the input string length in bytes
10839 * @options: a combination of xmlParserOption
10840 * @lst: the return value for the set of parsed nodes
10841 *
10842 * Parse a well-balanced chunk of an XML document
10843 * within the context (DTD, namespaces, etc ...) of the given node.
10844 *
10845 * The allowed sequence for the data is a Well Balanced Chunk defined by
10846 * the content production in the XML grammar:
10847 *
10848 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10849 *
10850 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10851 * error code otherwise
10852 */
10853xmlParserErrors
10854xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
10855 int options, xmlNodePtr *lst) {
10856#ifdef SAX2
10857 xmlParserCtxtPtr ctxt;
10858 xmlDocPtr doc = NULL;
10859 xmlNodePtr fake, cur;
10860 int nsnr = 0;
10861
10862 xmlParserErrors ret = XML_ERR_OK;
10863
10864 /*
10865 * check all input parameters, grab the document
10866 */
10867 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
10868 return(XML_ERR_INTERNAL_ERROR);
10869 switch (node->type) {
10870 case XML_ELEMENT_NODE:
10871 case XML_ATTRIBUTE_NODE:
10872 case XML_TEXT_NODE:
10873 case XML_CDATA_SECTION_NODE:
10874 case XML_ENTITY_REF_NODE:
10875 case XML_PI_NODE:
10876 case XML_COMMENT_NODE:
10877 case XML_DOCUMENT_NODE:
10878 case XML_HTML_DOCUMENT_NODE:
10879 break;
10880 default:
10881 return(XML_ERR_INTERNAL_ERROR);
10882
10883 }
10884 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
10885 (node->type != XML_DOCUMENT_NODE) &&
10886 (node->type != XML_HTML_DOCUMENT_NODE))
10887 node = node->parent;
10888 if (node == NULL)
10889 return(XML_ERR_INTERNAL_ERROR);
10890 if (node->type == XML_ELEMENT_NODE)
10891 doc = node->doc;
10892 else
10893 doc = (xmlDocPtr) node;
10894 if (doc == NULL)
10895 return(XML_ERR_INTERNAL_ERROR);
10896
10897 /*
10898 * allocate a context and set-up everything not related to the
10899 * node position in the tree
10900 */
10901 if (doc->type == XML_DOCUMENT_NODE)
10902 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
10903#ifdef LIBXML_HTML_ENABLED
10904 else if (doc->type == XML_HTML_DOCUMENT_NODE)
10905 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
10906#endif
10907 else
10908 return(XML_ERR_INTERNAL_ERROR);
10909
10910 if (ctxt == NULL)
10911 return(XML_ERR_NO_MEMORY);
10912 fake = xmlNewComment(NULL);
10913 if (fake == NULL) {
10914 xmlFreeParserCtxt(ctxt);
10915 return(XML_ERR_NO_MEMORY);
10916 }
10917 xmlAddChild(node, fake);
10918
10919 xmlCtxtUseOptions(ctxt, options);
10920 if (doc->dict != NULL) {
10921 if (ctxt->dict != NULL)
10922 xmlDictFree(ctxt->dict);
10923 ctxt->dict = doc->dict;
10924 }
10925 xmlDetectSAX2(ctxt);
10926 ctxt->myDoc = doc;
10927
10928 if (node->type == XML_ELEMENT_NODE) {
10929 nodePush(ctxt, node);
10930 /*
10931 * initialize the SAX2 namespaces stack
10932 */
10933 cur = node;
10934 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
10935 xmlNsPtr ns = cur->nsDef;
10936 const xmlChar *iprefix, *ihref;
10937
10938 while (ns != NULL) {
10939 if (ctxt->dict) {
10940 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
10941 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
10942 } else {
10943 iprefix = ns->prefix;
10944 ihref = ns->href;
10945 }
10946
10947 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
10948 nsPush(ctxt, iprefix, ihref);
10949 nsnr++;
10950 }
10951 ns = ns->next;
10952 }
10953 cur = cur->parent;
10954 }
10955 ctxt->instate = XML_PARSER_CONTENT;
10956 }
10957
10958 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
10959 /*
10960 * ID/IDREF registration will be done in xmlValidateElement below
10961 */
10962 ctxt->loadsubset |= XML_SKIP_IDS;
10963 }
10964
10965 xmlParseContent(ctxt);
10966 nsPop(ctxt, nsnr);
10967 if ((RAW == '<') && (NXT(1) == '/')) {
10968 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10969 } else if (RAW != 0) {
10970 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10971 }
10972 if ((ctxt->node != NULL) && (ctxt->node != node)) {
10973 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10974 ctxt->wellFormed = 0;
10975 }
10976
10977 if (!ctxt->wellFormed) {
10978 if (ctxt->errNo == 0)
10979 ret = XML_ERR_INTERNAL_ERROR;
10980 else
10981 ret = (xmlParserErrors)ctxt->errNo;
10982 } else {
10983 ret = XML_ERR_OK;
10984 }
10985
10986 /*
10987 * Return the newly created nodeset after unlinking it from
10988 * the pseudo sibling.
10989 */
10990
10991 cur = fake->next;
10992 fake->next = NULL;
10993 node->last = fake;
10994
10995 if (cur != NULL) {
10996 cur->prev = NULL;
10997 }
10998
10999 *lst = cur;
11000
11001 while (cur != NULL) {
11002 cur->parent = NULL;
11003 cur = cur->next;
11004 }
11005
11006 xmlUnlinkNode(fake);
11007 xmlFreeNode(fake);
11008
11009
11010 if (ret != XML_ERR_OK) {
11011 xmlFreeNodeList(*lst);
11012 *lst = NULL;
11013 }
11014
11015 ctxt->dict = NULL;
11016 xmlFreeParserCtxt(ctxt);
11017
11018 return(ret);
11019#else /* !SAX2 */
11020 return(XML_ERR_INTERNAL_ERROR);
11021#endif
11022}
11023
Daniel Veillard81273902003-09-30 00:43:48 +000011024#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011025/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011026 * xmlParseBalancedChunkMemoryRecover:
11027 * @doc: the document the chunk pertains to
11028 * @sax: the SAX handler bloc (possibly NULL)
11029 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11030 * @depth: Used for loop detection, use 0
11031 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11032 * @lst: the return value for the set of parsed nodes
11033 * @recover: return nodes even if the data is broken (use 0)
11034 *
11035 *
11036 * Parse a well-balanced chunk of an XML document
11037 * called by the parser
11038 * The allowed sequence for the Well Balanced Chunk is the one defined by
11039 * the content production in the XML grammar:
11040 *
11041 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11042 *
11043 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11044 * the parser error code otherwise
11045 *
11046 * In case recover is set to 1, the nodelist will not be empty even if
11047 * the parsed chunk is not well balanced.
11048 */
11049int
11050xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11051 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11052 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011053 xmlParserCtxtPtr ctxt;
11054 xmlDocPtr newDoc;
11055 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011056 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011057 int size;
11058 int ret = 0;
11059
11060 if (depth > 40) {
11061 return(XML_ERR_ENTITY_LOOP);
11062 }
11063
11064
Daniel Veillardcda96922001-08-21 10:56:31 +000011065 if (lst != NULL)
11066 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011067 if (string == NULL)
11068 return(-1);
11069
11070 size = xmlStrlen(string);
11071
11072 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11073 if (ctxt == NULL) return(-1);
11074 ctxt->userData = ctxt;
11075 if (sax != NULL) {
11076 oldsax = ctxt->sax;
11077 ctxt->sax = sax;
11078 if (user_data != NULL)
11079 ctxt->userData = user_data;
11080 }
11081 newDoc = xmlNewDoc(BAD_CAST "1.0");
11082 if (newDoc == NULL) {
11083 xmlFreeParserCtxt(ctxt);
11084 return(-1);
11085 }
11086 if (doc != NULL) {
11087 newDoc->intSubset = doc->intSubset;
11088 newDoc->extSubset = doc->extSubset;
11089 }
11090 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11091 if (newDoc->children == NULL) {
11092 if (sax != NULL)
11093 ctxt->sax = oldsax;
11094 xmlFreeParserCtxt(ctxt);
11095 newDoc->intSubset = NULL;
11096 newDoc->extSubset = NULL;
11097 xmlFreeDoc(newDoc);
11098 return(-1);
11099 }
11100 nodePush(ctxt, newDoc->children);
11101 if (doc == NULL) {
11102 ctxt->myDoc = newDoc;
11103 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011104 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011105 newDoc->children->doc = doc;
11106 }
11107 ctxt->instate = XML_PARSER_CONTENT;
11108 ctxt->depth = depth;
11109
11110 /*
11111 * Doing validity checking on chunk doesn't make sense
11112 */
11113 ctxt->validate = 0;
11114 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011115 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011116
Daniel Veillardb39bc392002-10-26 19:29:51 +000011117 if ( doc != NULL ){
11118 content = doc->children;
11119 doc->children = NULL;
11120 xmlParseContent(ctxt);
11121 doc->children = content;
11122 }
11123 else {
11124 xmlParseContent(ctxt);
11125 }
Owen Taylor3473f882001-02-23 17:55:21 +000011126 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011127 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011128 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011129 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011130 }
11131 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011132 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011133 }
11134
11135 if (!ctxt->wellFormed) {
11136 if (ctxt->errNo == 0)
11137 ret = 1;
11138 else
11139 ret = ctxt->errNo;
11140 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011141 ret = 0;
11142 }
11143
11144 if (lst != NULL && (ret == 0 || recover == 1)) {
11145 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011146
11147 /*
11148 * Return the newly created nodeset after unlinking it from
11149 * they pseudo parent.
11150 */
11151 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011152 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011153 while (cur != NULL) {
11154 cur->parent = NULL;
11155 cur = cur->next;
11156 }
11157 newDoc->children->children = NULL;
11158 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011159
Owen Taylor3473f882001-02-23 17:55:21 +000011160 if (sax != NULL)
11161 ctxt->sax = oldsax;
11162 xmlFreeParserCtxt(ctxt);
11163 newDoc->intSubset = NULL;
11164 newDoc->extSubset = NULL;
11165 xmlFreeDoc(newDoc);
11166
11167 return(ret);
11168}
11169
11170/**
11171 * xmlSAXParseEntity:
11172 * @sax: the SAX handler block
11173 * @filename: the filename
11174 *
11175 * parse an XML external entity out of context and build a tree.
11176 * It use the given SAX function block to handle the parsing callback.
11177 * If sax is NULL, fallback to the default DOM tree building routines.
11178 *
11179 * [78] extParsedEnt ::= TextDecl? content
11180 *
11181 * This correspond to a "Well Balanced" chunk
11182 *
11183 * Returns the resulting document tree
11184 */
11185
11186xmlDocPtr
11187xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11188 xmlDocPtr ret;
11189 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011190
11191 ctxt = xmlCreateFileParserCtxt(filename);
11192 if (ctxt == NULL) {
11193 return(NULL);
11194 }
11195 if (sax != NULL) {
11196 if (ctxt->sax != NULL)
11197 xmlFree(ctxt->sax);
11198 ctxt->sax = sax;
11199 ctxt->userData = NULL;
11200 }
11201
Owen Taylor3473f882001-02-23 17:55:21 +000011202 xmlParseExtParsedEnt(ctxt);
11203
11204 if (ctxt->wellFormed)
11205 ret = ctxt->myDoc;
11206 else {
11207 ret = NULL;
11208 xmlFreeDoc(ctxt->myDoc);
11209 ctxt->myDoc = NULL;
11210 }
11211 if (sax != NULL)
11212 ctxt->sax = NULL;
11213 xmlFreeParserCtxt(ctxt);
11214
11215 return(ret);
11216}
11217
11218/**
11219 * xmlParseEntity:
11220 * @filename: the filename
11221 *
11222 * parse an XML external entity out of context and build a tree.
11223 *
11224 * [78] extParsedEnt ::= TextDecl? content
11225 *
11226 * This correspond to a "Well Balanced" chunk
11227 *
11228 * Returns the resulting document tree
11229 */
11230
11231xmlDocPtr
11232xmlParseEntity(const char *filename) {
11233 return(xmlSAXParseEntity(NULL, filename));
11234}
Daniel Veillard81273902003-09-30 00:43:48 +000011235#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011236
11237/**
11238 * xmlCreateEntityParserCtxt:
11239 * @URL: the entity URL
11240 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011241 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011242 *
11243 * Create a parser context for an external entity
11244 * Automatic support for ZLIB/Compress compressed document is provided
11245 * by default if found at compile-time.
11246 *
11247 * Returns the new parser context or NULL
11248 */
11249xmlParserCtxtPtr
11250xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11251 const xmlChar *base) {
11252 xmlParserCtxtPtr ctxt;
11253 xmlParserInputPtr inputStream;
11254 char *directory = NULL;
11255 xmlChar *uri;
11256
11257 ctxt = xmlNewParserCtxt();
11258 if (ctxt == NULL) {
11259 return(NULL);
11260 }
11261
11262 uri = xmlBuildURI(URL, base);
11263
11264 if (uri == NULL) {
11265 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11266 if (inputStream == NULL) {
11267 xmlFreeParserCtxt(ctxt);
11268 return(NULL);
11269 }
11270
11271 inputPush(ctxt, inputStream);
11272
11273 if ((ctxt->directory == NULL) && (directory == NULL))
11274 directory = xmlParserGetDirectory((char *)URL);
11275 if ((ctxt->directory == NULL) && (directory != NULL))
11276 ctxt->directory = directory;
11277 } else {
11278 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11279 if (inputStream == NULL) {
11280 xmlFree(uri);
11281 xmlFreeParserCtxt(ctxt);
11282 return(NULL);
11283 }
11284
11285 inputPush(ctxt, inputStream);
11286
11287 if ((ctxt->directory == NULL) && (directory == NULL))
11288 directory = xmlParserGetDirectory((char *)uri);
11289 if ((ctxt->directory == NULL) && (directory != NULL))
11290 ctxt->directory = directory;
11291 xmlFree(uri);
11292 }
Owen Taylor3473f882001-02-23 17:55:21 +000011293 return(ctxt);
11294}
11295
11296/************************************************************************
11297 * *
11298 * Front ends when parsing from a file *
11299 * *
11300 ************************************************************************/
11301
11302/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011303 * xmlCreateURLParserCtxt:
11304 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011305 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011306 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011307 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011308 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011309 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011310 *
11311 * Returns the new parser context or NULL
11312 */
11313xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011314xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011315{
11316 xmlParserCtxtPtr ctxt;
11317 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011318 char *directory = NULL;
11319
Owen Taylor3473f882001-02-23 17:55:21 +000011320 ctxt = xmlNewParserCtxt();
11321 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011322 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011323 return(NULL);
11324 }
11325
Daniel Veillard61b93382003-11-03 14:28:31 +000011326 if (options != 0)
11327 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011328
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011329 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011330 if (inputStream == NULL) {
11331 xmlFreeParserCtxt(ctxt);
11332 return(NULL);
11333 }
11334
Owen Taylor3473f882001-02-23 17:55:21 +000011335 inputPush(ctxt, inputStream);
11336 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011337 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011338 if ((ctxt->directory == NULL) && (directory != NULL))
11339 ctxt->directory = directory;
11340
11341 return(ctxt);
11342}
11343
Daniel Veillard61b93382003-11-03 14:28:31 +000011344/**
11345 * xmlCreateFileParserCtxt:
11346 * @filename: the filename
11347 *
11348 * Create a parser context for a file content.
11349 * Automatic support for ZLIB/Compress compressed document is provided
11350 * by default if found at compile-time.
11351 *
11352 * Returns the new parser context or NULL
11353 */
11354xmlParserCtxtPtr
11355xmlCreateFileParserCtxt(const char *filename)
11356{
11357 return(xmlCreateURLParserCtxt(filename, 0));
11358}
11359
Daniel Veillard81273902003-09-30 00:43:48 +000011360#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011361/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011362 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011363 * @sax: the SAX handler block
11364 * @filename: the filename
11365 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11366 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011367 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011368 *
11369 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11370 * compressed document is provided by default if found at compile-time.
11371 * It use the given SAX function block to handle the parsing callback.
11372 * If sax is NULL, fallback to the default DOM tree building routines.
11373 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011374 * User data (void *) is stored within the parser context in the
11375 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011376 *
Owen Taylor3473f882001-02-23 17:55:21 +000011377 * Returns the resulting document tree
11378 */
11379
11380xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011381xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11382 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011383 xmlDocPtr ret;
11384 xmlParserCtxtPtr ctxt;
11385 char *directory = NULL;
11386
Daniel Veillard635ef722001-10-29 11:48:19 +000011387 xmlInitParser();
11388
Owen Taylor3473f882001-02-23 17:55:21 +000011389 ctxt = xmlCreateFileParserCtxt(filename);
11390 if (ctxt == NULL) {
11391 return(NULL);
11392 }
11393 if (sax != NULL) {
11394 if (ctxt->sax != NULL)
11395 xmlFree(ctxt->sax);
11396 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011397 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011398 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011399 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011400 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011401 }
Owen Taylor3473f882001-02-23 17:55:21 +000011402
11403 if ((ctxt->directory == NULL) && (directory == NULL))
11404 directory = xmlParserGetDirectory(filename);
11405 if ((ctxt->directory == NULL) && (directory != NULL))
11406 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11407
Daniel Veillarddad3f682002-11-17 16:47:27 +000011408 ctxt->recovery = recovery;
11409
Owen Taylor3473f882001-02-23 17:55:21 +000011410 xmlParseDocument(ctxt);
11411
William M. Brackc07329e2003-09-08 01:57:30 +000011412 if ((ctxt->wellFormed) || recovery) {
11413 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011414 if (ret != NULL) {
11415 if (ctxt->input->buf->compressed > 0)
11416 ret->compression = 9;
11417 else
11418 ret->compression = ctxt->input->buf->compressed;
11419 }
William M. Brackc07329e2003-09-08 01:57:30 +000011420 }
Owen Taylor3473f882001-02-23 17:55:21 +000011421 else {
11422 ret = NULL;
11423 xmlFreeDoc(ctxt->myDoc);
11424 ctxt->myDoc = NULL;
11425 }
11426 if (sax != NULL)
11427 ctxt->sax = NULL;
11428 xmlFreeParserCtxt(ctxt);
11429
11430 return(ret);
11431}
11432
11433/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011434 * xmlSAXParseFile:
11435 * @sax: the SAX handler block
11436 * @filename: the filename
11437 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11438 * documents
11439 *
11440 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11441 * compressed document is provided by default if found at compile-time.
11442 * It use the given SAX function block to handle the parsing callback.
11443 * If sax is NULL, fallback to the default DOM tree building routines.
11444 *
11445 * Returns the resulting document tree
11446 */
11447
11448xmlDocPtr
11449xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11450 int recovery) {
11451 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11452}
11453
11454/**
Owen Taylor3473f882001-02-23 17:55:21 +000011455 * xmlRecoverDoc:
11456 * @cur: a pointer to an array of xmlChar
11457 *
11458 * parse an XML in-memory document and build a tree.
11459 * In the case the document is not Well Formed, a tree is built anyway
11460 *
11461 * Returns the resulting document tree
11462 */
11463
11464xmlDocPtr
11465xmlRecoverDoc(xmlChar *cur) {
11466 return(xmlSAXParseDoc(NULL, cur, 1));
11467}
11468
11469/**
11470 * xmlParseFile:
11471 * @filename: the filename
11472 *
11473 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11474 * compressed document is provided by default if found at compile-time.
11475 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011476 * Returns the resulting document tree if the file was wellformed,
11477 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011478 */
11479
11480xmlDocPtr
11481xmlParseFile(const char *filename) {
11482 return(xmlSAXParseFile(NULL, filename, 0));
11483}
11484
11485/**
11486 * xmlRecoverFile:
11487 * @filename: the filename
11488 *
11489 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11490 * compressed document is provided by default if found at compile-time.
11491 * In the case the document is not Well Formed, a tree is built anyway
11492 *
11493 * Returns the resulting document tree
11494 */
11495
11496xmlDocPtr
11497xmlRecoverFile(const char *filename) {
11498 return(xmlSAXParseFile(NULL, filename, 1));
11499}
11500
11501
11502/**
11503 * xmlSetupParserForBuffer:
11504 * @ctxt: an XML parser context
11505 * @buffer: a xmlChar * buffer
11506 * @filename: a file name
11507 *
11508 * Setup the parser context to parse a new buffer; Clears any prior
11509 * contents from the parser context. The buffer parameter must not be
11510 * NULL, but the filename parameter can be
11511 */
11512void
11513xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11514 const char* filename)
11515{
11516 xmlParserInputPtr input;
11517
11518 input = xmlNewInputStream(ctxt);
11519 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011520 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011521 xmlFree(ctxt);
11522 return;
11523 }
11524
11525 xmlClearParserCtxt(ctxt);
11526 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011527 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011528 input->base = buffer;
11529 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011530 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011531 inputPush(ctxt, input);
11532}
11533
11534/**
11535 * xmlSAXUserParseFile:
11536 * @sax: a SAX handler
11537 * @user_data: The user data returned on SAX callbacks
11538 * @filename: a file name
11539 *
11540 * parse an XML file and call the given SAX handler routines.
11541 * Automatic support for ZLIB/Compress compressed document is provided
11542 *
11543 * Returns 0 in case of success or a error number otherwise
11544 */
11545int
11546xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11547 const char *filename) {
11548 int ret = 0;
11549 xmlParserCtxtPtr ctxt;
11550
11551 ctxt = xmlCreateFileParserCtxt(filename);
11552 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011553#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011554 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011555#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011556 xmlFree(ctxt->sax);
11557 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011558 xmlDetectSAX2(ctxt);
11559
Owen Taylor3473f882001-02-23 17:55:21 +000011560 if (user_data != NULL)
11561 ctxt->userData = user_data;
11562
11563 xmlParseDocument(ctxt);
11564
11565 if (ctxt->wellFormed)
11566 ret = 0;
11567 else {
11568 if (ctxt->errNo != 0)
11569 ret = ctxt->errNo;
11570 else
11571 ret = -1;
11572 }
11573 if (sax != NULL)
11574 ctxt->sax = NULL;
11575 xmlFreeParserCtxt(ctxt);
11576
11577 return ret;
11578}
Daniel Veillard81273902003-09-30 00:43:48 +000011579#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011580
11581/************************************************************************
11582 * *
11583 * Front ends when parsing from memory *
11584 * *
11585 ************************************************************************/
11586
11587/**
11588 * xmlCreateMemoryParserCtxt:
11589 * @buffer: a pointer to a char array
11590 * @size: the size of the array
11591 *
11592 * Create a parser context for an XML in-memory document.
11593 *
11594 * Returns the new parser context or NULL
11595 */
11596xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011597xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011598 xmlParserCtxtPtr ctxt;
11599 xmlParserInputPtr input;
11600 xmlParserInputBufferPtr buf;
11601
11602 if (buffer == NULL)
11603 return(NULL);
11604 if (size <= 0)
11605 return(NULL);
11606
11607 ctxt = xmlNewParserCtxt();
11608 if (ctxt == NULL)
11609 return(NULL);
11610
Daniel Veillard53350552003-09-18 13:35:51 +000011611 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011612 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011613 if (buf == NULL) {
11614 xmlFreeParserCtxt(ctxt);
11615 return(NULL);
11616 }
Owen Taylor3473f882001-02-23 17:55:21 +000011617
11618 input = xmlNewInputStream(ctxt);
11619 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011620 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011621 xmlFreeParserCtxt(ctxt);
11622 return(NULL);
11623 }
11624
11625 input->filename = NULL;
11626 input->buf = buf;
11627 input->base = input->buf->buffer->content;
11628 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011629 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011630
11631 inputPush(ctxt, input);
11632 return(ctxt);
11633}
11634
Daniel Veillard81273902003-09-30 00:43:48 +000011635#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011636/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011637 * xmlSAXParseMemoryWithData:
11638 * @sax: the SAX handler block
11639 * @buffer: an pointer to a char array
11640 * @size: the size of the array
11641 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11642 * documents
11643 * @data: the userdata
11644 *
11645 * parse an XML in-memory block and use the given SAX function block
11646 * to handle the parsing callback. If sax is NULL, fallback to the default
11647 * DOM tree building routines.
11648 *
11649 * User data (void *) is stored within the parser context in the
11650 * context's _private member, so it is available nearly everywhere in libxml
11651 *
11652 * Returns the resulting document tree
11653 */
11654
11655xmlDocPtr
11656xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11657 int size, int recovery, void *data) {
11658 xmlDocPtr ret;
11659 xmlParserCtxtPtr ctxt;
11660
11661 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11662 if (ctxt == NULL) return(NULL);
11663 if (sax != NULL) {
11664 if (ctxt->sax != NULL)
11665 xmlFree(ctxt->sax);
11666 ctxt->sax = sax;
11667 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011668 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011669 if (data!=NULL) {
11670 ctxt->_private=data;
11671 }
11672
Daniel Veillardadba5f12003-04-04 16:09:01 +000011673 ctxt->recovery = recovery;
11674
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011675 xmlParseDocument(ctxt);
11676
11677 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11678 else {
11679 ret = NULL;
11680 xmlFreeDoc(ctxt->myDoc);
11681 ctxt->myDoc = NULL;
11682 }
11683 if (sax != NULL)
11684 ctxt->sax = NULL;
11685 xmlFreeParserCtxt(ctxt);
11686
11687 return(ret);
11688}
11689
11690/**
Owen Taylor3473f882001-02-23 17:55:21 +000011691 * xmlSAXParseMemory:
11692 * @sax: the SAX handler block
11693 * @buffer: an pointer to a char array
11694 * @size: the size of the array
11695 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11696 * documents
11697 *
11698 * parse an XML in-memory block and use the given SAX function block
11699 * to handle the parsing callback. If sax is NULL, fallback to the default
11700 * DOM tree building routines.
11701 *
11702 * Returns the resulting document tree
11703 */
11704xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011705xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11706 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011707 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011708}
11709
11710/**
11711 * xmlParseMemory:
11712 * @buffer: an pointer to a char array
11713 * @size: the size of the array
11714 *
11715 * parse an XML in-memory block and build a tree.
11716 *
11717 * Returns the resulting document tree
11718 */
11719
Daniel Veillard50822cb2001-07-26 20:05:51 +000011720xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011721 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11722}
11723
11724/**
11725 * xmlRecoverMemory:
11726 * @buffer: an pointer to a char array
11727 * @size: the size of the array
11728 *
11729 * parse an XML in-memory block and build a tree.
11730 * In the case the document is not Well Formed, a tree is built anyway
11731 *
11732 * Returns the resulting document tree
11733 */
11734
Daniel Veillard50822cb2001-07-26 20:05:51 +000011735xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011736 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11737}
11738
11739/**
11740 * xmlSAXUserParseMemory:
11741 * @sax: a SAX handler
11742 * @user_data: The user data returned on SAX callbacks
11743 * @buffer: an in-memory XML document input
11744 * @size: the length of the XML document in bytes
11745 *
11746 * A better SAX parsing routine.
11747 * parse an XML in-memory buffer and call the given SAX handler routines.
11748 *
11749 * Returns 0 in case of success or a error number otherwise
11750 */
11751int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011752 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011753 int ret = 0;
11754 xmlParserCtxtPtr ctxt;
11755 xmlSAXHandlerPtr oldsax = NULL;
11756
Daniel Veillard9e923512002-08-14 08:48:52 +000011757 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011758 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11759 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011760 oldsax = ctxt->sax;
11761 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011762 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011763 if (user_data != NULL)
11764 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011765
11766 xmlParseDocument(ctxt);
11767
11768 if (ctxt->wellFormed)
11769 ret = 0;
11770 else {
11771 if (ctxt->errNo != 0)
11772 ret = ctxt->errNo;
11773 else
11774 ret = -1;
11775 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011776 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011777 xmlFreeParserCtxt(ctxt);
11778
11779 return ret;
11780}
Daniel Veillard81273902003-09-30 00:43:48 +000011781#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011782
11783/**
11784 * xmlCreateDocParserCtxt:
11785 * @cur: a pointer to an array of xmlChar
11786 *
11787 * Creates a parser context for an XML in-memory document.
11788 *
11789 * Returns the new parser context or NULL
11790 */
11791xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011792xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011793 int len;
11794
11795 if (cur == NULL)
11796 return(NULL);
11797 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011798 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011799}
11800
Daniel Veillard81273902003-09-30 00:43:48 +000011801#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011802/**
11803 * xmlSAXParseDoc:
11804 * @sax: the SAX handler block
11805 * @cur: a pointer to an array of xmlChar
11806 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11807 * documents
11808 *
11809 * parse an XML in-memory document and build a tree.
11810 * It use the given SAX function block to handle the parsing callback.
11811 * If sax is NULL, fallback to the default DOM tree building routines.
11812 *
11813 * Returns the resulting document tree
11814 */
11815
11816xmlDocPtr
11817xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11818 xmlDocPtr ret;
11819 xmlParserCtxtPtr ctxt;
11820
11821 if (cur == NULL) return(NULL);
11822
11823
11824 ctxt = xmlCreateDocParserCtxt(cur);
11825 if (ctxt == NULL) return(NULL);
11826 if (sax != NULL) {
11827 ctxt->sax = sax;
11828 ctxt->userData = NULL;
11829 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011830 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011831
11832 xmlParseDocument(ctxt);
11833 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11834 else {
11835 ret = NULL;
11836 xmlFreeDoc(ctxt->myDoc);
11837 ctxt->myDoc = NULL;
11838 }
11839 if (sax != NULL)
11840 ctxt->sax = NULL;
11841 xmlFreeParserCtxt(ctxt);
11842
11843 return(ret);
11844}
11845
11846/**
11847 * xmlParseDoc:
11848 * @cur: a pointer to an array of xmlChar
11849 *
11850 * parse an XML in-memory document and build a tree.
11851 *
11852 * Returns the resulting document tree
11853 */
11854
11855xmlDocPtr
11856xmlParseDoc(xmlChar *cur) {
11857 return(xmlSAXParseDoc(NULL, cur, 0));
11858}
Daniel Veillard81273902003-09-30 00:43:48 +000011859#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011860
Daniel Veillard81273902003-09-30 00:43:48 +000011861#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011862/************************************************************************
11863 * *
11864 * Specific function to keep track of entities references *
11865 * and used by the XSLT debugger *
11866 * *
11867 ************************************************************************/
11868
11869static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11870
11871/**
11872 * xmlAddEntityReference:
11873 * @ent : A valid entity
11874 * @firstNode : A valid first node for children of entity
11875 * @lastNode : A valid last node of children entity
11876 *
11877 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11878 */
11879static void
11880xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11881 xmlNodePtr lastNode)
11882{
11883 if (xmlEntityRefFunc != NULL) {
11884 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11885 }
11886}
11887
11888
11889/**
11890 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011891 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011892 *
11893 * Set the function to call call back when a xml reference has been made
11894 */
11895void
11896xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11897{
11898 xmlEntityRefFunc = func;
11899}
Daniel Veillard81273902003-09-30 00:43:48 +000011900#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011901
11902/************************************************************************
11903 * *
11904 * Miscellaneous *
11905 * *
11906 ************************************************************************/
11907
11908#ifdef LIBXML_XPATH_ENABLED
11909#include <libxml/xpath.h>
11910#endif
11911
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011912extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011913static int xmlParserInitialized = 0;
11914
11915/**
11916 * xmlInitParser:
11917 *
11918 * Initialization function for the XML parser.
11919 * This is not reentrant. Call once before processing in case of
11920 * use in multithreaded programs.
11921 */
11922
11923void
11924xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011925 if (xmlParserInitialized != 0)
11926 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011927
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011928 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11929 (xmlGenericError == NULL))
11930 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011931 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011932 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011933 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011934 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011935 xmlDefaultSAXHandlerInit();
11936 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011937#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011938 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011939#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011940#ifdef LIBXML_HTML_ENABLED
11941 htmlInitAutoClose();
11942 htmlDefaultSAXHandlerInit();
11943#endif
11944#ifdef LIBXML_XPATH_ENABLED
11945 xmlXPathInit();
11946#endif
11947 xmlParserInitialized = 1;
11948}
11949
11950/**
11951 * xmlCleanupParser:
11952 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011953 * Cleanup function for the XML library. It tries to reclaim all
11954 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011955 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011956 * function should not prevent reusing the library but one should
11957 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011958 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011959 */
11960
11961void
11962xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011963 if (!xmlParserInitialized)
11964 return;
11965
Owen Taylor3473f882001-02-23 17:55:21 +000011966 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011967#ifdef LIBXML_CATALOG_ENABLED
11968 xmlCatalogCleanup();
11969#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011970 xmlCleanupInputCallbacks();
11971#ifdef LIBXML_OUTPUT_ENABLED
11972 xmlCleanupOutputCallbacks();
11973#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011974 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011975 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011976 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011977 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011978 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011979}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011980
11981/************************************************************************
11982 * *
11983 * New set (2.6.0) of simpler and more flexible APIs *
11984 * *
11985 ************************************************************************/
11986
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible at the point
 * of use; when it is NULL the string is always freed.
 *
 * Wrapped in do { } while (0) so the macro behaves as one statement
 * (safe inside an unbraced if/else); invoke it with a trailing ';'.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
11998
11999/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012000 * xmlCtxtReset:
12001 * @ctxt: an XML parser context
12002 *
12003 * Reset a parser context
12004 */
12005void
12006xmlCtxtReset(xmlParserCtxtPtr ctxt)
12007{
12008 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012009 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012010
12011 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12012 xmlFreeInputStream(input);
12013 }
12014 ctxt->inputNr = 0;
12015 ctxt->input = NULL;
12016
12017 ctxt->spaceNr = 0;
12018 ctxt->spaceTab[0] = -1;
12019 ctxt->space = &ctxt->spaceTab[0];
12020
12021
12022 ctxt->nodeNr = 0;
12023 ctxt->node = NULL;
12024
12025 ctxt->nameNr = 0;
12026 ctxt->name = NULL;
12027
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012028 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012029 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012030 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012031 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012032 DICT_FREE(ctxt->directory);
12033 ctxt->directory = NULL;
12034 DICT_FREE(ctxt->extSubURI);
12035 ctxt->extSubURI = NULL;
12036 DICT_FREE(ctxt->extSubSystem);
12037 ctxt->extSubSystem = NULL;
12038 if (ctxt->myDoc != NULL)
12039 xmlFreeDoc(ctxt->myDoc);
12040 ctxt->myDoc = NULL;
12041
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012042 ctxt->standalone = -1;
12043 ctxt->hasExternalSubset = 0;
12044 ctxt->hasPErefs = 0;
12045 ctxt->html = 0;
12046 ctxt->external = 0;
12047 ctxt->instate = XML_PARSER_START;
12048 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012049
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012050 ctxt->wellFormed = 1;
12051 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012052 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012053 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012054#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012055 ctxt->vctxt.userData = ctxt;
12056 ctxt->vctxt.error = xmlParserValidityError;
12057 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012058#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012059 ctxt->record_info = 0;
12060 ctxt->nbChars = 0;
12061 ctxt->checkIndex = 0;
12062 ctxt->inSubset = 0;
12063 ctxt->errNo = XML_ERR_OK;
12064 ctxt->depth = 0;
12065 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12066 ctxt->catalogs = NULL;
12067 xmlInitNodeInfoSeq(&ctxt->node_seq);
12068
12069 if (ctxt->attsDefault != NULL) {
12070 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12071 ctxt->attsDefault = NULL;
12072 }
12073 if (ctxt->attsSpecial != NULL) {
12074 xmlHashFree(ctxt->attsSpecial, NULL);
12075 ctxt->attsSpecial = NULL;
12076 }
12077
Daniel Veillard4432df22003-09-28 18:58:27 +000012078#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012079 if (ctxt->catalogs != NULL)
12080 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012081#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012082 if (ctxt->lastError.code != XML_ERR_OK)
12083 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012084}
12085
12086/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012087 * xmlCtxtResetPush:
12088 * @ctxt: an XML parser context
12089 * @chunk: a pointer to an array of chars
12090 * @size: number of chars in the array
12091 * @filename: an optional file name or URI
12092 * @encoding: the document encoding, or NULL
12093 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012094 * Reset a push parser context
12095 *
12096 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012097 */
12098int
12099xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12100 int size, const char *filename, const char *encoding)
12101{
12102 xmlParserInputPtr inputStream;
12103 xmlParserInputBufferPtr buf;
12104 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12105
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012106 if (ctxt == NULL)
12107 return(1);
12108
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012109 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12110 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12111
12112 buf = xmlAllocParserInputBuffer(enc);
12113 if (buf == NULL)
12114 return(1);
12115
12116 if (ctxt == NULL) {
12117 xmlFreeParserInputBuffer(buf);
12118 return(1);
12119 }
12120
12121 xmlCtxtReset(ctxt);
12122
12123 if (ctxt->pushTab == NULL) {
12124 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12125 sizeof(xmlChar *));
12126 if (ctxt->pushTab == NULL) {
12127 xmlErrMemory(ctxt, NULL);
12128 xmlFreeParserInputBuffer(buf);
12129 return(1);
12130 }
12131 }
12132
12133 if (filename == NULL) {
12134 ctxt->directory = NULL;
12135 } else {
12136 ctxt->directory = xmlParserGetDirectory(filename);
12137 }
12138
12139 inputStream = xmlNewInputStream(ctxt);
12140 if (inputStream == NULL) {
12141 xmlFreeParserInputBuffer(buf);
12142 return(1);
12143 }
12144
12145 if (filename == NULL)
12146 inputStream->filename = NULL;
12147 else
12148 inputStream->filename = (char *)
12149 xmlCanonicPath((const xmlChar *) filename);
12150 inputStream->buf = buf;
12151 inputStream->base = inputStream->buf->buffer->content;
12152 inputStream->cur = inputStream->buf->buffer->content;
12153 inputStream->end =
12154 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12155
12156 inputPush(ctxt, inputStream);
12157
12158 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12159 (ctxt->input->buf != NULL)) {
12160 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12161 int cur = ctxt->input->cur - ctxt->input->base;
12162
12163 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12164
12165 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12166 ctxt->input->cur = ctxt->input->base + cur;
12167 ctxt->input->end =
12168 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12169 use];
12170#ifdef DEBUG_PUSH
12171 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12172#endif
12173 }
12174
12175 if (encoding != NULL) {
12176 xmlCharEncodingHandlerPtr hdlr;
12177
12178 hdlr = xmlFindCharEncodingHandler(encoding);
12179 if (hdlr != NULL) {
12180 xmlSwitchToEncoding(ctxt, hdlr);
12181 } else {
12182 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12183 "Unsupported encoding %s\n", BAD_CAST encoding);
12184 }
12185 } else if (enc != XML_CHAR_ENCODING_NONE) {
12186 xmlSwitchEncoding(ctxt, enc);
12187 }
12188
12189 return(0);
12190}
12191
12192/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012193 * xmlCtxtUseOptions:
12194 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012195 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012196 *
12197 * Applies the options to the parser context
12198 *
12199 * Returns 0 in case of success, the set of unknown or unimplemented options
12200 * in case of error.
12201 */
12202int
12203xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12204{
12205 if (options & XML_PARSE_RECOVER) {
12206 ctxt->recovery = 1;
12207 options -= XML_PARSE_RECOVER;
12208 } else
12209 ctxt->recovery = 0;
12210 if (options & XML_PARSE_DTDLOAD) {
12211 ctxt->loadsubset = XML_DETECT_IDS;
12212 options -= XML_PARSE_DTDLOAD;
12213 } else
12214 ctxt->loadsubset = 0;
12215 if (options & XML_PARSE_DTDATTR) {
12216 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12217 options -= XML_PARSE_DTDATTR;
12218 }
12219 if (options & XML_PARSE_NOENT) {
12220 ctxt->replaceEntities = 1;
12221 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12222 options -= XML_PARSE_NOENT;
12223 } else
12224 ctxt->replaceEntities = 0;
12225 if (options & XML_PARSE_NOWARNING) {
12226 ctxt->sax->warning = NULL;
12227 options -= XML_PARSE_NOWARNING;
12228 }
12229 if (options & XML_PARSE_NOERROR) {
12230 ctxt->sax->error = NULL;
12231 ctxt->sax->fatalError = NULL;
12232 options -= XML_PARSE_NOERROR;
12233 }
12234 if (options & XML_PARSE_PEDANTIC) {
12235 ctxt->pedantic = 1;
12236 options -= XML_PARSE_PEDANTIC;
12237 } else
12238 ctxt->pedantic = 0;
12239 if (options & XML_PARSE_NOBLANKS) {
12240 ctxt->keepBlanks = 0;
12241 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12242 options -= XML_PARSE_NOBLANKS;
12243 } else
12244 ctxt->keepBlanks = 1;
12245 if (options & XML_PARSE_DTDVALID) {
12246 ctxt->validate = 1;
12247 if (options & XML_PARSE_NOWARNING)
12248 ctxt->vctxt.warning = NULL;
12249 if (options & XML_PARSE_NOERROR)
12250 ctxt->vctxt.error = NULL;
12251 options -= XML_PARSE_DTDVALID;
12252 } else
12253 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012254#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012255 if (options & XML_PARSE_SAX1) {
12256 ctxt->sax->startElement = xmlSAX2StartElement;
12257 ctxt->sax->endElement = xmlSAX2EndElement;
12258 ctxt->sax->startElementNs = NULL;
12259 ctxt->sax->endElementNs = NULL;
12260 ctxt->sax->initialized = 1;
12261 options -= XML_PARSE_SAX1;
12262 }
Daniel Veillard81273902003-09-30 00:43:48 +000012263#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012264 if (options & XML_PARSE_NODICT) {
12265 ctxt->dictNames = 0;
12266 options -= XML_PARSE_NODICT;
12267 } else {
12268 ctxt->dictNames = 1;
12269 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012270 if (options & XML_PARSE_NOCDATA) {
12271 ctxt->sax->cdataBlock = NULL;
12272 options -= XML_PARSE_NOCDATA;
12273 }
12274 if (options & XML_PARSE_NSCLEAN) {
12275 ctxt->options |= XML_PARSE_NSCLEAN;
12276 options -= XML_PARSE_NSCLEAN;
12277 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012278 if (options & XML_PARSE_NONET) {
12279 ctxt->options |= XML_PARSE_NONET;
12280 options -= XML_PARSE_NONET;
12281 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012282 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012283 return (options);
12284}
12285
12286/**
12287 * xmlDoRead:
12288 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012289 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012290 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012291 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012292 * @reuse: keep the context for reuse
12293 *
12294 * Common front-end for the xmlRead functions
12295 *
12296 * Returns the resulting document tree or NULL
12297 */
12298static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012299xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12300 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012301{
12302 xmlDocPtr ret;
12303
12304 xmlCtxtUseOptions(ctxt, options);
12305 if (encoding != NULL) {
12306 xmlCharEncodingHandlerPtr hdlr;
12307
12308 hdlr = xmlFindCharEncodingHandler(encoding);
12309 if (hdlr != NULL)
12310 xmlSwitchToEncoding(ctxt, hdlr);
12311 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012312 if ((URL != NULL) && (ctxt->input != NULL) &&
12313 (ctxt->input->filename == NULL))
12314 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012315 xmlParseDocument(ctxt);
12316 if ((ctxt->wellFormed) || ctxt->recovery)
12317 ret = ctxt->myDoc;
12318 else {
12319 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012320 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012321 xmlFreeDoc(ctxt->myDoc);
12322 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012323 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012324 ctxt->myDoc = NULL;
12325 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012326 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012327 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012328
12329 return (ret);
12330}
12331
12332/**
12333 * xmlReadDoc:
12334 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012335 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012336 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012337 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012338 *
12339 * parse an XML in-memory document and build a tree.
12340 *
12341 * Returns the resulting document tree
12342 */
12343xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012344xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012345{
12346 xmlParserCtxtPtr ctxt;
12347
12348 if (cur == NULL)
12349 return (NULL);
12350
12351 ctxt = xmlCreateDocParserCtxt(cur);
12352 if (ctxt == NULL)
12353 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012354 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012355}
12356
12357/**
12358 * xmlReadFile:
12359 * @filename: a file or URL
12360 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012361 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012362 *
12363 * parse an XML file from the filesystem or the network.
12364 *
12365 * Returns the resulting document tree
12366 */
12367xmlDocPtr
12368xmlReadFile(const char *filename, const char *encoding, int options)
12369{
12370 xmlParserCtxtPtr ctxt;
12371
Daniel Veillard61b93382003-11-03 14:28:31 +000012372 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012373 if (ctxt == NULL)
12374 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012375 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012376}
12377
12378/**
12379 * xmlReadMemory:
12380 * @buffer: a pointer to a char array
12381 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012382 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012383 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012384 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012385 *
12386 * parse an XML in-memory document and build a tree.
12387 *
12388 * Returns the resulting document tree
12389 */
12390xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012391xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012392{
12393 xmlParserCtxtPtr ctxt;
12394
12395 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12396 if (ctxt == NULL)
12397 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012398 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012399}
12400
12401/**
12402 * xmlReadFd:
12403 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012404 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012405 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012406 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012407 *
12408 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012409 * NOTE that the file descriptor will not be closed when the
12410 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012411 *
12412 * Returns the resulting document tree
12413 */
12414xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012415xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012416{
12417 xmlParserCtxtPtr ctxt;
12418 xmlParserInputBufferPtr input;
12419 xmlParserInputPtr stream;
12420
12421 if (fd < 0)
12422 return (NULL);
12423
12424 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12425 if (input == NULL)
12426 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012427 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012428 ctxt = xmlNewParserCtxt();
12429 if (ctxt == NULL) {
12430 xmlFreeParserInputBuffer(input);
12431 return (NULL);
12432 }
12433 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12434 if (stream == NULL) {
12435 xmlFreeParserInputBuffer(input);
12436 xmlFreeParserCtxt(ctxt);
12437 return (NULL);
12438 }
12439 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012440 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012441}
12442
12443/**
12444 * xmlReadIO:
12445 * @ioread: an I/O read function
12446 * @ioclose: an I/O close function
12447 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012448 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012449 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012450 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012451 *
12452 * parse an XML document from I/O functions and source and build a tree.
12453 *
12454 * Returns the resulting document tree
12455 */
12456xmlDocPtr
12457xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012458 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012459{
12460 xmlParserCtxtPtr ctxt;
12461 xmlParserInputBufferPtr input;
12462 xmlParserInputPtr stream;
12463
12464 if (ioread == NULL)
12465 return (NULL);
12466
12467 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12468 XML_CHAR_ENCODING_NONE);
12469 if (input == NULL)
12470 return (NULL);
12471 ctxt = xmlNewParserCtxt();
12472 if (ctxt == NULL) {
12473 xmlFreeParserInputBuffer(input);
12474 return (NULL);
12475 }
12476 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12477 if (stream == NULL) {
12478 xmlFreeParserInputBuffer(input);
12479 xmlFreeParserCtxt(ctxt);
12480 return (NULL);
12481 }
12482 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012483 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012484}
12485
12486/**
12487 * xmlCtxtReadDoc:
12488 * @ctxt: an XML parser context
12489 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012490 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012491 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012492 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012493 *
12494 * parse an XML in-memory document and build a tree.
12495 * This reuses the existing @ctxt parser context
12496 *
12497 * Returns the resulting document tree
12498 */
12499xmlDocPtr
12500xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012501 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012502{
12503 xmlParserInputPtr stream;
12504
12505 if (cur == NULL)
12506 return (NULL);
12507 if (ctxt == NULL)
12508 return (NULL);
12509
12510 xmlCtxtReset(ctxt);
12511
12512 stream = xmlNewStringInputStream(ctxt, cur);
12513 if (stream == NULL) {
12514 return (NULL);
12515 }
12516 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012517 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012518}
12519
12520/**
12521 * xmlCtxtReadFile:
12522 * @ctxt: an XML parser context
12523 * @filename: a file or URL
12524 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012525 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012526 *
12527 * parse an XML file from the filesystem or the network.
12528 * This reuses the existing @ctxt parser context
12529 *
12530 * Returns the resulting document tree
12531 */
12532xmlDocPtr
12533xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12534 const char *encoding, int options)
12535{
12536 xmlParserInputPtr stream;
12537
12538 if (filename == NULL)
12539 return (NULL);
12540 if (ctxt == NULL)
12541 return (NULL);
12542
12543 xmlCtxtReset(ctxt);
12544
12545 stream = xmlNewInputFromFile(ctxt, filename);
12546 if (stream == NULL) {
12547 return (NULL);
12548 }
12549 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012550 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012551}
12552
12553/**
12554 * xmlCtxtReadMemory:
12555 * @ctxt: an XML parser context
12556 * @buffer: a pointer to a char array
12557 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012558 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012559 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012560 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012561 *
12562 * parse an XML in-memory document and build a tree.
12563 * This reuses the existing @ctxt parser context
12564 *
12565 * Returns the resulting document tree
12566 */
12567xmlDocPtr
12568xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012569 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012570{
12571 xmlParserInputBufferPtr input;
12572 xmlParserInputPtr stream;
12573
12574 if (ctxt == NULL)
12575 return (NULL);
12576 if (buffer == NULL)
12577 return (NULL);
12578
12579 xmlCtxtReset(ctxt);
12580
12581 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12582 if (input == NULL) {
12583 return(NULL);
12584 }
12585
12586 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12587 if (stream == NULL) {
12588 xmlFreeParserInputBuffer(input);
12589 return(NULL);
12590 }
12591
12592 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012593 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012594}
12595
12596/**
12597 * xmlCtxtReadFd:
12598 * @ctxt: an XML parser context
12599 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012600 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012601 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012602 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012603 *
12604 * parse an XML from a file descriptor and build a tree.
12605 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012606 * NOTE that the file descriptor will not be closed when the
12607 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012608 *
12609 * Returns the resulting document tree
12610 */
12611xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012612xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12613 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012614{
12615 xmlParserInputBufferPtr input;
12616 xmlParserInputPtr stream;
12617
12618 if (fd < 0)
12619 return (NULL);
12620 if (ctxt == NULL)
12621 return (NULL);
12622
12623 xmlCtxtReset(ctxt);
12624
12625
12626 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12627 if (input == NULL)
12628 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012629 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012630 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12631 if (stream == NULL) {
12632 xmlFreeParserInputBuffer(input);
12633 return (NULL);
12634 }
12635 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012636 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012637}
12638
12639/**
12640 * xmlCtxtReadIO:
12641 * @ctxt: an XML parser context
12642 * @ioread: an I/O read function
12643 * @ioclose: an I/O close function
12644 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012645 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012646 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012647 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012648 *
12649 * parse an XML document from I/O functions and source and build a tree.
12650 * This reuses the existing @ctxt parser context
12651 *
12652 * Returns the resulting document tree
12653 */
12654xmlDocPtr
12655xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12656 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012657 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012658 const char *encoding, int options)
12659{
12660 xmlParserInputBufferPtr input;
12661 xmlParserInputPtr stream;
12662
12663 if (ioread == NULL)
12664 return (NULL);
12665 if (ctxt == NULL)
12666 return (NULL);
12667
12668 xmlCtxtReset(ctxt);
12669
12670 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12671 XML_CHAR_ENCODING_NONE);
12672 if (input == NULL)
12673 return (NULL);
12674 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12675 if (stream == NULL) {
12676 xmlFreeParserInputBuffer(input);
12677 return (NULL);
12678 }
12679 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012680 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012681}