blob: 9d6db090b1cbc0d723d36558b919c853b7a7af96 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: backslash on native Windows builds (WIN32 without Cygwin),
 * forward slash on every other platform.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents that we accept
 * to process. It acts as a safety boundary; it is not a limitation of the
 * parser itself. The initial value is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/*
 * Parser buffer size constants.
 * NOTE(review): their users are outside the visible part of the file;
 * presumably these are initial sizes for temporary parse buffers - confirm.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/* Compile-time switch; NOTE(review): presumably selects the SAX2 code paths. */
#define SAX2 1

/* Marker string; NOTE(review): its use is not visible from this chunk. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The list is terminated by a NULL entry. Both the strings and the array
 * of pointers are constant: this is a read-only lookup table.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
144 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000145 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000146 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000147 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
148 (const char *) localname, NULL, NULL, 0, 0,
149 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000150 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000151 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000152 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
153 (const char *) prefix, (const char *) localname,
154 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
155 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000156 ctxt->wellFormed = 0;
157 if (ctxt->recovery == 0)
158 ctxt->disableSAX = 1;
159}
160
161/**
162 * xmlFatalErr:
163 * @ctxt: an XML parser context
164 * @error: the error number
165 * @extra: extra information string
166 *
167 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
168 */
169static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000170xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171{
172 const char *errmsg;
173
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174 switch (error) {
175 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176 errmsg = "CharRef: invalid hexadecimal value\n";
177 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000178 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179 errmsg = "CharRef: invalid decimal value\n";
180 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000181 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "internal error";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "PEReference at end of document\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "PEReference in prolog\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference in epilog\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference: no name\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference: expecting ';'\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "Detected an entity reference loop\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "EntityValue: \" or ' expected\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReferences forbidden in internal subset\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "AttValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Unescaped '<' not allowed in attributes values\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "SystemLiteral \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unfinished System or Public ID \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Sequence ']]>' not allowed in content\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "PUBLIC, the Public Identifier is missing\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Comment must not contain '--' (double-hyphen)\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "xmlParsePI : no target name\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Invalid PI name\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "NOTATION: Name expected here\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "'>' required to close NOTATION declaration\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Entity value required\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Fragment not allowed";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'(' required to start ATTLIST enumeration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NmToken expected in ATTLIST enumeration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "')' required to finish ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "ContentDecl : Name or '(' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg =
279 "PEReference: forbidden within markup decl in internal subset\n";
280 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000282 errmsg = "expected '>'\n";
283 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000284 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "XML conditional section '[' expected\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "Content error in the external subset\n";
289 break;
290 case XML_ERR_CONDSEC_INVALID_KEYWORD:
291 errmsg =
292 "conditional section INCLUDE or IGNORE keyword expected\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "XML conditional section not closed\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "Text declaration '<?xml' required\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "parsing XML declaration: '?>' expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "external parsed entities cannot be standalone\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "EntityRef: expecting ';'\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "DOCTYPE improperly terminated\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EndTag: '</' not found\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "expected '='\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "String not closed expecting \" or '\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "String not started expecting ' or \"\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "Invalid XML encoding name\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "standalone accepts only 'yes' or 'no'\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Document is empty\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Extra content at the end of the document\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "chunk is not well balanced\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "extra content at the end of well balanced chunk\n";
341 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000342 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Malformed declaration expecting version\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 case:
347 errmsg = "\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 default:
351 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 }
353 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000354 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
356 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 ctxt->wellFormed = 0;
358 if (ctxt->recovery == 0)
359 ctxt->disableSAX = 1;
360}
361
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000362/**
363 * xmlFatalErrMsg:
364 * @ctxt: an XML parser context
365 * @error: the error number
366 * @msg: the error message
367 *
368 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
369 */
370static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
372 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000373{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000374 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000375 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377 ctxt->wellFormed = 0;
378 if (ctxt->recovery == 0)
379 ctxt->disableSAX = 1;
380}
381
382/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000383 * xmlWarningMsg:
384 * @ctxt: an XML parser context
385 * @error: the error number
386 * @msg: the error message
387 * @str1: extra data
388 * @str2: extra data
389 *
390 * Handle a warning.
391 */
392static void
393xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
394 const char *msg, const xmlChar *str1, const xmlChar *str2)
395{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000396 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000397
Daniel Veillard24eb9782003-10-04 21:08:09 +0000398 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000399 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000400 schannel = ctxt->sax->serror;
401 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000402 (ctxt->sax) ? ctxt->sax->warning : NULL,
403 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 ctxt, NULL, XML_FROM_PARSER, error,
405 XML_ERR_WARNING, NULL, 0,
406 (const char *) str1, (const char *) str2, NULL, 0, 0,
407 msg, (const char *) str1, (const char *) str2);
408}
409
410/**
411 * xmlValidityError:
412 * @ctxt: an XML parser context
413 * @error: the error number
414 * @msg: the error message
415 * @str1: extra data
416 *
417 * Handle a warning.
418 */
419static void
420xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
421 const char *msg, const xmlChar *str1)
422{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000423 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000424 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000425 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000426 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000427 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000428 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000429 ctxt, NULL, XML_FROM_DTD, error,
430 XML_ERR_ERROR, NULL, 0, (const char *) str1,
431 NULL, NULL, 0, 0,
432 msg, (const char *) str1);
433 ctxt->valid = 0;
434}
435
436/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000437 * xmlFatalErrMsgInt:
438 * @ctxt: an XML parser context
439 * @error: the error number
440 * @msg: the error message
441 * @val: an integer value
442 *
443 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
444 */
445static void
446xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000448{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000449 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000450 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
452 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 ctxt->wellFormed = 0;
454 if (ctxt->recovery == 0)
455 ctxt->disableSAX = 1;
456}
457
458/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000459 * xmlFatalErrMsgStrIntStr:
460 * @ctxt: an XML parser context
461 * @error: the error number
462 * @msg: the error message
463 * @str1: an string info
464 * @val: an integer value
465 * @str2: an string info
466 *
467 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
468 */
469static void
470xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
471 const char *msg, const xmlChar *str1, int val,
472 const xmlChar *str2)
473{
474 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000475 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000476 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
477 NULL, 0, (const char *) str1, (const char *) str2,
478 NULL, val, 0, msg, str1, val, str2);
479 ctxt->wellFormed = 0;
480 if (ctxt->recovery == 0)
481 ctxt->disableSAX = 1;
482}
483
484/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000485 * xmlFatalErrMsgStr:
486 * @ctxt: an XML parser context
487 * @error: the error number
488 * @msg: the error message
489 * @val: a string value
490 *
491 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
492 */
493static void
494xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000496{
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000497 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000498 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 XML_FROM_PARSER, error, XML_ERR_FATAL,
500 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
501 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000502 ctxt->wellFormed = 0;
503 if (ctxt->recovery == 0)
504 ctxt->disableSAX = 1;
505}
506
507/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000508 * xmlErrMsgStr:
509 * @ctxt: an XML parser context
510 * @error: the error number
511 * @msg: the error message
512 * @val: a string value
513 *
514 * Handle a non fatal parser error
515 */
516static void
517xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
518 const char *msg, const xmlChar * val)
519{
520 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000521 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000522 XML_FROM_PARSER, error, XML_ERR_ERROR,
523 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
524 val);
525}
526
527/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000528 * xmlNsErr:
529 * @ctxt: an XML parser context
530 * @error: the error number
531 * @msg: the message
532 * @info1: extra information string
533 * @info2: extra information string
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000540 const xmlChar * info1, const xmlChar * info2,
541 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000542{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000543 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000544 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000545 XML_ERR_ERROR, NULL, 0, (const char *) info1,
546 (const char *) info2, (const char *) info3, 0, 0, msg,
547 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000548 ctxt->nsWellFormed = 0;
549}
550
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000551/************************************************************************
552 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000553 * SAX2 defaulted attributes handling *
554 * *
555 ************************************************************************/
556
557/**
558 * xmlDetectSAX2:
559 * @ctxt: an XML parser context
560 *
561 * Do the SAX2 detection and specific intialization
562 */
563static void
564xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
565 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000566#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000567 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
568 ((ctxt->sax->startElementNs != NULL) ||
569 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000570#else
571 ctxt->sax2 = 1;
572#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000573
574 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
575 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
576 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
577}
578
Daniel Veillarde57ec792003-09-10 10:50:59 +0000579typedef struct _xmlDefAttrs xmlDefAttrs;
580typedef xmlDefAttrs *xmlDefAttrsPtr;
581struct _xmlDefAttrs {
582 int nbAttrs; /* number of defaulted attributes on that element */
583 int maxAttrs; /* the size of the array */
584 const xmlChar *values[4]; /* array of localname/prefix/values */
585};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000586
587/**
588 * xmlAddDefAttrs:
589 * @ctxt: an XML parser context
590 * @fullname: the element fullname
591 * @fullattr: the attribute fullname
592 * @value: the attribute value
593 *
594 * Add a defaulted attribute for an element
595 */
596static void
597xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
598 const xmlChar *fullname,
599 const xmlChar *fullattr,
600 const xmlChar *value) {
601 xmlDefAttrsPtr defaults;
602 int len;
603 const xmlChar *name;
604 const xmlChar *prefix;
605
606 if (ctxt->attsDefault == NULL) {
607 ctxt->attsDefault = xmlHashCreate(10);
608 if (ctxt->attsDefault == NULL)
609 goto mem_error;
610 }
611
612 /*
613 * plit the element name into prefix:localname , the string found
614 * are within the DTD and hen not associated to namespace names.
615 */
616 name = xmlSplitQName3(fullname, &len);
617 if (name == NULL) {
618 name = xmlDictLookup(ctxt->dict, fullname, -1);
619 prefix = NULL;
620 } else {
621 name = xmlDictLookup(ctxt->dict, name, -1);
622 prefix = xmlDictLookup(ctxt->dict, fullname, len);
623 }
624
625 /*
626 * make sure there is some storage
627 */
628 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
629 if (defaults == NULL) {
630 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
631 12 * sizeof(const xmlChar *));
632 if (defaults == NULL)
633 goto mem_error;
634 defaults->maxAttrs = 4;
635 defaults->nbAttrs = 0;
636 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
637 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
638 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
639 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
640 if (defaults == NULL)
641 goto mem_error;
642 defaults->maxAttrs *= 2;
643 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
644 }
645
646 /*
647 * plit the element name into prefix:localname , the string found
648 * are within the DTD and hen not associated to namespace names.
649 */
650 name = xmlSplitQName3(fullattr, &len);
651 if (name == NULL) {
652 name = xmlDictLookup(ctxt->dict, fullattr, -1);
653 prefix = NULL;
654 } else {
655 name = xmlDictLookup(ctxt->dict, name, -1);
656 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
657 }
658
659 defaults->values[4 * defaults->nbAttrs] = name;
660 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
661 /* intern the string and precompute the end */
662 len = xmlStrlen(value);
663 value = xmlDictLookup(ctxt->dict, value, len);
664 defaults->values[4 * defaults->nbAttrs + 2] = value;
665 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
666 defaults->nbAttrs++;
667
668 return;
669
670mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000671 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000672 return;
673}
674
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000675/**
676 * xmlAddSpecialAttr:
677 * @ctxt: an XML parser context
678 * @fullname: the element fullname
679 * @fullattr: the attribute fullname
680 * @type: the attribute type
681 *
682 * Register that this attribute is not CDATA
683 */
684static void
685xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
686 const xmlChar *fullname,
687 const xmlChar *fullattr,
688 int type)
689{
690 if (ctxt->attsSpecial == NULL) {
691 ctxt->attsSpecial = xmlHashCreate(10);
692 if (ctxt->attsSpecial == NULL)
693 goto mem_error;
694 }
695
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000696 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
697 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000698 return;
699
700mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000701 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000702 return;
703}
704
Daniel Veillard4432df22003-09-28 18:58:27 +0000705/**
706 * xmlCheckLanguageID:
707 * @lang: pointer to the string value
708 *
709 * Checks that the value conforms to the LanguageID production:
710 *
711 * NOTE: this is somewhat deprecated, those productions were removed from
712 * the XML Second edition.
713 *
714 * [33] LanguageID ::= Langcode ('-' Subcode)*
715 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
716 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
717 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
718 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
719 * [38] Subcode ::= ([a-z] | [A-Z])+
720 *
721 * Returns 1 if correct 0 otherwise
722 **/
723int
724xmlCheckLanguageID(const xmlChar * lang)
725{
726 const xmlChar *cur = lang;
727
728 if (cur == NULL)
729 return (0);
730 if (((cur[0] == 'i') && (cur[1] == '-')) ||
731 ((cur[0] == 'I') && (cur[1] == '-'))) {
732 /*
733 * IANA code
734 */
735 cur += 2;
736 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
737 ((cur[0] >= 'a') && (cur[0] <= 'z')))
738 cur++;
739 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
740 ((cur[0] == 'X') && (cur[1] == '-'))) {
741 /*
742 * User code
743 */
744 cur += 2;
745 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
746 ((cur[0] >= 'a') && (cur[0] <= 'z')))
747 cur++;
748 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
749 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
750 /*
751 * ISO639
752 */
753 cur++;
754 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
755 ((cur[0] >= 'a') && (cur[0] <= 'z')))
756 cur++;
757 else
758 return (0);
759 } else
760 return (0);
761 while (cur[0] != 0) { /* non input consuming */
762 if (cur[0] != '-')
763 return (0);
764 cur++;
765 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
766 ((cur[0] >= 'a') && (cur[0] <= 'z')))
767 cur++;
768 else
769 return (0);
770 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
771 ((cur[0] >= 'a') && (cur[0] <= 'z')))
772 cur++;
773 }
774 return (1);
775}
776
Owen Taylor3473f882001-02-23 17:55:21 +0000777/************************************************************************
778 * *
779 * Parser stacks related functions and macros *
780 * *
781 ************************************************************************/
782
783xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
784 const xmlChar ** str);
785
Daniel Veillard0fb18932003-09-07 09:14:37 +0000786#ifdef SAX2
787/**
788 * nsPush:
789 * @ctxt: an XML parser context
790 * @prefix: the namespace prefix or NULL
791 * @URL: the namespace name
792 *
793 * Pushes a new parser namespace on top of the ns stack
794 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000795 * Returns -1 in case of error, -2 if the namespace should be discarded
796 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000797 */
798static int
799nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
800{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000801 if (ctxt->options & XML_PARSE_NSCLEAN) {
802 int i;
803 for (i = 0;i < ctxt->nsNr;i += 2) {
804 if (ctxt->nsTab[i] == prefix) {
805 /* in scope */
806 if (ctxt->nsTab[i + 1] == URL)
807 return(-2);
808 /* out of scope keep it */
809 break;
810 }
811 }
812 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000813 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
814 ctxt->nsMax = 10;
815 ctxt->nsNr = 0;
816 ctxt->nsTab = (const xmlChar **)
817 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
818 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000819 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000820 ctxt->nsMax = 0;
821 return (-1);
822 }
823 } else if (ctxt->nsNr >= ctxt->nsMax) {
824 ctxt->nsMax *= 2;
825 ctxt->nsTab = (const xmlChar **)
826 xmlRealloc(ctxt->nsTab,
827 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
828 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000829 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000830 ctxt->nsMax /= 2;
831 return (-1);
832 }
833 }
834 ctxt->nsTab[ctxt->nsNr++] = prefix;
835 ctxt->nsTab[ctxt->nsNr++] = URL;
836 return (ctxt->nsNr);
837}
838/**
839 * nsPop:
840 * @ctxt: an XML parser context
841 * @nr: the number to pop
842 *
843 * Pops the top @nr parser prefix/namespace from the ns stack
844 *
845 * Returns the number of namespaces removed
846 */
847static int
848nsPop(xmlParserCtxtPtr ctxt, int nr)
849{
850 int i;
851
852 if (ctxt->nsTab == NULL) return(0);
853 if (ctxt->nsNr < nr) {
854 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
855 nr = ctxt->nsNr;
856 }
857 if (ctxt->nsNr <= 0)
858 return (0);
859
860 for (i = 0;i < nr;i++) {
861 ctxt->nsNr--;
862 ctxt->nsTab[ctxt->nsNr] = NULL;
863 }
864 return(nr);
865}
866#endif
867
868static int
869xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
870 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000871 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000872 int maxatts;
873
874 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000876 atts = (const xmlChar **)
877 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000878 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000879 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000880 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
881 if (attallocs == NULL) goto mem_error;
882 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000883 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 } else if (nr + 5 > ctxt->maxatts) {
885 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000886 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
887 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000889 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
891 (maxatts / 5) * sizeof(int));
892 if (attallocs == NULL) goto mem_error;
893 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000894 ctxt->maxatts = maxatts;
895 }
896 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000897mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000898 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000899 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000900}
901
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000902/**
903 * inputPush:
904 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000905 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000906 *
907 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000908 *
909 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000910 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000911extern int
912inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
913{
914 if (ctxt->inputNr >= ctxt->inputMax) {
915 ctxt->inputMax *= 2;
916 ctxt->inputTab =
917 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
918 ctxt->inputMax *
919 sizeof(ctxt->inputTab[0]));
920 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000921 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000922 return (0);
923 }
924 }
925 ctxt->inputTab[ctxt->inputNr] = value;
926 ctxt->input = value;
927 return (ctxt->inputNr++);
928}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000929/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000930 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000931 * @ctxt: an XML parser context
932 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000933 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000934 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000935 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000936 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000937extern xmlParserInputPtr
938inputPop(xmlParserCtxtPtr ctxt)
939{
940 xmlParserInputPtr ret;
941
942 if (ctxt->inputNr <= 0)
943 return (0);
944 ctxt->inputNr--;
945 if (ctxt->inputNr > 0)
946 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
947 else
948 ctxt->input = NULL;
949 ret = ctxt->inputTab[ctxt->inputNr];
950 ctxt->inputTab[ctxt->inputNr] = 0;
951 return (ret);
952}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000953/**
954 * nodePush:
955 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000956 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000957 *
958 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000959 *
960 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000961 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000962extern int
963nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
964{
965 if (ctxt->nodeNr >= ctxt->nodeMax) {
966 ctxt->nodeMax *= 2;
967 ctxt->nodeTab =
968 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
969 ctxt->nodeMax *
970 sizeof(ctxt->nodeTab[0]));
971 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000972 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 return (0);
974 }
975 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000976 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000977 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000978 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
979 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000980 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000981 return(0);
982 }
Daniel Veillard1c732d22002-11-30 11:22:59 +0000983 ctxt->nodeTab[ctxt->nodeNr] = value;
984 ctxt->node = value;
985 return (ctxt->nodeNr++);
986}
987/**
988 * nodePop:
989 * @ctxt: an XML parser context
990 *
991 * Pops the top element node from the node stack
992 *
993 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000994 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000995extern xmlNodePtr
996nodePop(xmlParserCtxtPtr ctxt)
997{
998 xmlNodePtr ret;
999
1000 if (ctxt->nodeNr <= 0)
1001 return (0);
1002 ctxt->nodeNr--;
1003 if (ctxt->nodeNr > 0)
1004 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1005 else
1006 ctxt->node = NULL;
1007 ret = ctxt->nodeTab[ctxt->nodeNr];
1008 ctxt->nodeTab[ctxt->nodeNr] = 0;
1009 return (ret);
1010}
1011/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001012 * nameNsPush:
1013 * @ctxt: an XML parser context
1014 * @value: the element name
1015 * @prefix: the element prefix
1016 * @URI: the element namespace name
1017 *
1018 * Pushes a new element name/prefix/URL on top of the name stack
1019 *
1020 * Returns -1 in case of error, the index in the stack otherwise
1021 */
1022static int
1023nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1024 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1025{
1026 if (ctxt->nameNr >= ctxt->nameMax) {
1027 const xmlChar * *tmp;
1028 void **tmp2;
1029 ctxt->nameMax *= 2;
1030 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1031 ctxt->nameMax *
1032 sizeof(ctxt->nameTab[0]));
1033 if (tmp == NULL) {
1034 ctxt->nameMax /= 2;
1035 goto mem_error;
1036 }
1037 ctxt->nameTab = tmp;
1038 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1039 ctxt->nameMax * 3 *
1040 sizeof(ctxt->pushTab[0]));
1041 if (tmp2 == NULL) {
1042 ctxt->nameMax /= 2;
1043 goto mem_error;
1044 }
1045 ctxt->pushTab = tmp2;
1046 }
1047 ctxt->nameTab[ctxt->nameNr] = value;
1048 ctxt->name = value;
1049 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1050 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001051 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001052 return (ctxt->nameNr++);
1053mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001054 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001055 return (-1);
1056}
1057/**
1058 * nameNsPop:
1059 * @ctxt: an XML parser context
1060 *
1061 * Pops the top element/prefix/URI name from the name stack
1062 *
1063 * Returns the name just removed
1064 */
1065static const xmlChar *
1066nameNsPop(xmlParserCtxtPtr ctxt)
1067{
1068 const xmlChar *ret;
1069
1070 if (ctxt->nameNr <= 0)
1071 return (0);
1072 ctxt->nameNr--;
1073 if (ctxt->nameNr > 0)
1074 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1075 else
1076 ctxt->name = NULL;
1077 ret = ctxt->nameTab[ctxt->nameNr];
1078 ctxt->nameTab[ctxt->nameNr] = NULL;
1079 return (ret);
1080}
1081
1082/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001083 * namePush:
1084 * @ctxt: an XML parser context
1085 * @value: the element name
1086 *
1087 * Pushes a new element name on top of the name stack
1088 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001089 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001090 */
1091extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001092namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001093{
1094 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001095 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001096 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001098 ctxt->nameMax *
1099 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001100 if (tmp == NULL) {
1101 ctxt->nameMax /= 2;
1102 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001103 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001104 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001105 }
1106 ctxt->nameTab[ctxt->nameNr] = value;
1107 ctxt->name = value;
1108 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001109mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001110 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001111 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001112}
1113/**
1114 * namePop:
1115 * @ctxt: an XML parser context
1116 *
1117 * Pops the top element name from the name stack
1118 *
1119 * Returns the name just removed
1120 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001121extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001122namePop(xmlParserCtxtPtr ctxt)
1123{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001124 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001125
1126 if (ctxt->nameNr <= 0)
1127 return (0);
1128 ctxt->nameNr--;
1129 if (ctxt->nameNr > 0)
1130 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1131 else
1132 ctxt->name = NULL;
1133 ret = ctxt->nameTab[ctxt->nameNr];
1134 ctxt->nameTab[ctxt->nameNr] = 0;
1135 return (ret);
1136}
Owen Taylor3473f882001-02-23 17:55:21 +00001137
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001138static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001139 if (ctxt->spaceNr >= ctxt->spaceMax) {
1140 ctxt->spaceMax *= 2;
1141 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1142 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1143 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001145 return(0);
1146 }
1147 }
1148 ctxt->spaceTab[ctxt->spaceNr] = val;
1149 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1150 return(ctxt->spaceNr++);
1151}
1152
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001153static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001154 int ret;
1155 if (ctxt->spaceNr <= 0) return(0);
1156 ctxt->spaceNr--;
1157 if (ctxt->spaceNr > 0)
1158 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1159 else
1160 ctxt->space = NULL;
1161 ret = ctxt->spaceTab[ctxt->spaceNr];
1162 ctxt->spaceTab[ctxt->spaceNr] = -1;
1163 return(ret);
1164}
1165
1166/*
1167 * Macros for accessing the content. Those should be used only by the parser,
1168 * and not exported.
1169 *
1170 * Dirty macros, i.e. one often need to make assumption on the context to
1171 * use them
1172 *
1173 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1174 * To be used with extreme caution since operations consuming
1175 * characters may move the input buffer to a different location !
1176 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1177 * This should be used internally by the parser
1178 * only to compare to ASCII values otherwise it would break when
1179 * running with UTF-8 encoding.
1180 * RAW same as CUR but in the input buffer, bypass any token
1181 * extraction that may have been done
1182 * NXT(n) returns the n'th next xmlChar. Like CUR, it should be used only
1183 * to compare against ASCII-based substrings.
1184 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001185 * strings without newlines within the parser.
1186 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1187 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001188 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1189 *
1190 * NEXT Skip to the next character, this does the proper decoding
1191 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001192 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001193 * CUR_CHAR(l) returns the current unicode character (int), set l
1194 * to the number of xmlChars used for the encoding [0-5].
1195 * CUR_SCHAR same but operate on a string instead of the context
1196 * COPY_BUF copy the current unicode char to the target buffer, increment
1197 * the index
1198 * GROW, SHRINK handling of input buffers
1199 */
1200
/*
 * Raw accessors on the current input of a parser context; all of them
 * require a variable named ctxt in scope.
 * RAW and CUR both expand to the byte at ctxt->input->cur.
 * NXT(val) reads the byte val positions after ctxt->input->cur.
 * CUR_PTR is the cursor pointer itself and can be assigned to.
 * None of them performs any bounds check.
 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001201#define RAW (*ctxt->input->cur)
1202#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +00001203#define NXT(val) ctxt->input->cur[(val)]
1204#define CUR_PTR ctxt->input->cur
1205
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1 ... cn in order. Each CMPn is built on top of
 * CMP(n-1) so the comparison short-circuits at the first mismatch.
 * The s argument is pasted unparenthesized after a cast and is evaluated
 * once per compared byte, so pass a simple pointer expression without
 * side effects.
 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00001206#define CMP4( s, c1, c2, c3, c4 ) \
1207 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1208 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1209#define CMP5( s, c1, c2, c3, c4, c5 ) \
1210 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1211#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1212 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1213#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1214 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1215#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1216 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1217#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1218 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1219 ((unsigned char *) s)[ 8 ] == c9 )
1220#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1221 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1222 ((unsigned char *) s)[ 9 ] == c10 )
1223
/*
 * SKIP(val): advance the cursor, the column and ctxt->nbChars by val
 * (val is evaluated three times; the line counter is not touched).
 * Afterwards, if the cursor sits on '%' xmlParserHandlePEReference() is
 * called, and if it sits on a 0 byte and the input cannot be grown the
 * current input is popped with xmlPopInput().
 */
Owen Taylor3473f882001-02-23 17:55:21 +00001224#define SKIP(val) do { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001225 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
Owen Taylor3473f882001-02-23 17:55:21 +00001226 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001227 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +00001228 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1229 xmlPopInput(ctxt); \
1230 } while (0)
1231
/*
 * SHRINK: call xmlSHRINK(ctxt) when the context is not progressive
 * (ctxt->progressive == 0), more than 2 * INPUT_CHUNK bytes lie between
 * the buffer base and the cursor, and fewer than 2 * INPUT_CHUNK bytes
 * remain before the end.
 * NOTE: the expansion is a bare "if" statement which already ends with a
 * semicolon; it is not wrapped in do { } while (0), so it must not be
 * used as the unbraced body of an if that has an else.
 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001232#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001233 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1234 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001235 xmlSHRINK (ctxt);
1236
1237static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1238 xmlParserInputShrink(ctxt->input);
1239 if ((*ctxt->input->cur == 0) &&
1240 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1241 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001242 }
Owen Taylor3473f882001-02-23 17:55:21 +00001243
/*
 * GROW: call xmlGROW(ctxt) when the context is not progressive
 * (ctxt->progressive == 0) and fewer than INPUT_CHUNK bytes remain
 * between the cursor and the end of the current input buffer.
 * NOTE: the expansion is a bare "if" statement which already ends with a
 * semicolon; it is not wrapped in do { } while (0), so it must not be
 * used as the unbraced body of an if that has an else.
 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001244#define GROW if ((ctxt->progressive == 0) && \
1245 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001246 xmlGROW (ctxt);
1247
1248static void xmlGROW (xmlParserCtxtPtr ctxt) {
1249 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1250 if ((*ctxt->input->cur == 0) &&
1251 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1252 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001253}
Owen Taylor3473f882001-02-23 17:55:21 +00001254
/*
 * SKIP_BLANKS and NEXT are shorthands for xmlSkipBlankChars(ctxt) and
 * xmlNextChar(ctxt); both require a variable named ctxt in scope.
 */
1255#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1256
1257#define NEXT xmlNextChar(ctxt)
1258
/*
 * NEXT1: step over exactly one byte, bumping the column, the cursor and
 * ctxt->nbChars, then try to grow the input if the cursor reached a 0
 * byte. The line counter is not updated and the input is never popped.
 * NOTE: the expansion is a plain { } block, not do { } while (0), so
 * "NEXT1;" followed by an else does not compile.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00001259#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001260 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001261 ctxt->input->cur++; \
1262 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001263 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001264 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1265 }
1266
/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A '\n' at the cursor bumps the line counter and resets the column to 1,
 * anything else bumps the column by one. After moving, a '%' at the
 * cursor triggers xmlParserHandlePEReference(). ctxt->nbChars is not
 * updated and the input is neither grown nor popped here.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00001267#define NEXTL(l) do { \
1268 if (*(ctxt->input->cur) == '\n') { \
1269 ctxt->input->line++; ctxt->input->col = 1; \
1270 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00001271 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00001272 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +00001273 } while (0)
1274
/*
 * CUR_CHAR(l) and CUR_SCHAR(s, l) forward to xmlCurrentChar() and
 * xmlStringCurrentChar(); l must be an lvalue since its address is
 * passed to the callee.
 */
1275#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1276#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1277
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise xmlCopyCharMultiByte() writes it and returns the number of
 * bytes to add to i. No bounds check is done on b.
 * NOTE: the expansion is a bare if/else without braces or a
 * do { } while (0) wrapper, so take care when using it inside another
 * unbraced if.
 */
1278#define COPY_BUF(l,b,i,v) \
1279 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001280 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001281
1282/**
1283 * xmlSkipBlankChars:
1284 * @ctxt: the XML parser context
1285 *
1286 * skip all blanks character found at that point in the input streams.
1287 * It pops up finished entities in the process if allowable at that point.
1288 *
1289 * Returns the number of space chars skipped
1290 */
1291
1292int
1293xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001294 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001295
1296 /*
1297 * It's Okay to use CUR/NEXT here since all the blanks are on
1298 * the ASCII range.
1299 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001300 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1301 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001302 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001303 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001304 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001305 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001306 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001307 if (*cur == '\n') {
1308 ctxt->input->line++; ctxt->input->col = 1;
1309 }
1310 cur++;
1311 res++;
1312 if (*cur == 0) {
1313 ctxt->input->cur = cur;
1314 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1315 cur = ctxt->input->cur;
1316 }
1317 }
1318 ctxt->input->cur = cur;
1319 } else {
1320 int cur;
1321 do {
1322 cur = CUR;
1323 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1324 NEXT;
1325 cur = CUR;
1326 res++;
1327 }
1328 while ((cur == 0) && (ctxt->inputNr > 1) &&
1329 (ctxt->instate != XML_PARSER_COMMENT)) {
1330 xmlPopInput(ctxt);
1331 cur = CUR;
1332 }
1333 /*
1334 * Need to handle support of entities branching here
1335 */
1336 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1337 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1338 }
Owen Taylor3473f882001-02-23 17:55:21 +00001339 return(res);
1340}
1341
1342/************************************************************************
1343 * *
1344 * Commodity functions to handle entities *
1345 * *
1346 ************************************************************************/
1347
1348/**
1349 * xmlPopInput:
1350 * @ctxt: an XML parser context
1351 *
1352 * xmlPopInput: the current input pointed by ctxt->input came to an end
1353 * pop it and return the next char.
1354 *
1355 * Returns the current xmlChar in the parser context
1356 */
1357xmlChar
1358xmlPopInput(xmlParserCtxtPtr ctxt) {
1359 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1360 if (xmlParserDebugEntities)
1361 xmlGenericError(xmlGenericErrorContext,
1362 "Popping input %d\n", ctxt->inputNr);
1363 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001364 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001365 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1366 return(xmlPopInput(ctxt));
1367 return(CUR);
1368}
1369
1370/**
1371 * xmlPushInput:
1372 * @ctxt: an XML parser context
1373 * @input: an XML parser input fragment (entity, XML fragment ...).
1374 *
1375 * xmlPushInput: switch to a new input stream which is stacked on top
1376 * of the previous one(s).
1377 */
1378void
1379xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1380 if (input == NULL) return;
1381
1382 if (xmlParserDebugEntities) {
1383 if ((ctxt->input != NULL) && (ctxt->input->filename))
1384 xmlGenericError(xmlGenericErrorContext,
1385 "%s(%d): ", ctxt->input->filename,
1386 ctxt->input->line);
1387 xmlGenericError(xmlGenericErrorContext,
1388 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1389 }
1390 inputPush(ctxt, input);
1391 GROW;
1392}
1393
1394/**
1395 * xmlParseCharRef:
1396 * @ctxt: an XML parser context
1397 *
1398 * parse Reference declarations
1399 *
1400 * [66] CharRef ::= '&#' [0-9]+ ';' |
1401 * '&#x' [0-9a-fA-F]+ ';'
1402 *
1403 * [ WFC: Legal Character ]
1404 * Characters referred to using character references must match the
1405 * production for Char.
1406 *
1407 * Returns the value parsed (as an int), 0 in case of error
1408 */
1409int
1410xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001411 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001412 int count = 0;
1413
Owen Taylor3473f882001-02-23 17:55:21 +00001414 /*
1415 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1416 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001417 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001418 (NXT(2) == 'x')) {
1419 SKIP(3);
1420 GROW;
1421 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001422 if (count++ > 20) {
1423 count = 0;
1424 GROW;
1425 }
1426 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001427 val = val * 16 + (CUR - '0');
1428 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1429 val = val * 16 + (CUR - 'a') + 10;
1430 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1431 val = val * 16 + (CUR - 'A') + 10;
1432 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001433 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001434 val = 0;
1435 break;
1436 }
1437 NEXT;
1438 count++;
1439 }
1440 if (RAW == ';') {
1441 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001442 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 ctxt->nbChars ++;
1444 ctxt->input->cur++;
1445 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001446 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001447 SKIP(2);
1448 GROW;
1449 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001450 if (count++ > 20) {
1451 count = 0;
1452 GROW;
1453 }
1454 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001455 val = val * 10 + (CUR - '0');
1456 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001457 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = 0;
1459 break;
1460 }
1461 NEXT;
1462 count++;
1463 }
1464 if (RAW == ';') {
1465 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001466 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001467 ctxt->nbChars ++;
1468 ctxt->input->cur++;
1469 }
1470 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001471 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001472 }
1473
1474 /*
1475 * [ WFC: Legal Character ]
1476 * Characters referred to using character references must match the
1477 * production for Char.
1478 */
William M. Brack871611b2003-10-18 04:53:14 +00001479 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001480 return(val);
1481 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001482 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1483 "xmlParseCharRef: invalid xmlChar value %d\n",
1484 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001485 }
1486 return(0);
1487}
1488
1489/**
1490 * xmlParseStringCharRef:
1491 * @ctxt: an XML parser context
1492 * @str: a pointer to an index in the string
1493 *
1494 * parse Reference declarations, variant parsing from a string rather
1495 * than an an input flow.
1496 *
1497 * [66] CharRef ::= '&#' [0-9]+ ';' |
1498 * '&#x' [0-9a-fA-F]+ ';'
1499 *
1500 * [ WFC: Legal Character ]
1501 * Characters referred to using character references must match the
1502 * production for Char.
1503 *
1504 * Returns the value parsed (as an int), 0 in case of error, str will be
1505 * updated to the current value of the index
1506 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507static int
Owen Taylor3473f882001-02-23 17:55:21 +00001508xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1509 const xmlChar *ptr;
1510 xmlChar cur;
1511 int val = 0;
1512
1513 if ((str == NULL) || (*str == NULL)) return(0);
1514 ptr = *str;
1515 cur = *ptr;
1516 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1517 ptr += 3;
1518 cur = *ptr;
1519 while (cur != ';') { /* Non input consuming loop */
1520 if ((cur >= '0') && (cur <= '9'))
1521 val = val * 16 + (cur - '0');
1522 else if ((cur >= 'a') && (cur <= 'f'))
1523 val = val * 16 + (cur - 'a') + 10;
1524 else if ((cur >= 'A') && (cur <= 'F'))
1525 val = val * 16 + (cur - 'A') + 10;
1526 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001527 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001528 val = 0;
1529 break;
1530 }
1531 ptr++;
1532 cur = *ptr;
1533 }
1534 if (cur == ';')
1535 ptr++;
1536 } else if ((cur == '&') && (ptr[1] == '#')){
1537 ptr += 2;
1538 cur = *ptr;
1539 while (cur != ';') { /* Non input consuming loops */
1540 if ((cur >= '0') && (cur <= '9'))
1541 val = val * 10 + (cur - '0');
1542 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001543 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001544 val = 0;
1545 break;
1546 }
1547 ptr++;
1548 cur = *ptr;
1549 }
1550 if (cur == ';')
1551 ptr++;
1552 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001553 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001554 return(0);
1555 }
1556 *str = ptr;
1557
1558 /*
1559 * [ WFC: Legal Character ]
1560 * Characters referred to using character references must match the
1561 * production for Char.
1562 */
William M. Brack871611b2003-10-18 04:53:14 +00001563 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001564 return(val);
1565 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001566 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1567 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1568 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001569 }
1570 return(0);
1571}
1572
1573/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001574 * xmlNewBlanksWrapperInputStream:
1575 * @ctxt: an XML parser context
1576 * @entity: an Entity pointer
1577 *
1578 * Create a new input stream for wrapping
1579 * blanks around a PEReference
1580 *
1581 * Returns the new input stream or NULL
1582 */
1583
1584static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1585
Daniel Veillardf4862f02002-09-10 11:13:43 +00001586static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001587xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1588 xmlParserInputPtr input;
1589 xmlChar *buffer;
1590 size_t length;
1591 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001592 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1593 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001594 return(NULL);
1595 }
1596 if (xmlParserDebugEntities)
1597 xmlGenericError(xmlGenericErrorContext,
1598 "new blanks wrapper for entity: %s\n", entity->name);
1599 input = xmlNewInputStream(ctxt);
1600 if (input == NULL) {
1601 return(NULL);
1602 }
1603 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001604 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001606 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001607 return(NULL);
1608 }
1609 buffer [0] = ' ';
1610 buffer [1] = '%';
1611 buffer [length-3] = ';';
1612 buffer [length-2] = ' ';
1613 buffer [length-1] = 0;
1614 memcpy(buffer + 2, entity->name, length - 5);
1615 input->free = deallocblankswrapper;
1616 input->base = buffer;
1617 input->cur = buffer;
1618 input->length = length;
1619 input->end = &buffer[length];
1620 return(input);
1621}
1622
1623/**
Owen Taylor3473f882001-02-23 17:55:21 +00001624 * xmlParserHandlePEReference:
1625 * @ctxt: the parser context
1626 *
1627 * [69] PEReference ::= '%' Name ';'
1628 *
1629 * [ WFC: No Recursion ]
1630 * A parsed entity must not contain a recursive
1631 * reference to itself, either directly or indirectly.
1632 *
1633 * [ WFC: Entity Declared ]
1634 * In a document without any DTD, a document with only an internal DTD
1635 * subset which contains no parameter entity references, or a document
1636 * with "standalone='yes'", ... ... The declaration of a parameter
1637 * entity must precede any reference to it...
1638 *
1639 * [ VC: Entity Declared ]
1640 * In a document with an external subset or external parameter entities
1641 * with "standalone='no'", ... ... The declaration of a parameter entity
1642 * must precede any reference to it...
1643 *
1644 * [ WFC: In DTD ]
1645 * Parameter-entity references may only appear in the DTD.
1646 * NOTE: misleading but this is handled.
1647 *
1648 * A PEReference may have been detected in the current input stream
1649 * the handling is done accordingly to
1650 * http://www.w3.org/TR/REC-xml#entproc
1651 * i.e.
1652 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001653 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001654 */
1655void
1656xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001657 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001658 xmlEntityPtr entity = NULL;
1659 xmlParserInputPtr input;
1660
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (RAW != '%') return;
1662 switch(ctxt->instate) {
1663 case XML_PARSER_CDATA_SECTION:
1664 return;
1665 case XML_PARSER_COMMENT:
1666 return;
1667 case XML_PARSER_START_TAG:
1668 return;
1669 case XML_PARSER_END_TAG:
1670 return;
1671 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001672 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001673 return;
1674 case XML_PARSER_PROLOG:
1675 case XML_PARSER_START:
1676 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001677 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 return;
1679 case XML_PARSER_ENTITY_DECL:
1680 case XML_PARSER_CONTENT:
1681 case XML_PARSER_ATTRIBUTE_VALUE:
1682 case XML_PARSER_PI:
1683 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001684 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001685 /* we just ignore it there */
1686 return;
1687 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001689 return;
1690 case XML_PARSER_ENTITY_VALUE:
1691 /*
1692 * NOTE: in the case of entity values, we don't do the
1693 * substitution here since we need the literal
1694 * entity value to be able to save the internal
1695 * subset of the document.
1696 * This will be handled by xmlStringDecodeEntities
1697 */
1698 return;
1699 case XML_PARSER_DTD:
1700 /*
1701 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1702 * In the internal DTD subset, parameter-entity references
1703 * can occur only where markup declarations can occur, not
1704 * within markup declarations.
1705 * In that case this is handled in xmlParseMarkupDecl
1706 */
1707 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1708 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001709 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001710 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001711 break;
1712 case XML_PARSER_IGNORE:
1713 return;
1714 }
1715
1716 NEXT;
1717 name = xmlParseName(ctxt);
1718 if (xmlParserDebugEntities)
1719 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001720 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001721 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001722 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001723 } else {
1724 if (RAW == ';') {
1725 NEXT;
1726 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1727 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1728 if (entity == NULL) {
1729
1730 /*
1731 * [ WFC: Entity Declared ]
1732 * In a document without any DTD, a document with only an
1733 * internal DTD subset which contains no parameter entity
1734 * references, or a document with "standalone='yes'", ...
1735 * ... The declaration of a parameter entity must precede
1736 * any reference to it...
1737 */
1738 if ((ctxt->standalone == 1) ||
1739 ((ctxt->hasExternalSubset == 0) &&
1740 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001741 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001742 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001743 } else {
1744 /*
1745 * [ VC: Entity Declared ]
1746 * In a document with an external subset or external
1747 * parameter entities with "standalone='no'", ...
1748 * ... The declaration of a parameter entity must precede
1749 * any reference to it...
1750 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001751 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1752 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1753 "PEReference: %%%s; not found\n",
1754 name);
1755 } else
1756 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1757 "PEReference: %%%s; not found\n",
1758 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001759 ctxt->valid = 0;
1760 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001761 } else if (ctxt->input->free != deallocblankswrapper) {
1762 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1763 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001764 } else {
1765 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1766 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001767 xmlChar start[4];
1768 xmlCharEncoding enc;
1769
Owen Taylor3473f882001-02-23 17:55:21 +00001770 /*
1771 * handle the extra spaces added before and after
1772 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001773 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001774 */
1775 input = xmlNewEntityInputStream(ctxt, entity);
1776 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001777
1778 /*
1779 * Get the 4 first bytes and decode the charset
1780 * if enc != XML_CHAR_ENCODING_NONE
1781 * plug some encoding conversion routines.
1782 */
1783 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001784 if (entity->length >= 4) {
1785 start[0] = RAW;
1786 start[1] = NXT(1);
1787 start[2] = NXT(2);
1788 start[3] = NXT(3);
1789 enc = xmlDetectCharEncoding(start, 4);
1790 if (enc != XML_CHAR_ENCODING_NONE) {
1791 xmlSwitchEncoding(ctxt, enc);
1792 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001793 }
1794
Owen Taylor3473f882001-02-23 17:55:21 +00001795 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001796 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1797 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001798 xmlParseTextDecl(ctxt);
1799 }
Owen Taylor3473f882001-02-23 17:55:21 +00001800 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001801 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1802 "PEReference: %s is not a parameter entity\n",
1803 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001804 }
1805 }
1806 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001807 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001808 }
Owen Taylor3473f882001-02-23 17:55:21 +00001809 }
1810}
1811
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. It expects
 * an integer variable named <buffer>_size and a "mem_error" label in the
 * enclosing function. On allocation failure it jumps to mem_error with
 * @buffer still pointing to the previous, still allocated, block so the
 * block is not lost.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) goto mem_error;				\
    buffer = growBuffer_tmp;						\
}
1821
1822/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001823 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001824 * @ctxt: the parser context
1825 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001826 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001827 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1828 * @end: an end marker xmlChar, 0 if none
1829 * @end2: an end marker xmlChar, 0 if none
1830 * @end3: an end marker xmlChar, 0 if none
1831 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001832 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001833 *
1834 * [67] Reference ::= EntityRef | CharRef
1835 *
1836 * [69] PEReference ::= '%' Name ';'
1837 *
1838 * Returns A newly allocated string with the substitution done. The caller
1839 * must deallocate it !
1840 */
1841xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001842xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1843 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001844 xmlChar *buffer = NULL;
1845 int buffer_size = 0;
1846
1847 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001848 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001849 xmlEntityPtr ent;
1850 int c,l;
1851 int nbchars = 0;
1852
Daniel Veillarde57ec792003-09-10 10:50:59 +00001853 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001854 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001855 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001856
1857 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001858 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001859 return(NULL);
1860 }
1861
1862 /*
1863 * allocate a translation buffer.
1864 */
1865 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001866 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001867 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001868
1869 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001870 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001871 * we are operating on already parsed values.
1872 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873 if (str < last)
1874 c = CUR_SCHAR(str, l);
1875 else
1876 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001877 while ((c != 0) && (c != end) && /* non input consuming loop */
1878 (c != end2) && (c != end3)) {
1879
1880 if (c == 0) break;
1881 if ((c == '&') && (str[1] == '#')) {
1882 int val = xmlParseStringCharRef(ctxt, &str);
1883 if (val != 0) {
1884 COPY_BUF(0,buffer,nbchars,val);
1885 }
1886 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1887 if (xmlParserDebugEntities)
1888 xmlGenericError(xmlGenericErrorContext,
1889 "String decoding Entity Reference: %.30s\n",
1890 str);
1891 ent = xmlParseStringEntityRef(ctxt, &str);
1892 if ((ent != NULL) &&
1893 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1894 if (ent->content != NULL) {
1895 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1896 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001897 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1898 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001899 }
1900 } else if ((ent != NULL) && (ent->content != NULL)) {
1901 xmlChar *rep;
1902
1903 ctxt->depth++;
1904 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1905 0, 0, 0);
1906 ctxt->depth--;
1907 if (rep != NULL) {
1908 current = rep;
1909 while (*current != 0) { /* non input consuming loop */
1910 buffer[nbchars++] = *current++;
1911 if (nbchars >
1912 buffer_size - XML_PARSER_BUFFER_SIZE) {
1913 growBuffer(buffer);
1914 }
1915 }
1916 xmlFree(rep);
1917 }
1918 } else if (ent != NULL) {
1919 int i = xmlStrlen(ent->name);
1920 const xmlChar *cur = ent->name;
1921
1922 buffer[nbchars++] = '&';
1923 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1924 growBuffer(buffer);
1925 }
1926 for (;i > 0;i--)
1927 buffer[nbchars++] = *cur++;
1928 buffer[nbchars++] = ';';
1929 }
1930 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1931 if (xmlParserDebugEntities)
1932 xmlGenericError(xmlGenericErrorContext,
1933 "String decoding PE Reference: %.30s\n", str);
1934 ent = xmlParseStringPEReference(ctxt, &str);
1935 if (ent != NULL) {
1936 xmlChar *rep;
1937
1938 ctxt->depth++;
1939 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1940 0, 0, 0);
1941 ctxt->depth--;
1942 if (rep != NULL) {
1943 current = rep;
1944 while (*current != 0) { /* non input consuming loop */
1945 buffer[nbchars++] = *current++;
1946 if (nbchars >
1947 buffer_size - XML_PARSER_BUFFER_SIZE) {
1948 growBuffer(buffer);
1949 }
1950 }
1951 xmlFree(rep);
1952 }
1953 }
1954 } else {
1955 COPY_BUF(l,buffer,nbchars,c);
1956 str += l;
1957 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1958 growBuffer(buffer);
1959 }
1960 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001961 if (str < last)
1962 c = CUR_SCHAR(str, l);
1963 else
1964 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 }
1966 buffer[nbchars++] = 0;
1967 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001968
1969mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001970 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001971 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001972}
1973
Daniel Veillarde57ec792003-09-10 10:50:59 +00001974/**
1975 * xmlStringDecodeEntities:
1976 * @ctxt: the parser context
1977 * @str: the input string
1978 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1979 * @end: an end marker xmlChar, 0 if none
1980 * @end2: an end marker xmlChar, 0 if none
1981 * @end3: an end marker xmlChar, 0 if none
1982 *
1983 * Takes a entity string content and process to do the adequate substitutions.
1984 *
1985 * [67] Reference ::= EntityRef | CharRef
1986 *
1987 * [69] PEReference ::= '%' Name ';'
1988 *
1989 * Returns A newly allocated string with the substitution done. The caller
1990 * must deallocate it !
1991 */
1992xmlChar *
1993xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1994 xmlChar end, xmlChar end2, xmlChar end3) {
1995 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1996 end, end2, end3));
1997}
Owen Taylor3473f882001-02-23 17:55:21 +00001998
1999/************************************************************************
2000 * *
2001 * Commodity functions to handle xmlChars *
2002 * *
2003 ************************************************************************/
2004
2005/**
2006 * xmlStrndup:
2007 * @cur: the input xmlChar *
2008 * @len: the len of @cur
2009 *
2010 * a strndup for array of xmlChar's
2011 *
2012 * Returns a new xmlChar * or NULL
2013 */
2014xmlChar *
2015xmlStrndup(const xmlChar *cur, int len) {
2016 xmlChar *ret;
2017
2018 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002019 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002020 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002021 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002022 return(NULL);
2023 }
2024 memcpy(ret, cur, len * sizeof(xmlChar));
2025 ret[len] = 0;
2026 return(ret);
2027}
2028
2029/**
2030 * xmlStrdup:
2031 * @cur: the input xmlChar *
2032 *
2033 * a strdup for array of xmlChar's. Since they are supposed to be
2034 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2035 * a termination mark of '0'.
2036 *
2037 * Returns a new xmlChar * or NULL
2038 */
2039xmlChar *
2040xmlStrdup(const xmlChar *cur) {
2041 const xmlChar *p = cur;
2042
2043 if (cur == NULL) return(NULL);
2044 while (*p != 0) p++; /* non input consuming */
2045 return(xmlStrndup(cur, p - cur));
2046}
2047
2048/**
2049 * xmlCharStrndup:
2050 * @cur: the input char *
2051 * @len: the len of @cur
2052 *
2053 * a strndup for char's to xmlChar's
2054 *
2055 * Returns a new xmlChar * or NULL
2056 */
2057
2058xmlChar *
2059xmlCharStrndup(const char *cur, int len) {
2060 int i;
2061 xmlChar *ret;
2062
2063 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002064 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002065 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002066 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002067 return(NULL);
2068 }
2069 for (i = 0;i < len;i++)
2070 ret[i] = (xmlChar) cur[i];
2071 ret[len] = 0;
2072 return(ret);
2073}
2074
2075/**
2076 * xmlCharStrdup:
2077 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002078 *
2079 * a strdup for char's to xmlChar's
2080 *
2081 * Returns a new xmlChar * or NULL
2082 */
2083
2084xmlChar *
2085xmlCharStrdup(const char *cur) {
2086 const char *p = cur;
2087
2088 if (cur == NULL) return(NULL);
2089 while (*p != '\0') p++; /* non input consuming */
2090 return(xmlCharStrndup(cur, p - cur));
2091}
2092
2093/**
2094 * xmlStrcmp:
2095 * @str1: the first xmlChar *
2096 * @str2: the second xmlChar *
2097 *
2098 * a strcmp for xmlChar's
2099 *
2100 * Returns the integer result of the comparison
2101 */
2102
2103int
2104xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2105 register int tmp;
2106
2107 if (str1 == str2) return(0);
2108 if (str1 == NULL) return(-1);
2109 if (str2 == NULL) return(1);
2110 do {
2111 tmp = *str1++ - *str2;
2112 if (tmp != 0) return(tmp);
2113 } while (*str2++ != 0);
2114 return 0;
2115}
2116
2117/**
2118 * xmlStrEqual:
2119 * @str1: the first xmlChar *
2120 * @str2: the second xmlChar *
2121 *
2122 * Check if both string are equal of have same content
2123 * Should be a bit more readable and faster than xmlStrEqual()
2124 *
2125 * Returns 1 if they are equal, 0 if they are different
2126 */
2127
2128int
2129xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2130 if (str1 == str2) return(1);
2131 if (str1 == NULL) return(0);
2132 if (str2 == NULL) return(0);
2133 do {
2134 if (*str1++ != *str2) return(0);
2135 } while (*str2++);
2136 return(1);
2137}
2138
2139/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002140 * xmlStrQEqual:
2141 * @pref: the prefix of the QName
2142 * @name: the localname of the QName
2143 * @str: the second xmlChar *
2144 *
2145 * Check if a QName is Equal to a given string
2146 *
2147 * Returns 1 if they are equal, 0 if they are different
2148 */
2149
2150int
2151xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2152 if (pref == NULL) return(xmlStrEqual(name, str));
2153 if (name == NULL) return(0);
2154 if (str == NULL) return(0);
2155
2156 do {
2157 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002158 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002159 if (*str++ != ':') return(0);
2160 do {
2161 if (*name++ != *str) return(0);
2162 } while (*str++);
2163 return(1);
2164}
2165
2166/**
Owen Taylor3473f882001-02-23 17:55:21 +00002167 * xmlStrncmp:
2168 * @str1: the first xmlChar *
2169 * @str2: the second xmlChar *
2170 * @len: the max comparison length
2171 *
2172 * a strncmp for xmlChar's
2173 *
2174 * Returns the integer result of the comparison
2175 */
2176
2177int
2178xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2179 register int tmp;
2180
2181 if (len <= 0) return(0);
2182 if (str1 == str2) return(0);
2183 if (str1 == NULL) return(-1);
2184 if (str2 == NULL) return(1);
2185 do {
2186 tmp = *str1++ - *str2;
2187 if (tmp != 0 || --len == 0) return(tmp);
2188 } while (*str2++ != 0);
2189 return 0;
2190}
2191
Daniel Veillardb44025c2001-10-11 22:55:55 +00002192static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002193 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2194 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2195 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2196 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2197 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2198 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2199 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2200 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2201 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2202 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2203 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2204 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2205 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2206 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2207 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2208 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2209 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2210 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2211 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2212 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2213 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2214 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2215 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2216 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2217 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2218 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2219 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2220 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2221 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2222 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2223 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2224 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2225};
2226
2227/**
2228 * xmlStrcasecmp:
2229 * @str1: the first xmlChar *
2230 * @str2: the second xmlChar *
2231 *
2232 * a strcasecmp for xmlChar's
2233 *
2234 * Returns the integer result of the comparison
2235 */
2236
2237int
2238xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2239 register int tmp;
2240
2241 if (str1 == str2) return(0);
2242 if (str1 == NULL) return(-1);
2243 if (str2 == NULL) return(1);
2244 do {
2245 tmp = casemap[*str1++] - casemap[*str2];
2246 if (tmp != 0) return(tmp);
2247 } while (*str2++ != 0);
2248 return 0;
2249}
2250
2251/**
2252 * xmlStrncasecmp:
2253 * @str1: the first xmlChar *
2254 * @str2: the second xmlChar *
2255 * @len: the max comparison length
2256 *
2257 * a strncasecmp for xmlChar's
2258 *
2259 * Returns the integer result of the comparison
2260 */
2261
2262int
2263xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2264 register int tmp;
2265
2266 if (len <= 0) return(0);
2267 if (str1 == str2) return(0);
2268 if (str1 == NULL) return(-1);
2269 if (str2 == NULL) return(1);
2270 do {
2271 tmp = casemap[*str1++] - casemap[*str2];
2272 if (tmp != 0 || --len == 0) return(tmp);
2273 } while (*str2++ != 0);
2274 return 0;
2275}
2276
2277/**
2278 * xmlStrchr:
2279 * @str: the xmlChar * array
2280 * @val: the xmlChar to search
2281 *
2282 * a strchr for xmlChar's
2283 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002284 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002285 */
2286
2287const xmlChar *
2288xmlStrchr(const xmlChar *str, xmlChar val) {
2289 if (str == NULL) return(NULL);
2290 while (*str != 0) { /* non input consuming */
2291 if (*str == val) return((xmlChar *) str);
2292 str++;
2293 }
2294 return(NULL);
2295}
2296
2297/**
2298 * xmlStrstr:
2299 * @str: the xmlChar * array (haystack)
2300 * @val: the xmlChar to search (needle)
2301 *
2302 * a strstr for xmlChar's
2303 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002304 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002305 */
2306
2307const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002308xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002309 int n;
2310
2311 if (str == NULL) return(NULL);
2312 if (val == NULL) return(NULL);
2313 n = xmlStrlen(val);
2314
2315 if (n == 0) return(str);
2316 while (*str != 0) { /* non input consuming */
2317 if (*str == *val) {
2318 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2319 }
2320 str++;
2321 }
2322 return(NULL);
2323}
2324
2325/**
2326 * xmlStrcasestr:
2327 * @str: the xmlChar * array (haystack)
2328 * @val: the xmlChar to search (needle)
2329 *
2330 * a case-ignoring strstr for xmlChar's
2331 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002332 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002333 */
2334
2335const xmlChar *
2336xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2337 int n;
2338
2339 if (str == NULL) return(NULL);
2340 if (val == NULL) return(NULL);
2341 n = xmlStrlen(val);
2342
2343 if (n == 0) return(str);
2344 while (*str != 0) { /* non input consuming */
2345 if (casemap[*str] == casemap[*val])
2346 if (!xmlStrncasecmp(str, val, n)) return(str);
2347 str++;
2348 }
2349 return(NULL);
2350}
2351
2352/**
2353 * xmlStrsub:
2354 * @str: the xmlChar * array (haystack)
2355 * @start: the index of the first char (zero based)
2356 * @len: the length of the substring
2357 *
2358 * Extract a substring of a given string
2359 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002360 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002361 */
2362
2363xmlChar *
2364xmlStrsub(const xmlChar *str, int start, int len) {
2365 int i;
2366
2367 if (str == NULL) return(NULL);
2368 if (start < 0) return(NULL);
2369 if (len < 0) return(NULL);
2370
2371 for (i = 0;i < start;i++) {
2372 if (*str == 0) return(NULL);
2373 str++;
2374 }
2375 if (*str == 0) return(NULL);
2376 return(xmlStrndup(str, len));
2377}
2378
2379/**
2380 * xmlStrlen:
2381 * @str: the xmlChar * array
2382 *
2383 * length of a xmlChar's string
2384 *
2385 * Returns the number of xmlChar contained in the ARRAY.
2386 */
2387
2388int
2389xmlStrlen(const xmlChar *str) {
2390 int len = 0;
2391
2392 if (str == NULL) return(0);
2393 while (*str != 0) { /* non input consuming */
2394 str++;
2395 len++;
2396 }
2397 return(len);
2398}
2399
2400/**
2401 * xmlStrncat:
2402 * @cur: the original xmlChar * array
2403 * @add: the xmlChar * array added
2404 * @len: the length of @add
2405 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002406 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002407 * first bytes of @add.
2408 *
2409 * Returns a new xmlChar *, the original @cur is reallocated if needed
2410 * and should not be freed
2411 */
2412
2413xmlChar *
2414xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2415 int size;
2416 xmlChar *ret;
2417
2418 if ((add == NULL) || (len == 0))
2419 return(cur);
2420 if (cur == NULL)
2421 return(xmlStrndup(add, len));
2422
2423 size = xmlStrlen(cur);
2424 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2425 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002426 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002427 return(cur);
2428 }
2429 memcpy(&ret[size], add, len * sizeof(xmlChar));
2430 ret[size + len] = 0;
2431 return(ret);
2432}
2433
2434/**
2435 * xmlStrcat:
2436 * @cur: the original xmlChar * array
2437 * @add: the xmlChar * array added
2438 *
2439 * a strcat for array of xmlChar's. Since they are supposed to be
2440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2441 * a termination mark of '0'.
2442 *
2443 * Returns a new xmlChar * containing the concatenated string.
2444 */
2445xmlChar *
2446xmlStrcat(xmlChar *cur, const xmlChar *add) {
2447 const xmlChar *p = add;
2448
2449 if (add == NULL) return(cur);
2450 if (cur == NULL)
2451 return(xmlStrdup(add));
2452
2453 while (*p != 0) p++; /* non input consuming */
2454 return(xmlStrncat(cur, add, p - add));
2455}
2456
Aleksey Sanine7acf432003-10-02 20:05:27 +00002457/**
2458 * xmlStrPrintf:
2459 * @buf: the result buffer.
2460 * @len: the result buffer length.
2461 * @msg: the message with printf formatting.
2462 * @...: extra parameters for the message.
2463 *
2464 * Formats @msg and places result into @buf.
2465 *
2466 * Returns the number of characters written to @buf or -1 if an error occurs.
2467 */
2468int
2469xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2470 va_list args;
2471 int ret;
2472
2473 if((buf == NULL) || (msg == NULL)) {
2474 return(-1);
2475 }
2476
2477 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002478 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002479 va_end(args);
Daniel Veillardd96f6d32003-10-07 21:25:12 +00002480 buf[len - 1] = 0; /* be safe ! */
Aleksey Sanine7acf432003-10-02 20:05:27 +00002481
2482 return(ret);
2483}
2484
Owen Taylor3473f882001-02-23 17:55:21 +00002485/************************************************************************
2486 * *
2487 * Commodity functions, cleanup needed ? *
2488 * *
2489 ************************************************************************/
2490
2491/**
2492 * areBlanks:
2493 * @ctxt: an XML parser context
2494 * @str: a xmlChar *
2495 * @len: the size of @str
2496 *
2497 * Is this a sequence of blank chars that one can ignore ?
2498 *
2499 * Returns 1 if ignorable 0 otherwise.
2500 */
2501
2502static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2503 int i, ret;
2504 xmlNodePtr lastChild;
2505
Daniel Veillard05c13a22001-09-09 08:38:09 +00002506 /*
2507 * Don't spend time trying to differentiate them, the same callback is
2508 * used !
2509 */
2510 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002511 return(0);
2512
Owen Taylor3473f882001-02-23 17:55:21 +00002513 /*
2514 * Check for xml:space value.
2515 */
2516 if (*(ctxt->space) == 1)
2517 return(0);
2518
2519 /*
2520 * Check that the string is made of blanks
2521 */
2522 for (i = 0;i < len;i++)
William M. Brack76e95df2003-10-18 16:20:14 +00002523 if (!(IS_BLANK_CH(str[i]))) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002524
2525 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002526 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002527 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002528 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 if (ctxt->myDoc != NULL) {
2530 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2531 if (ret == 0) return(1);
2532 if (ret == 1) return(0);
2533 }
2534
2535 /*
2536 * Otherwise, heuristic :-\
2537 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002538 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002539 if ((ctxt->node->children == NULL) &&
2540 (RAW == '<') && (NXT(1) == '/')) return(0);
2541
2542 lastChild = xmlGetLastChild(ctxt->node);
2543 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002544 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2545 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002546 } else if (xmlNodeIsText(lastChild))
2547 return(0);
2548 else if ((ctxt->node->children != NULL) &&
2549 (xmlNodeIsText(ctxt->node->children)))
2550 return(0);
2551 return(1);
2552}
2553
Owen Taylor3473f882001-02-23 17:55:21 +00002554/************************************************************************
2555 * *
2556 * Extra stuff for namespace support *
2557 * Relates to http://www.w3.org/TR/WD-xml-names *
2558 * *
2559 ************************************************************************/
2560
2561/**
2562 * xmlSplitQName:
2563 * @ctxt: an XML parser context
2564 * @name: an XML parser context
2565 * @prefix: a xmlChar **
2566 *
2567 * parse an UTF8 encoded XML qualified name string
2568 *
2569 * [NS 5] QName ::= (Prefix ':')? LocalPart
2570 *
2571 * [NS 6] Prefix ::= NCName
2572 *
2573 * [NS 7] LocalPart ::= NCName
2574 *
2575 * Returns the local part, and prefix is updated
2576 * to get the Prefix if any.
2577 */
2578
2579xmlChar *
2580xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2581 xmlChar buf[XML_MAX_NAMELEN + 5];
2582 xmlChar *buffer = NULL;
2583 int len = 0;
2584 int max = XML_MAX_NAMELEN;
2585 xmlChar *ret = NULL;
2586 const xmlChar *cur = name;
2587 int c;
2588
2589 *prefix = NULL;
2590
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002591 if (cur == NULL) return(NULL);
2592
Owen Taylor3473f882001-02-23 17:55:21 +00002593#ifndef XML_XML_NAMESPACE
2594 /* xml: prefix is not really a namespace */
2595 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2596 (cur[2] == 'l') && (cur[3] == ':'))
2597 return(xmlStrdup(name));
2598#endif
2599
Daniel Veillard597bc482003-07-24 16:08:28 +00002600 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (cur[0] == ':')
2602 return(xmlStrdup(name));
2603
2604 c = *cur++;
2605 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2606 buf[len++] = c;
2607 c = *cur++;
2608 }
2609 if (len >= max) {
2610 /*
2611 * Okay someone managed to make a huge name, so he's ready to pay
2612 * for the processing speed.
2613 */
2614 max = len * 2;
2615
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002616 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002617 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002618 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002619 return(NULL);
2620 }
2621 memcpy(buffer, buf, len);
2622 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2623 if (len + 10 > max) {
2624 max *= 2;
2625 buffer = (xmlChar *) xmlRealloc(buffer,
2626 max * sizeof(xmlChar));
2627 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002628 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002629 return(NULL);
2630 }
2631 }
2632 buffer[len++] = c;
2633 c = *cur++;
2634 }
2635 buffer[len] = 0;
2636 }
2637
Daniel Veillard597bc482003-07-24 16:08:28 +00002638 /* nasty but well=formed
2639 if ((c == ':') && (*cur == 0)) {
2640 return(xmlStrdup(name));
2641 } */
2642
Owen Taylor3473f882001-02-23 17:55:21 +00002643 if (buffer == NULL)
2644 ret = xmlStrndup(buf, len);
2645 else {
2646 ret = buffer;
2647 buffer = NULL;
2648 max = XML_MAX_NAMELEN;
2649 }
2650
2651
2652 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002653 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002654 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002655 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002656 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002657 }
Owen Taylor3473f882001-02-23 17:55:21 +00002658 len = 0;
2659
Daniel Veillardbb284f42002-10-16 18:02:47 +00002660 /*
2661 * Check that the first character is proper to start
2662 * a new name
2663 */
2664 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2665 ((c >= 0x41) && (c <= 0x5A)) ||
2666 (c == '_') || (c == ':'))) {
2667 int l;
2668 int first = CUR_SCHAR(cur, l);
2669
2670 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002671 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002672 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002673 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002674 }
2675 }
2676 cur++;
2677
Owen Taylor3473f882001-02-23 17:55:21 +00002678 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2679 buf[len++] = c;
2680 c = *cur++;
2681 }
2682 if (len >= max) {
2683 /*
2684 * Okay someone managed to make a huge name, so he's ready to pay
2685 * for the processing speed.
2686 */
2687 max = len * 2;
2688
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002689 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002690 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002691 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002692 return(NULL);
2693 }
2694 memcpy(buffer, buf, len);
2695 while (c != 0) { /* tested bigname2.xml */
2696 if (len + 10 > max) {
2697 max *= 2;
2698 buffer = (xmlChar *) xmlRealloc(buffer,
2699 max * sizeof(xmlChar));
2700 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002701 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002702 return(NULL);
2703 }
2704 }
2705 buffer[len++] = c;
2706 c = *cur++;
2707 }
2708 buffer[len] = 0;
2709 }
2710
2711 if (buffer == NULL)
2712 ret = xmlStrndup(buf, len);
2713 else {
2714 ret = buffer;
2715 }
2716 }
2717
2718 return(ret);
2719}
2720
2721/************************************************************************
2722 * *
2723 * The parser itself *
2724 * Relates to http://www.w3.org/TR/REC-xml *
2725 * *
2726 ************************************************************************/
2727
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002728static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002729static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002730 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002731
Owen Taylor3473f882001-02-23 17:55:21 +00002732/**
2733 * xmlParseName:
2734 * @ctxt: an XML parser context
2735 *
2736 * parse an XML name.
2737 *
2738 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2739 * CombiningChar | Extender
2740 *
2741 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2742 *
2743 * [6] Names ::= Name (S Name)*
2744 *
2745 * Returns the Name parsed or NULL
2746 */
2747
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002748const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002749xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002750 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002751 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 int count = 0;
2753
2754 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755
2756 /*
2757 * Accelerator for simple ASCII names
2758 */
2759 in = ctxt->input->cur;
2760 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2761 ((*in >= 0x41) && (*in <= 0x5A)) ||
2762 (*in == '_') || (*in == ':')) {
2763 in++;
2764 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2765 ((*in >= 0x41) && (*in <= 0x5A)) ||
2766 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002767 (*in == '_') || (*in == '-') ||
2768 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002769 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002770 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002771 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002772 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002773 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002774 ctxt->nbChars += count;
2775 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002776 if (ret == NULL)
2777 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002778 return(ret);
2779 }
2780 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002781 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002782}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002783
Daniel Veillard46de64e2002-05-29 08:21:33 +00002784/**
2785 * xmlParseNameAndCompare:
2786 * @ctxt: an XML parser context
2787 *
2788 * parse an XML name and compares for match
2789 * (specialized for endtag parsing)
2790 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002791 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2792 * and the name for mismatch
2793 */
2794
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002795static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002796xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2797 const xmlChar *cmp = other;
2798 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002799 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002800
2801 GROW;
2802
2803 in = ctxt->input->cur;
2804 while (*in != 0 && *in == *cmp) {
2805 ++in;
2806 ++cmp;
2807 }
William M. Brack76e95df2003-10-18 16:20:14 +00002808 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002809 /* success */
2810 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002811 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002812 }
2813 /* failure (or end of input buffer), check with full function */
2814 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002815 /* strings coming from the dictionnary direct compare possible */
2816 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002817 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002818 }
2819 return ret;
2820}
2821
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002822static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002823xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002824 int len = 0, l;
2825 int c;
2826 int count = 0;
2827
2828 /*
2829 * Handler for more complex cases
2830 */
2831 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002832 c = CUR_CHAR(l);
2833 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2834 (!IS_LETTER(c) && (c != '_') &&
2835 (c != ':'))) {
2836 return(NULL);
2837 }
2838
2839 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002840 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002841 (c == '.') || (c == '-') ||
2842 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002843 (IS_COMBINING(c)) ||
2844 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002845 if (count++ > 100) {
2846 count = 0;
2847 GROW;
2848 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002849 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002850 NEXTL(l);
2851 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002853 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002854}
2855
2856/**
2857 * xmlParseStringName:
2858 * @ctxt: an XML parser context
2859 * @str: a pointer to the string pointer (IN/OUT)
2860 *
2861 * parse an XML name.
2862 *
2863 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2864 * CombiningChar | Extender
2865 *
2866 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2867 *
2868 * [6] Names ::= Name (S Name)*
2869 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002870 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002871 * is updated to the current location in the string.
2872 */
2873
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002874static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002875xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2876 xmlChar buf[XML_MAX_NAMELEN + 5];
2877 const xmlChar *cur = *str;
2878 int len = 0, l;
2879 int c;
2880
2881 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002882 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002883 (c != ':')) {
2884 return(NULL);
2885 }
2886
William M. Brack871611b2003-10-18 04:53:14 +00002887 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002888 (c == '.') || (c == '-') ||
2889 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002890 (IS_COMBINING(c)) ||
2891 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002892 COPY_BUF(l,buf,len,c);
2893 cur += l;
2894 c = CUR_SCHAR(cur, l);
2895 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2896 /*
2897 * Okay someone managed to make a huge name, so he's ready to pay
2898 * for the processing speed.
2899 */
2900 xmlChar *buffer;
2901 int max = len * 2;
2902
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002903 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002904 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002905 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002906 return(NULL);
2907 }
2908 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002910 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002911 (c == '.') || (c == '-') ||
2912 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002913 (IS_COMBINING(c)) ||
2914 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if (len + 10 > max) {
2916 max *= 2;
2917 buffer = (xmlChar *) xmlRealloc(buffer,
2918 max * sizeof(xmlChar));
2919 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002920 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002921 return(NULL);
2922 }
2923 }
2924 COPY_BUF(l,buffer,len,c);
2925 cur += l;
2926 c = CUR_SCHAR(cur, l);
2927 }
2928 buffer[len] = 0;
2929 *str = cur;
2930 return(buffer);
2931 }
2932 }
2933 *str = cur;
2934 return(xmlStrndup(buf, len));
2935}
2936
2937/**
2938 * xmlParseNmtoken:
2939 * @ctxt: an XML parser context
2940 *
2941 * parse an XML Nmtoken.
2942 *
2943 * [7] Nmtoken ::= (NameChar)+
2944 *
2945 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2946 *
2947 * Returns the Nmtoken parsed or NULL
2948 */
2949
2950xmlChar *
2951xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2952 xmlChar buf[XML_MAX_NAMELEN + 5];
2953 int len = 0, l;
2954 int c;
2955 int count = 0;
2956
2957 GROW;
2958 c = CUR_CHAR(l);
2959
William M. Brack871611b2003-10-18 04:53:14 +00002960 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002961 (c == '.') || (c == '-') ||
2962 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002963 (IS_COMBINING(c)) ||
2964 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002965 if (count++ > 100) {
2966 count = 0;
2967 GROW;
2968 }
2969 COPY_BUF(l,buf,len,c);
2970 NEXTL(l);
2971 c = CUR_CHAR(l);
2972 if (len >= XML_MAX_NAMELEN) {
2973 /*
2974 * Okay someone managed to make a huge token, so he's ready to pay
2975 * for the processing speed.
2976 */
2977 xmlChar *buffer;
2978 int max = len * 2;
2979
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002980 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002981 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002982 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002983 return(NULL);
2984 }
2985 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002986 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002987 (c == '.') || (c == '-') ||
2988 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002989 (IS_COMBINING(c)) ||
2990 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002991 if (count++ > 100) {
2992 count = 0;
2993 GROW;
2994 }
2995 if (len + 10 > max) {
2996 max *= 2;
2997 buffer = (xmlChar *) xmlRealloc(buffer,
2998 max * sizeof(xmlChar));
2999 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003000 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003001 return(NULL);
3002 }
3003 }
3004 COPY_BUF(l,buffer,len,c);
3005 NEXTL(l);
3006 c = CUR_CHAR(l);
3007 }
3008 buffer[len] = 0;
3009 return(buffer);
3010 }
3011 }
3012 if (len == 0)
3013 return(NULL);
3014 return(xmlStrndup(buf, len));
3015}
3016
3017/**
3018 * xmlParseEntityValue:
3019 * @ctxt: an XML parser context
3020 * @orig: if non-NULL store a copy of the original entity value
3021 *
3022 * parse a value for ENTITY declarations
3023 *
3024 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3025 * "'" ([^%&'] | PEReference | Reference)* "'"
3026 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003027 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003028 */
3029
3030xmlChar *
3031xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3032 xmlChar *buf = NULL;
3033 int len = 0;
3034 int size = XML_PARSER_BUFFER_SIZE;
3035 int c, l;
3036 xmlChar stop;
3037 xmlChar *ret = NULL;
3038 const xmlChar *cur = NULL;
3039 xmlParserInputPtr input;
3040
3041 if (RAW == '"') stop = '"';
3042 else if (RAW == '\'') stop = '\'';
3043 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003044 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003045 return(NULL);
3046 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003050 return(NULL);
3051 }
3052
3053 /*
3054 * The content of the entity definition is copied in a buffer.
3055 */
3056
3057 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3058 input = ctxt->input;
3059 GROW;
3060 NEXT;
3061 c = CUR_CHAR(l);
3062 /*
3063 * NOTE: 4.4.5 Included in Literal
3064 * When a parameter entity reference appears in a literal entity
3065 * value, ... a single or double quote character in the replacement
3066 * text is always treated as a normal data character and will not
3067 * terminate the literal.
3068 * In practice it means we stop the loop only when back at parsing
3069 * the initial entity and the quote is found
3070 */
William M. Brack871611b2003-10-18 04:53:14 +00003071 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003072 (ctxt->input != input))) {
3073 if (len + 5 >= size) {
3074 size *= 2;
3075 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3076 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003077 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003078 return(NULL);
3079 }
3080 }
3081 COPY_BUF(l,buf,len,c);
3082 NEXTL(l);
3083 /*
3084 * Pop-up of finished entities.
3085 */
3086 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3087 xmlPopInput(ctxt);
3088
3089 GROW;
3090 c = CUR_CHAR(l);
3091 if (c == 0) {
3092 GROW;
3093 c = CUR_CHAR(l);
3094 }
3095 }
3096 buf[len] = 0;
3097
3098 /*
3099 * Raise problem w.r.t. '&' and '%' being used in non-entities
3100 * reference constructs. Note Charref will be handled in
3101 * xmlStringDecodeEntities()
3102 */
3103 cur = buf;
3104 while (*cur != 0) { /* non input consuming */
3105 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3106 xmlChar *name;
3107 xmlChar tmp = *cur;
3108
3109 cur++;
3110 name = xmlParseStringName(ctxt, &cur);
3111 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003112 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003113 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003114 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003115 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003116 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3117 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003118 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003119 }
3120 if (name != NULL)
3121 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003122 if (*cur == 0)
3123 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003124 }
3125 cur++;
3126 }
3127
3128 /*
3129 * Then PEReference entities are substituted.
3130 */
3131 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003132 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 xmlFree(buf);
3134 } else {
3135 NEXT;
3136 /*
3137 * NOTE: 4.4.7 Bypassed
3138 * When a general entity reference appears in the EntityValue in
3139 * an entity declaration, it is bypassed and left as is.
3140 * so XML_SUBSTITUTE_REF is not set here.
3141 */
3142 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3143 0, 0, 0);
3144 if (orig != NULL)
3145 *orig = buf;
3146 else
3147 xmlFree(buf);
3148 }
3149
3150 return(ret);
3151}
3152
3153/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003154 * xmlParseAttValueComplex:
3155 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003156 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003157 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003158 *
3159 * parse a value for an attribute, this is the fallback function
3160 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003161 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003162 *
3163 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3164 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003165static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003166xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003167 xmlChar limit = 0;
3168 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003169 int len = 0;
3170 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003171 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003172 xmlChar *current = NULL;
3173 xmlEntityPtr ent;
3174
Owen Taylor3473f882001-02-23 17:55:21 +00003175 if (NXT(0) == '"') {
3176 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3177 limit = '"';
3178 NEXT;
3179 } else if (NXT(0) == '\'') {
3180 limit = '\'';
3181 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3182 NEXT;
3183 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003185 return(NULL);
3186 }
3187
3188 /*
3189 * allocate a translation buffer.
3190 */
3191 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003192 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003194
3195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003197 */
3198 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003199 while ((NXT(0) != limit) && /* checked */
3200 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003201 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003202 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003203 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003204 if (NXT(1) == '#') {
3205 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003206
Owen Taylor3473f882001-02-23 17:55:21 +00003207 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003208 if (ctxt->replaceEntities) {
3209 if (len > buf_size - 10) {
3210 growBuffer(buf);
3211 }
3212 buf[len++] = '&';
3213 } else {
3214 /*
3215 * The reparsing will be done in xmlStringGetNodeList()
3216 * called by the attribute() function in SAX.c
3217 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003218 if (len > buf_size - 10) {
3219 growBuffer(buf);
3220 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003221 buf[len++] = '&';
3222 buf[len++] = '#';
3223 buf[len++] = '3';
3224 buf[len++] = '8';
3225 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003226 }
3227 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003228 if (len > buf_size - 10) {
3229 growBuffer(buf);
3230 }
Owen Taylor3473f882001-02-23 17:55:21 +00003231 len += xmlCopyChar(0, &buf[len], val);
3232 }
3233 } else {
3234 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003235 if ((ent != NULL) &&
3236 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3237 if (len > buf_size - 10) {
3238 growBuffer(buf);
3239 }
3240 if ((ctxt->replaceEntities == 0) &&
3241 (ent->content[0] == '&')) {
3242 buf[len++] = '&';
3243 buf[len++] = '#';
3244 buf[len++] = '3';
3245 buf[len++] = '8';
3246 buf[len++] = ';';
3247 } else {
3248 buf[len++] = ent->content[0];
3249 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003250 } else if ((ent != NULL) &&
3251 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003252 xmlChar *rep;
3253
3254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3255 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003256 XML_SUBSTITUTE_REF,
3257 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 if (rep != NULL) {
3259 current = rep;
3260 while (*current != 0) { /* non input consuming */
3261 buf[len++] = *current++;
3262 if (len > buf_size - 10) {
3263 growBuffer(buf);
3264 }
3265 }
3266 xmlFree(rep);
3267 }
3268 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003269 if (len > buf_size - 10) {
3270 growBuffer(buf);
3271 }
Owen Taylor3473f882001-02-23 17:55:21 +00003272 if (ent->content != NULL)
3273 buf[len++] = ent->content[0];
3274 }
3275 } else if (ent != NULL) {
3276 int i = xmlStrlen(ent->name);
3277 const xmlChar *cur = ent->name;
3278
3279 /*
3280 * This may look absurd but is needed to detect
3281 * entities problems
3282 */
3283 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3284 (ent->content != NULL)) {
3285 xmlChar *rep;
3286 rep = xmlStringDecodeEntities(ctxt, ent->content,
3287 XML_SUBSTITUTE_REF, 0, 0, 0);
3288 if (rep != NULL)
3289 xmlFree(rep);
3290 }
3291
3292 /*
3293 * Just output the reference
3294 */
3295 buf[len++] = '&';
3296 if (len > buf_size - i - 10) {
3297 growBuffer(buf);
3298 }
3299 for (;i > 0;i--)
3300 buf[len++] = *cur++;
3301 buf[len++] = ';';
3302 }
3303 }
3304 } else {
3305 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003306 if ((len != 0) || (!normalize)) {
3307 if ((!normalize) || (!in_space)) {
3308 COPY_BUF(l,buf,len,0x20);
3309 if (len > buf_size - 10) {
3310 growBuffer(buf);
3311 }
3312 }
3313 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003316 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003317 COPY_BUF(l,buf,len,c);
3318 if (len > buf_size - 10) {
3319 growBuffer(buf);
3320 }
3321 }
3322 NEXTL(l);
3323 }
3324 GROW;
3325 c = CUR_CHAR(l);
3326 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003327 if ((in_space) && (normalize)) {
3328 while (buf[len - 1] == 0x20) len--;
3329 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003330 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003331 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003332 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003333 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003334 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3335 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003336 } else
3337 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003338 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003339 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003340
3341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003342 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003343 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003344}
3345
3346/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003347 * xmlParseAttValue:
3348 * @ctxt: an XML parser context
3349 *
3350 * parse a value for an attribute
3351 * Note: the parser won't do substitution of entities here, this
3352 * will be handled later in xmlStringGetNodeList
3353 *
3354 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3355 * "'" ([^<&'] | Reference)* "'"
3356 *
3357 * 3.3.3 Attribute-Value Normalization:
3358 * Before the value of an attribute is passed to the application or
3359 * checked for validity, the XML processor must normalize it as follows:
3360 * - a character reference is processed by appending the referenced
3361 * character to the attribute value
3362 * - an entity reference is processed by recursively processing the
3363 * replacement text of the entity
3364 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3365 * appending #x20 to the normalized value, except that only a single
3366 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3367 * parsed entity or the literal entity value of an internal parsed entity
3368 * - other characters are processed by appending them to the normalized value
3369 * If the declared value is not CDATA, then the XML processor must further
3370 * process the normalized attribute value by discarding any leading and
3371 * trailing space (#x20) characters, and by replacing sequences of space
3372 * (#x20) characters by a single space (#x20) character.
3373 * All attributes for which no declaration has been read should be treated
3374 * by a non-validating parser as if declared CDATA.
3375 *
3376 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3377 */
3378
3379
3380xmlChar *
3381xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003382 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003383}
3384
3385/**
Owen Taylor3473f882001-02-23 17:55:21 +00003386 * xmlParseSystemLiteral:
3387 * @ctxt: an XML parser context
3388 *
3389 * parse an XML Literal
3390 *
3391 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3392 *
3393 * Returns the SystemLiteral parsed or NULL
3394 */
3395
3396xmlChar *
3397xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3398 xmlChar *buf = NULL;
3399 int len = 0;
3400 int size = XML_PARSER_BUFFER_SIZE;
3401 int cur, l;
3402 xmlChar stop;
3403 int state = ctxt->instate;
3404 int count = 0;
3405
3406 SHRINK;
3407 if (RAW == '"') {
3408 NEXT;
3409 stop = '"';
3410 } else if (RAW == '\'') {
3411 NEXT;
3412 stop = '\'';
3413 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003415 return(NULL);
3416 }
3417
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003418 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003419 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003420 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003421 return(NULL);
3422 }
3423 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3424 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003425 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003426 if (len + 5 >= size) {
3427 size *= 2;
3428 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3429 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003430 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003431 ctxt->instate = (xmlParserInputState) state;
3432 return(NULL);
3433 }
3434 }
3435 count++;
3436 if (count > 50) {
3437 GROW;
3438 count = 0;
3439 }
3440 COPY_BUF(l,buf,len,cur);
3441 NEXTL(l);
3442 cur = CUR_CHAR(l);
3443 if (cur == 0) {
3444 GROW;
3445 SHRINK;
3446 cur = CUR_CHAR(l);
3447 }
3448 }
3449 buf[len] = 0;
3450 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003451 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003452 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003453 } else {
3454 NEXT;
3455 }
3456 return(buf);
3457}
3458
3459/**
3460 * xmlParsePubidLiteral:
3461 * @ctxt: an XML parser context
3462 *
3463 * parse an XML public literal
3464 *
3465 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3466 *
3467 * Returns the PubidLiteral parsed or NULL.
3468 */
3469
3470xmlChar *
3471xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3472 xmlChar *buf = NULL;
3473 int len = 0;
3474 int size = XML_PARSER_BUFFER_SIZE;
3475 xmlChar cur;
3476 xmlChar stop;
3477 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003478 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003479
3480 SHRINK;
3481 if (RAW == '"') {
3482 NEXT;
3483 stop = '"';
3484 } else if (RAW == '\'') {
3485 NEXT;
3486 stop = '\'';
3487 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003488 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003489 return(NULL);
3490 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003491 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003492 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003493 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003494 return(NULL);
3495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003497 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003498 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (len + 1 >= size) {
3500 size *= 2;
3501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3502 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 return(NULL);
3505 }
3506 }
3507 buf[len++] = cur;
3508 count++;
3509 if (count > 50) {
3510 GROW;
3511 count = 0;
3512 }
3513 NEXT;
3514 cur = CUR;
3515 if (cur == 0) {
3516 GROW;
3517 SHRINK;
3518 cur = CUR;
3519 }
3520 }
3521 buf[len] = 0;
3522 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003523 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003524 } else {
3525 NEXT;
3526 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003527 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003528 return(buf);
3529}
3530
Daniel Veillard48b2f892001-02-25 16:11:03 +00003531void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003532/**
3533 * xmlParseCharData:
3534 * @ctxt: an XML parser context
3535 * @cdata: int indicating whether we are within a CDATA section
3536 *
3537 * parse a CharData section.
3538 * if we are within a CDATA section ']]>' marks an end of section.
3539 *
3540 * The right angle bracket (>) may be represented using the string "&gt;",
3541 * and must, for compatibility, be escaped using "&gt;" or a character
3542 * reference when it appears in the string "]]>" in content, when that
3543 * string is not marking the end of a CDATA section.
3544 *
3545 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3546 */
3547
3548void
3549xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003550 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003551 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003552 int line = ctxt->input->line;
3553 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003554
3555 SHRINK;
3556 GROW;
3557 /*
3558 * Accelerated common case where input don't need to be
3559 * modified before passing it to the handler.
3560 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003561 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003562 in = ctxt->input->cur;
3563 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003564get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003565 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3566 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003567 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003568 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003569 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003570 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003571 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003572 ctxt->input->line++;
3573 in++;
3574 }
3575 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003576 }
3577 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003578 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003579 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003580 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003581 return;
3582 }
3583 in++;
3584 goto get_more;
3585 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003586 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003587 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003588 if ((ctxt->sax->ignorableWhitespace !=
3589 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003590 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003591 const xmlChar *tmp = ctxt->input->cur;
3592 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003593
Daniel Veillarda7374592001-05-10 14:17:55 +00003594 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003595 ctxt->sax->ignorableWhitespace(ctxt->userData,
3596 tmp, nbchar);
3597 } else if (ctxt->sax->characters != NULL)
3598 ctxt->sax->characters(ctxt->userData,
3599 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003600 line = ctxt->input->line;
3601 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003602 } else {
3603 if (ctxt->sax->characters != NULL)
3604 ctxt->sax->characters(ctxt->userData,
3605 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003606 line = ctxt->input->line;
3607 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003608 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003609 }
3610 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003611 if (*in == 0xD) {
3612 in++;
3613 if (*in == 0xA) {
3614 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003615 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003616 ctxt->input->line++;
3617 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003618 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003619 in--;
3620 }
3621 if (*in == '<') {
3622 return;
3623 }
3624 if (*in == '&') {
3625 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003626 }
3627 SHRINK;
3628 GROW;
3629 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003630 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003631 nbchar = 0;
3632 }
Daniel Veillard50582112001-03-26 22:52:16 +00003633 ctxt->input->line = line;
3634 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003635 xmlParseCharDataComplex(ctxt, cdata);
3636}
3637
Daniel Veillard01c13b52002-12-10 15:19:08 +00003638/**
3639 * xmlParseCharDataComplex:
3640 * @ctxt: an XML parser context
3641 * @cdata: int indicating whether we are within a CDATA section
3642 *
3643 * parse a CharData section.this is the fallback function
3644 * of xmlParseCharData() when the parsing requires handling
3645 * of non-ASCII characters.
3646 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003647void
3648xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003649 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3650 int nbchar = 0;
3651 int cur, l;
3652 int count = 0;
3653
3654 SHRINK;
3655 GROW;
3656 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003657 while ((cur != '<') && /* checked */
3658 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003659 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003660 if ((cur == ']') && (NXT(1) == ']') &&
3661 (NXT(2) == '>')) {
3662 if (cdata) break;
3663 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003664 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003665 }
3666 }
3667 COPY_BUF(l,buf,nbchar,cur);
3668 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003669 buf[nbchar] = 0;
3670
Owen Taylor3473f882001-02-23 17:55:21 +00003671 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003672 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003673 */
3674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3675 if (areBlanks(ctxt, buf, nbchar)) {
3676 if (ctxt->sax->ignorableWhitespace != NULL)
3677 ctxt->sax->ignorableWhitespace(ctxt->userData,
3678 buf, nbchar);
3679 } else {
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3682 }
3683 }
3684 nbchar = 0;
3685 }
3686 count++;
3687 if (count > 50) {
3688 GROW;
3689 count = 0;
3690 }
3691 NEXTL(l);
3692 cur = CUR_CHAR(l);
3693 }
3694 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003695 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003696 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003697 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003698 */
3699 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3700 if (areBlanks(ctxt, buf, nbchar)) {
3701 if (ctxt->sax->ignorableWhitespace != NULL)
3702 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3703 } else {
3704 if (ctxt->sax->characters != NULL)
3705 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3706 }
3707 }
3708 }
3709}
3710
3711/**
3712 * xmlParseExternalID:
3713 * @ctxt: an XML parser context
3714 * @publicID: a xmlChar** receiving PubidLiteral
3715 * @strict: indicate whether we should restrict parsing to only
3716 * production [75], see NOTE below
3717 *
3718 * Parse an External ID or a Public ID
3719 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003720 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003721 * 'PUBLIC' S PubidLiteral S SystemLiteral
3722 *
3723 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3724 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3725 *
3726 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3727 *
3728 * Returns the function returns SystemLiteral and in the second
3729 * case publicID receives PubidLiteral, is strict is off
3730 * it is possible to return NULL and have publicID set.
3731 */
3732
3733xmlChar *
3734xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3735 xmlChar *URI = NULL;
3736
3737 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003738
3739 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003740 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003741 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003742 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003743 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3744 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003745 }
3746 SKIP_BLANKS;
3747 URI = xmlParseSystemLiteral(ctxt);
3748 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003749 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003750 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003751 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003752 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003753 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003754 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003755 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003756 }
3757 SKIP_BLANKS;
3758 *publicID = xmlParsePubidLiteral(ctxt);
3759 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003760 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 if (strict) {
3763 /*
3764 * We don't handle [83] so "S SystemLiteral" is required.
3765 */
William M. Brack76e95df2003-10-18 16:20:14 +00003766 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003768 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 } else {
3771 /*
3772 * We handle [83] so we return immediately, if
3773 * "S SystemLiteral" is not detected. From a purely parsing
3774 * point of view that's a nice mess.
3775 */
3776 const xmlChar *ptr;
3777 GROW;
3778
3779 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003780 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003781
William M. Brack76e95df2003-10-18 16:20:14 +00003782 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003783 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3784 }
3785 SKIP_BLANKS;
3786 URI = xmlParseSystemLiteral(ctxt);
3787 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003788 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003789 }
3790 }
3791 return(URI);
3792}
3793
3794/**
3795 * xmlParseComment:
3796 * @ctxt: an XML parser context
3797 *
3798 * Skip an XML (SGML) comment <!-- .... -->
3799 * The spec says that "For compatibility, the string "--" (double-hyphen)
3800 * must not occur within comments. "
3801 *
3802 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3803 */
3804void
3805xmlParseComment(xmlParserCtxtPtr ctxt) {
3806 xmlChar *buf = NULL;
3807 int len;
3808 int size = XML_PARSER_BUFFER_SIZE;
3809 int q, ql;
3810 int r, rl;
3811 int cur, l;
3812 xmlParserInputState state;
3813 xmlParserInputPtr input = ctxt->input;
3814 int count = 0;
3815
3816 /*
3817 * Check that there is a comment right here.
3818 */
3819 if ((RAW != '<') || (NXT(1) != '!') ||
3820 (NXT(2) != '-') || (NXT(3) != '-')) return;
3821
3822 state = ctxt->instate;
3823 ctxt->instate = XML_PARSER_COMMENT;
3824 SHRINK;
3825 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003826 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003827 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003828 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003829 ctxt->instate = state;
3830 return;
3831 }
3832 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003833 if (q == 0)
3834 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003835 NEXTL(ql);
3836 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003837 if (r == 0)
3838 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003839 NEXTL(rl);
3840 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003841 if (cur == 0)
3842 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003843 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003844 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003845 ((cur != '>') ||
3846 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003847 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003848 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003849 }
3850 if (len + 5 >= size) {
3851 size *= 2;
3852 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3853 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003854 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003855 ctxt->instate = state;
3856 return;
3857 }
3858 }
3859 COPY_BUF(ql,buf,len,q);
3860 q = r;
3861 ql = rl;
3862 r = cur;
3863 rl = l;
3864
3865 count++;
3866 if (count > 50) {
3867 GROW;
3868 count = 0;
3869 }
3870 NEXTL(l);
3871 cur = CUR_CHAR(l);
3872 if (cur == 0) {
3873 SHRINK;
3874 GROW;
3875 cur = CUR_CHAR(l);
3876 }
3877 }
3878 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003879 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003880 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003881 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003882 xmlFree(buf);
3883 } else {
3884 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003885 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3886 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003887 }
3888 NEXT;
3889 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3890 (!ctxt->disableSAX))
3891 ctxt->sax->comment(ctxt->userData, buf);
3892 xmlFree(buf);
3893 }
3894 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003895 return;
3896not_terminated:
3897 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3898 "Comment not terminated\n", NULL);
3899 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003900}
3901
3902/**
3903 * xmlParsePITarget:
3904 * @ctxt: an XML parser context
3905 *
3906 * parse the name of a PI
3907 *
3908 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3909 *
3910 * Returns the PITarget name or NULL
3911 */
3912
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003913const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003914xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003915 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003916
3917 name = xmlParseName(ctxt);
3918 if ((name != NULL) &&
3919 ((name[0] == 'x') || (name[0] == 'X')) &&
3920 ((name[1] == 'm') || (name[1] == 'M')) &&
3921 ((name[2] == 'l') || (name[2] == 'L'))) {
3922 int i;
3923 if ((name[0] == 'x') && (name[1] == 'm') &&
3924 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003925 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003926 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003927 return(name);
3928 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003929 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 return(name);
3931 }
3932 for (i = 0;;i++) {
3933 if (xmlW3CPIs[i] == NULL) break;
3934 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3935 return(name);
3936 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003937 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3938 "xmlParsePITarget: invalid name prefix 'xml'\n",
3939 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 }
3941 return(name);
3942}
3943
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content of @catalog is, with optional blanks around each
 * item: the word "catalog", an '=', and a quoted URL, with nothing after
 * the closing quote. Anything else gives an XML_WAR_CATALOG_PI warning,
 * except a missing '=' which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    /* pseudo attribute name */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cursor += 7;

    /*
     * '=' sign.
     * NOTE(review): this is the only malformed case left unreported -
     * confirm it is deliberate.
     */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* opening quote, either kind */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if (!((quote == '\'') || (quote == '"')))
        goto syntax_error;
    cursor++;

    /* the URL runs up to the matching quote */
    start = cursor;
    for (; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto syntax_error;
    uri = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4005
Owen Taylor3473f882001-02-23 17:55:21 +00004006/**
4007 * xmlParsePI:
4008 * @ctxt: an XML parser context
4009 *
4010 * parse an XML Processing Instruction.
4011 *
4012 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4013 *
4014 * The processing is transfered to SAX once parsed.
4015 */
4016
4017void
4018xmlParsePI(xmlParserCtxtPtr ctxt) {
4019 xmlChar *buf = NULL;
4020 int len = 0;
4021 int size = XML_PARSER_BUFFER_SIZE;
4022 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004023 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004024 xmlParserInputState state;
4025 int count = 0;
4026
4027 if ((RAW == '<') && (NXT(1) == '?')) {
4028 xmlParserInputPtr input = ctxt->input;
4029 state = ctxt->instate;
4030 ctxt->instate = XML_PARSER_PI;
4031 /*
4032 * this is a Processing Instruction.
4033 */
4034 SKIP(2);
4035 SHRINK;
4036
4037 /*
4038 * Parse the target name and check for special support like
4039 * namespace.
4040 */
4041 target = xmlParsePITarget(ctxt);
4042 if (target != NULL) {
4043 if ((RAW == '?') && (NXT(1) == '>')) {
4044 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004045 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4046 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004047 }
4048 SKIP(2);
4049
4050 /*
4051 * SAX: PI detected.
4052 */
4053 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4054 (ctxt->sax->processingInstruction != NULL))
4055 ctxt->sax->processingInstruction(ctxt->userData,
4056 target, NULL);
4057 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004058 return;
4059 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004060 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004061 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004062 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004063 ctxt->instate = state;
4064 return;
4065 }
4066 cur = CUR;
4067 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004068 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4069 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
4071 SKIP_BLANKS;
4072 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004073 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004074 ((cur != '?') || (NXT(1) != '>'))) {
4075 if (len + 5 >= size) {
4076 size *= 2;
4077 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4078 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004079 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004080 ctxt->instate = state;
4081 return;
4082 }
4083 }
4084 count++;
4085 if (count > 50) {
4086 GROW;
4087 count = 0;
4088 }
4089 COPY_BUF(l,buf,len,cur);
4090 NEXTL(l);
4091 cur = CUR_CHAR(l);
4092 if (cur == 0) {
4093 SHRINK;
4094 GROW;
4095 cur = CUR_CHAR(l);
4096 }
4097 }
4098 buf[len] = 0;
4099 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004100 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4101 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004102 } else {
4103 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4105 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004106 }
4107 SKIP(2);
4108
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004109#ifdef LIBXML_CATALOG_ENABLED
4110 if (((state == XML_PARSER_MISC) ||
4111 (state == XML_PARSER_START)) &&
4112 (xmlStrEqual(target, XML_CATALOG_PI))) {
4113 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4114 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4115 (allow == XML_CATA_ALLOW_ALL))
4116 xmlParseCatalogPI(ctxt, buf);
4117 }
4118#endif
4119
4120
Owen Taylor3473f882001-02-23 17:55:21 +00004121 /*
4122 * SAX: PI detected.
4123 */
4124 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4125 (ctxt->sax->processingInstruction != NULL))
4126 ctxt->sax->processingInstruction(ctxt->userData,
4127 target, buf);
4128 }
4129 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004131 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004132 }
4133 ctxt->instate = state;
4134 }
4135}
4136
4137/**
4138 * xmlParseNotationDecl:
4139 * @ctxt: an XML parser context
4140 *
4141 * parse a notation declaration
4142 *
4143 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4144 *
4145 * Hence there is actually 3 choices:
4146 * 'PUBLIC' S PubidLiteral
4147 * 'PUBLIC' S PubidLiteral S SystemLiteral
4148 * and 'SYSTEM' S SystemLiteral
4149 *
4150 * See the NOTE on xmlParseExternalID().
4151 */
4152
4153void
4154xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004155 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004156 xmlChar *Pubid;
4157 xmlChar *Systemid;
4158
Daniel Veillarda07050d2003-10-19 14:46:32 +00004159 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004160 xmlParserInputPtr input = ctxt->input;
4161 SHRINK;
4162 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004163 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004164 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4165 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004166 return;
4167 }
4168 SKIP_BLANKS;
4169
Daniel Veillard76d66f42001-05-16 21:05:17 +00004170 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004171 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004172 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004173 return;
4174 }
William M. Brack76e95df2003-10-18 16:20:14 +00004175 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004176 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004177 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004178 return;
4179 }
4180 SKIP_BLANKS;
4181
4182 /*
4183 * Parse the IDs.
4184 */
4185 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4186 SKIP_BLANKS;
4187
4188 if (RAW == '>') {
4189 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004190 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4191 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
4193 NEXT;
4194 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4195 (ctxt->sax->notationDecl != NULL))
4196 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4197 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004198 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
Owen Taylor3473f882001-02-23 17:55:21 +00004200 if (Systemid != NULL) xmlFree(Systemid);
4201 if (Pubid != NULL) xmlFree(Pubid);
4202 }
4203}
4204
4205/**
4206 * xmlParseEntityDecl:
4207 * @ctxt: an XML parser context
4208 *
4209 * parse <!ENTITY declarations
4210 *
4211 * [70] EntityDecl ::= GEDecl | PEDecl
4212 *
4213 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4214 *
4215 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4216 *
4217 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4218 *
4219 * [74] PEDef ::= EntityValue | ExternalID
4220 *
4221 * [76] NDataDecl ::= S 'NDATA' S Name
4222 *
4223 * [ VC: Notation Declared ]
4224 * The Name must match the declared name of a notation.
4225 */
4226
4227void
4228xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004229 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 xmlChar *value = NULL;
4231 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004232 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004233 int isParameter = 0;
4234 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004235 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004236
4237 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004238 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004239 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004240 SHRINK;
4241 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004242 skipped = SKIP_BLANKS;
4243 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004244 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4245 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004246 }
Owen Taylor3473f882001-02-23 17:55:21 +00004247
4248 if (RAW == '%') {
4249 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004250 skipped = SKIP_BLANKS;
4251 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4253 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004254 }
Owen Taylor3473f882001-02-23 17:55:21 +00004255 isParameter = 1;
4256 }
4257
Daniel Veillard76d66f42001-05-16 21:05:17 +00004258 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004260 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4261 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004262 return;
4263 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004264 skipped = SKIP_BLANKS;
4265 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4267 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
Owen Taylor3473f882001-02-23 17:55:21 +00004269
Daniel Veillardf5582f12002-06-11 10:08:16 +00004270 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004271 /*
4272 * handle the various case of definitions...
4273 */
4274 if (isParameter) {
4275 if ((RAW == '"') || (RAW == '\'')) {
4276 value = xmlParseEntityValue(ctxt, &orig);
4277 if (value) {
4278 if ((ctxt->sax != NULL) &&
4279 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4280 ctxt->sax->entityDecl(ctxt->userData, name,
4281 XML_INTERNAL_PARAMETER_ENTITY,
4282 NULL, NULL, value);
4283 }
4284 } else {
4285 URI = xmlParseExternalID(ctxt, &literal, 1);
4286 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004287 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004288 }
4289 if (URI) {
4290 xmlURIPtr uri;
4291
4292 uri = xmlParseURI((const char *) URI);
4293 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004294 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4295 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004296 /*
4297 * This really ought to be a well formedness error
4298 * but the XML Core WG decided otherwise c.f. issue
4299 * E26 of the XML erratas.
4300 */
Owen Taylor3473f882001-02-23 17:55:21 +00004301 } else {
4302 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004303 /*
4304 * Okay this is foolish to block those but not
4305 * invalid URIs.
4306 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004307 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004308 } else {
4309 if ((ctxt->sax != NULL) &&
4310 (!ctxt->disableSAX) &&
4311 (ctxt->sax->entityDecl != NULL))
4312 ctxt->sax->entityDecl(ctxt->userData, name,
4313 XML_EXTERNAL_PARAMETER_ENTITY,
4314 literal, URI, NULL);
4315 }
4316 xmlFreeURI(uri);
4317 }
4318 }
4319 }
4320 } else {
4321 if ((RAW == '"') || (RAW == '\'')) {
4322 value = xmlParseEntityValue(ctxt, &orig);
4323 if ((ctxt->sax != NULL) &&
4324 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4325 ctxt->sax->entityDecl(ctxt->userData, name,
4326 XML_INTERNAL_GENERAL_ENTITY,
4327 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004328 /*
4329 * For expat compatibility in SAX mode.
4330 */
4331 if ((ctxt->myDoc == NULL) ||
4332 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4333 if (ctxt->myDoc == NULL) {
4334 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4335 }
4336 if (ctxt->myDoc->intSubset == NULL)
4337 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4338 BAD_CAST "fake", NULL, NULL);
4339
Daniel Veillard1af9a412003-08-20 22:54:39 +00004340 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4341 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004342 }
Owen Taylor3473f882001-02-23 17:55:21 +00004343 } else {
4344 URI = xmlParseExternalID(ctxt, &literal, 1);
4345 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004346 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004347 }
4348 if (URI) {
4349 xmlURIPtr uri;
4350
4351 uri = xmlParseURI((const char *)URI);
4352 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004353 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4354 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004355 /*
4356 * This really ought to be a well formedness error
4357 * but the XML Core WG decided otherwise c.f. issue
4358 * E26 of the XML erratas.
4359 */
Owen Taylor3473f882001-02-23 17:55:21 +00004360 } else {
4361 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004362 /*
4363 * Okay this is foolish to block those but not
4364 * invalid URIs.
4365 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004366 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004367 }
4368 xmlFreeURI(uri);
4369 }
4370 }
William M. Brack76e95df2003-10-18 16:20:14 +00004371 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004372 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4373 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004374 }
4375 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004376 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004377 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004378 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004379 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4380 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004381 }
4382 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004383 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004384 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4385 (ctxt->sax->unparsedEntityDecl != NULL))
4386 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4387 literal, URI, ndata);
4388 } else {
4389 if ((ctxt->sax != NULL) &&
4390 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4391 ctxt->sax->entityDecl(ctxt->userData, name,
4392 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4393 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004394 /*
4395 * For expat compatibility in SAX mode.
4396 * assuming the entity repalcement was asked for
4397 */
4398 if ((ctxt->replaceEntities != 0) &&
4399 ((ctxt->myDoc == NULL) ||
4400 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4401 if (ctxt->myDoc == NULL) {
4402 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4403 }
4404
4405 if (ctxt->myDoc->intSubset == NULL)
4406 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4407 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004408 xmlSAX2EntityDecl(ctxt, name,
4409 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4410 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004411 }
Owen Taylor3473f882001-02-23 17:55:21 +00004412 }
4413 }
4414 }
4415 SKIP_BLANKS;
4416 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004417 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004418 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004419 } else {
4420 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004421 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4422 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004423 }
4424 NEXT;
4425 }
4426 if (orig != NULL) {
4427 /*
4428 * Ugly mechanism to save the raw entity value.
4429 */
4430 xmlEntityPtr cur = NULL;
4431
4432 if (isParameter) {
4433 if ((ctxt->sax != NULL) &&
4434 (ctxt->sax->getParameterEntity != NULL))
4435 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4436 } else {
4437 if ((ctxt->sax != NULL) &&
4438 (ctxt->sax->getEntity != NULL))
4439 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004440 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004441 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004442 }
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 if (cur != NULL) {
4445 if (cur->orig != NULL)
4446 xmlFree(orig);
4447 else
4448 cur->orig = orig;
4449 } else
4450 xmlFree(orig);
4451 }
Owen Taylor3473f882001-02-23 17:55:21 +00004452 if (value != NULL) xmlFree(value);
4453 if (URI != NULL) xmlFree(URI);
4454 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004455 }
4456}
4457
4458/**
4459 * xmlParseDefaultDecl:
4460 * @ctxt: an XML parser context
4461 * @value: Receive a possible fixed default value for the attribute
4462 *
4463 * Parse an attribute default declaration
4464 *
4465 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4466 *
4467 * [ VC: Required Attribute ]
4468 * if the default declaration is the keyword #REQUIRED, then the
4469 * attribute must be specified for all elements of the type in the
4470 * attribute-list declaration.
4471 *
4472 * [ VC: Attribute Default Legal ]
4473 * The declared default value must meet the lexical constraints of
4474 * the declared attribute type c.f. xmlValidateAttributeDecl()
4475 *
4476 * [ VC: Fixed Attribute Default ]
4477 * if an attribute has a default value declared with the #FIXED
4478 * keyword, instances of that attribute must match the default value.
4479 *
4480 * [ WFC: No < in Attribute Values ]
4481 * handled in xmlParseAttValue()
4482 *
4483 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4484 * or XML_ATTRIBUTE_FIXED.
4485 */
4486
4487int
4488xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4489 int val;
4490 xmlChar *ret;
4491
4492 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004493 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004494 SKIP(9);
4495 return(XML_ATTRIBUTE_REQUIRED);
4496 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004497 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004498 SKIP(8);
4499 return(XML_ATTRIBUTE_IMPLIED);
4500 }
4501 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004502 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004503 SKIP(6);
4504 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004505 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4507 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004508 }
4509 SKIP_BLANKS;
4510 }
4511 ret = xmlParseAttValue(ctxt);
4512 ctxt->instate = XML_PARSER_DTD;
4513 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004514 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004515 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004516 } else
4517 *value = ret;
4518 return(val);
4519}
4520
4521/**
4522 * xmlParseNotationType:
4523 * @ctxt: an XML parser context
4524 *
4525 * parse an Notation attribute type.
4526 *
4527 * Note: the leading 'NOTATION' S part has already being parsed...
4528 *
4529 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4530 *
4531 * [ VC: Notation Attributes ]
4532 * Values of this type must match one of the notation names included
4533 * in the declaration; all notation names in the declaration must be declared.
4534 *
4535 * Returns: the notation attribute tree built while parsing
4536 */
4537
4538xmlEnumerationPtr
4539xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004540 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004541 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4542
4543 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004544 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004545 return(NULL);
4546 }
4547 SHRINK;
4548 do {
4549 NEXT;
4550 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004551 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004552 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004553 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4554 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004555 return(ret);
4556 }
4557 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004558 if (cur == NULL) return(ret);
4559 if (last == NULL) ret = last = cur;
4560 else {
4561 last->next = cur;
4562 last = cur;
4563 }
4564 SKIP_BLANKS;
4565 } while (RAW == '|');
4566 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004567 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004568 if ((last != NULL) && (last != ret))
4569 xmlFreeEnumeration(last);
4570 return(ret);
4571 }
4572 NEXT;
4573 return(ret);
4574}
4575
4576/**
4577 * xmlParseEnumerationType:
4578 * @ctxt: an XML parser context
4579 *
4580 * parse an Enumeration attribute type.
4581 *
4582 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4583 *
4584 * [ VC: Enumeration ]
4585 * Values of this type must match one of the Nmtoken tokens in
4586 * the declaration
4587 *
4588 * Returns: the enumeration attribute tree built while parsing
4589 */
4590
4591xmlEnumerationPtr
4592xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4593 xmlChar *name;
4594 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4595
4596 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004597 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004598 return(NULL);
4599 }
4600 SHRINK;
4601 do {
4602 NEXT;
4603 SKIP_BLANKS;
4604 name = xmlParseNmtoken(ctxt);
4605 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004606 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004607 return(ret);
4608 }
4609 cur = xmlCreateEnumeration(name);
4610 xmlFree(name);
4611 if (cur == NULL) return(ret);
4612 if (last == NULL) ret = last = cur;
4613 else {
4614 last->next = cur;
4615 last = cur;
4616 }
4617 SKIP_BLANKS;
4618 } while (RAW == '|');
4619 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004620 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004621 return(ret);
4622 }
4623 NEXT;
4624 return(ret);
4625}
4626
4627/**
4628 * xmlParseEnumeratedType:
4629 * @ctxt: an XML parser context
4630 * @tree: the enumeration tree built while parsing
4631 *
4632 * parse an Enumerated attribute type.
4633 *
4634 * [57] EnumeratedType ::= NotationType | Enumeration
4635 *
4636 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4637 *
4638 *
4639 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4640 */
4641
4642int
4643xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004644 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004645 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004646 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4648 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004649 return(0);
4650 }
4651 SKIP_BLANKS;
4652 *tree = xmlParseNotationType(ctxt);
4653 if (*tree == NULL) return(0);
4654 return(XML_ATTRIBUTE_NOTATION);
4655 }
4656 *tree = xmlParseEnumerationType(ctxt);
4657 if (*tree == NULL) return(0);
4658 return(XML_ATTRIBUTE_ENUMERATION);
4659}
4660
4661/**
4662 * xmlParseAttributeType:
4663 * @ctxt: an XML parser context
4664 * @tree: the enumeration tree built while parsing
4665 *
4666 * parse the Attribute list def for an element
4667 *
4668 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4669 *
4670 * [55] StringType ::= 'CDATA'
4671 *
4672 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4673 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4674 *
4675 * Validity constraints for attribute values syntax are checked in
4676 * xmlValidateAttributeValue()
4677 *
4678 * [ VC: ID ]
4679 * Values of type ID must match the Name production. A name must not
4680 * appear more than once in an XML document as a value of this type;
4681 * i.e., ID values must uniquely identify the elements which bear them.
4682 *
4683 * [ VC: One ID per Element Type ]
4684 * No element type may have more than one ID attribute specified.
4685 *
4686 * [ VC: ID Attribute Default ]
4687 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4688 *
4689 * [ VC: IDREF ]
4690 * Values of type IDREF must match the Name production, and values
4691 * of type IDREFS must match Names; each IDREF Name must match the value
4692 * of an ID attribute on some element in the XML document; i.e. IDREF
4693 * values must match the value of some ID attribute.
4694 *
4695 * [ VC: Entity Name ]
4696 * Values of type ENTITY must match the Name production, values
4697 * of type ENTITIES must match Names; each Entity Name must match the
4698 * name of an unparsed entity declared in the DTD.
4699 *
4700 * [ VC: Name Token ]
4701 * Values of type NMTOKEN must match the Nmtoken production; values
4702 * of type NMTOKENS must match Nmtokens.
4703 *
4704 * Returns the attribute type
4705 */
4706int
4707xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4708 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004709 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004710 SKIP(5);
4711 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004712 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004713 SKIP(6);
4714 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004715 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004716 SKIP(5);
4717 return(XML_ATTRIBUTE_IDREF);
4718 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4719 SKIP(2);
4720 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004721 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004722 SKIP(6);
4723 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004724 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004725 SKIP(8);
4726 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004727 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004728 SKIP(8);
4729 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004730 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004731 SKIP(7);
4732 return(XML_ATTRIBUTE_NMTOKEN);
4733 }
4734 return(xmlParseEnumeratedType(ctxt, tree));
4735}
4736
4737/**
4738 * xmlParseAttributeListDecl:
4739 * @ctxt: an XML parser context
4740 *
4741 * : parse the Attribute list def for an element
4742 *
4743 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4744 *
4745 * [53] AttDef ::= S Name S AttType S DefaultDecl
4746 *
4747 */
4748void
4749xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004750 const xmlChar *elemName;
4751 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004752 xmlEnumerationPtr tree;
4753
Daniel Veillarda07050d2003-10-19 14:46:32 +00004754 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004755 xmlParserInputPtr input = ctxt->input;
4756
4757 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004758 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004759 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004760 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004761 }
4762 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004763 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004764 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004765 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4766 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004767 return;
4768 }
4769 SKIP_BLANKS;
4770 GROW;
4771 while (RAW != '>') {
4772 const xmlChar *check = CUR_PTR;
4773 int type;
4774 int def;
4775 xmlChar *defaultValue = NULL;
4776
4777 GROW;
4778 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004779 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004780 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004781 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4782 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004783 break;
4784 }
4785 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004786 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004788 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004789 if (defaultValue != NULL)
4790 xmlFree(defaultValue);
4791 break;
4792 }
4793 SKIP_BLANKS;
4794
4795 type = xmlParseAttributeType(ctxt, &tree);
4796 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004797 if (defaultValue != NULL)
4798 xmlFree(defaultValue);
4799 break;
4800 }
4801
4802 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004803 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004804 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4805 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004806 if (defaultValue != NULL)
4807 xmlFree(defaultValue);
4808 if (tree != NULL)
4809 xmlFreeEnumeration(tree);
4810 break;
4811 }
4812 SKIP_BLANKS;
4813
4814 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4815 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004816 if (defaultValue != NULL)
4817 xmlFree(defaultValue);
4818 if (tree != NULL)
4819 xmlFreeEnumeration(tree);
4820 break;
4821 }
4822
4823 GROW;
4824 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004825 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004826 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004827 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004828 if (defaultValue != NULL)
4829 xmlFree(defaultValue);
4830 if (tree != NULL)
4831 xmlFreeEnumeration(tree);
4832 break;
4833 }
4834 SKIP_BLANKS;
4835 }
4836 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004837 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4838 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004839 if (defaultValue != NULL)
4840 xmlFree(defaultValue);
4841 if (tree != NULL)
4842 xmlFreeEnumeration(tree);
4843 break;
4844 }
4845 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4846 (ctxt->sax->attributeDecl != NULL))
4847 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4848 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004849 else if (tree != NULL)
4850 xmlFreeEnumeration(tree);
4851
4852 if ((ctxt->sax2) && (defaultValue != NULL) &&
4853 (def != XML_ATTRIBUTE_IMPLIED) &&
4854 (def != XML_ATTRIBUTE_REQUIRED)) {
4855 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4856 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004857 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4858 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4859 }
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if (defaultValue != NULL)
4861 xmlFree(defaultValue);
4862 GROW;
4863 }
4864 if (RAW == '>') {
4865 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004866 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4867 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004868 }
4869 NEXT;
4870 }
Owen Taylor3473f882001-02-23 17:55:21 +00004871 }
4872}
4873
4874/**
4875 * xmlParseElementMixedContentDecl:
4876 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004877 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004878 *
4879 * parse the declaration for a Mixed Element content
4880 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4881 *
4882 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4883 * '(' S? '#PCDATA' S? ')'
4884 *
4885 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4886 *
4887 * [ VC: No Duplicate Types ]
4888 * The same name must not appear more than once in a single
4889 * mixed-content declaration.
4890 *
4891 * returns: the list of the xmlElementContentPtr describing the element choices
4892 */
4893xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004894xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004895 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004896 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004897
4898 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004899 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004900 SKIP(7);
4901 SKIP_BLANKS;
4902 SHRINK;
4903 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004904 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004905 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4906"Element content declaration doesn't start and stop in the same entity\n",
4907 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004908 }
Owen Taylor3473f882001-02-23 17:55:21 +00004909 NEXT;
4910 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4911 if (RAW == '*') {
4912 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4913 NEXT;
4914 }
4915 return(ret);
4916 }
4917 if ((RAW == '(') || (RAW == '|')) {
4918 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4919 if (ret == NULL) return(NULL);
4920 }
4921 while (RAW == '|') {
4922 NEXT;
4923 if (elem == NULL) {
4924 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4925 if (ret == NULL) return(NULL);
4926 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004927 if (cur != NULL)
4928 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004929 cur = ret;
4930 } else {
4931 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4932 if (n == NULL) return(NULL);
4933 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004934 if (n->c1 != NULL)
4935 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004936 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004937 if (n != NULL)
4938 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004939 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004940 }
4941 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004942 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004944 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004945 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004946 xmlFreeElementContent(cur);
4947 return(NULL);
4948 }
4949 SKIP_BLANKS;
4950 GROW;
4951 }
4952 if ((RAW == ')') && (NXT(1) == '*')) {
4953 if (elem != NULL) {
4954 cur->c2 = xmlNewElementContent(elem,
4955 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004956 if (cur->c2 != NULL)
4957 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004958 }
4959 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004960 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004961 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4962"Element content declaration doesn't start and stop in the same entity\n",
4963 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004964 }
Owen Taylor3473f882001-02-23 17:55:21 +00004965 SKIP(2);
4966 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004967 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004968 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004969 return(NULL);
4970 }
4971
4972 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004973 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004974 }
4975 return(ret);
4976}
4977
4978/**
4979 * xmlParseElementChildrenContentDecl:
4980 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004981 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004982 *
4983 * parse the declaration for a Mixed Element content
4984 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4985 *
4986 *
4987 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4988 *
4989 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4990 *
4991 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4992 *
4993 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4994 *
4995 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4996 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004997 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004998 * opening or closing parentheses in a choice, seq, or Mixed
4999 * construct is contained in the replacement text for a parameter
5000 * entity, both must be contained in the same replacement text. For
5001 * interoperability, if a parameter-entity reference appears in a
5002 * choice, seq, or Mixed construct, its replacement text should not
5003 * be empty, and neither the first nor last non-blank character of
5004 * the replacement text should be a connector (| or ,).
5005 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005006 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005007 * hierarchy.
5008 */
5009xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005010xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005011 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005012 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005013 xmlChar type = 0;
5014
5015 SKIP_BLANKS;
5016 GROW;
5017 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005018 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005019
Owen Taylor3473f882001-02-23 17:55:21 +00005020 /* Recurse on first child */
5021 NEXT;
5022 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005023 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 SKIP_BLANKS;
5025 GROW;
5026 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005027 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005028 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005029 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005030 return(NULL);
5031 }
5032 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005033 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005034 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005035 return(NULL);
5036 }
Owen Taylor3473f882001-02-23 17:55:21 +00005037 GROW;
5038 if (RAW == '?') {
5039 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5040 NEXT;
5041 } else if (RAW == '*') {
5042 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5043 NEXT;
5044 } else if (RAW == '+') {
5045 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5046 NEXT;
5047 } else {
5048 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5049 }
Owen Taylor3473f882001-02-23 17:55:21 +00005050 GROW;
5051 }
5052 SKIP_BLANKS;
5053 SHRINK;
5054 while (RAW != ')') {
5055 /*
5056 * Each loop we parse one separator and one element.
5057 */
5058 if (RAW == ',') {
5059 if (type == 0) type = CUR;
5060
5061 /*
5062 * Detect "Name | Name , Name" error
5063 */
5064 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005065 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005066 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005067 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005068 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005069 xmlFreeElementContent(last);
5070 if (ret != NULL)
5071 xmlFreeElementContent(ret);
5072 return(NULL);
5073 }
5074 NEXT;
5075
5076 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5077 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005078 if ((last != NULL) && (last != ret))
5079 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005080 xmlFreeElementContent(ret);
5081 return(NULL);
5082 }
5083 if (last == NULL) {
5084 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005085 if (ret != NULL)
5086 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005087 ret = cur = op;
5088 } else {
5089 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005090 if (op != NULL)
5091 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005092 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005093 if (last != NULL)
5094 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005095 cur =op;
5096 last = NULL;
5097 }
5098 } else if (RAW == '|') {
5099 if (type == 0) type = CUR;
5100
5101 /*
5102 * Detect "Name , Name | Name" error
5103 */
5104 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005105 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005106 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005107 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005108 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005109 xmlFreeElementContent(last);
5110 if (ret != NULL)
5111 xmlFreeElementContent(ret);
5112 return(NULL);
5113 }
5114 NEXT;
5115
5116 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5117 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005118 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005119 xmlFreeElementContent(last);
5120 if (ret != NULL)
5121 xmlFreeElementContent(ret);
5122 return(NULL);
5123 }
5124 if (last == NULL) {
5125 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005126 if (ret != NULL)
5127 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005128 ret = cur = op;
5129 } else {
5130 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005131 if (op != NULL)
5132 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005133 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005134 if (last != NULL)
5135 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005136 cur =op;
5137 last = NULL;
5138 }
5139 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005140 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005141 if (ret != NULL)
5142 xmlFreeElementContent(ret);
5143 return(NULL);
5144 }
5145 GROW;
5146 SKIP_BLANKS;
5147 GROW;
5148 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005149 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005150 /* Recurse on second child */
5151 NEXT;
5152 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005153 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005154 SKIP_BLANKS;
5155 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005156 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005158 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005159 if (ret != NULL)
5160 xmlFreeElementContent(ret);
5161 return(NULL);
5162 }
5163 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005164 if (RAW == '?') {
5165 last->ocur = XML_ELEMENT_CONTENT_OPT;
5166 NEXT;
5167 } else if (RAW == '*') {
5168 last->ocur = XML_ELEMENT_CONTENT_MULT;
5169 NEXT;
5170 } else if (RAW == '+') {
5171 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5172 NEXT;
5173 } else {
5174 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5175 }
5176 }
5177 SKIP_BLANKS;
5178 GROW;
5179 }
5180 if ((cur != NULL) && (last != NULL)) {
5181 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005182 if (last != NULL)
5183 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005184 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005185 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005186 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5187"Element content declaration doesn't start and stop in the same entity\n",
5188 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005189 }
Owen Taylor3473f882001-02-23 17:55:21 +00005190 NEXT;
5191 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005192 if (ret != NULL)
5193 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005194 NEXT;
5195 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005196 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005197 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005198 cur = ret;
5199 /*
5200 * Some normalization:
5201 * (a | b* | c?)* == (a | b | c)*
5202 */
5203 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5204 if ((cur->c1 != NULL) &&
5205 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5206 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5207 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5208 if ((cur->c2 != NULL) &&
5209 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5210 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5211 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5212 cur = cur->c2;
5213 }
5214 }
Owen Taylor3473f882001-02-23 17:55:21 +00005215 NEXT;
5216 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005217 if (ret != NULL) {
5218 int found = 0;
5219
Daniel Veillarde470df72001-04-18 21:41:07 +00005220 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005221 /*
5222 * Some normalization:
5223 * (a | b*)+ == (a | b)*
5224 * (a | b?)+ == (a | b)*
5225 */
5226 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5227 if ((cur->c1 != NULL) &&
5228 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5229 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5230 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5231 found = 1;
5232 }
5233 if ((cur->c2 != NULL) &&
5234 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5235 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5236 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5237 found = 1;
5238 }
5239 cur = cur->c2;
5240 }
5241 if (found)
5242 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5243 }
Owen Taylor3473f882001-02-23 17:55:21 +00005244 NEXT;
5245 }
5246 return(ret);
5247}
5248
5249/**
5250 * xmlParseElementContentDecl:
5251 * @ctxt: an XML parser context
5252 * @name: the name of the element being defined.
5253 * @result: the Element Content pointer will be stored here if any
5254 *
5255 * parse the declaration for an Element content either Mixed or Children,
5256 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5257 *
5258 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5259 *
5260 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5261 */
5262
5263int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005264xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005265 xmlElementContentPtr *result) {
5266
5267 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005268 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005269 int res;
5270
5271 *result = NULL;
5272
5273 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005274 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005275 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005276 return(-1);
5277 }
5278 NEXT;
5279 GROW;
5280 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005281 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005282 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005283 res = XML_ELEMENT_TYPE_MIXED;
5284 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005285 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 res = XML_ELEMENT_TYPE_ELEMENT;
5287 }
Owen Taylor3473f882001-02-23 17:55:21 +00005288 SKIP_BLANKS;
5289 *result = tree;
5290 return(res);
5291}
5292
5293/**
5294 * xmlParseElementDecl:
5295 * @ctxt: an XML parser context
5296 *
5297 * parse an Element declaration.
5298 *
5299 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5300 *
5301 * [ VC: Unique Element Type Declaration ]
5302 * No element type may be declared more than once
5303 *
5304 * Returns the type of the element, or -1 in case of error
5305 */
5306int
5307xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005308 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005309 int ret = -1;
5310 xmlElementContentPtr content = NULL;
5311
5312 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005313 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005314 xmlParserInputPtr input = ctxt->input;
5315
5316 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005317 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005318 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5319 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005320 }
5321 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005322 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005324 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5325 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005326 return(-1);
5327 }
5328 while ((RAW == 0) && (ctxt->inputNr > 1))
5329 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005330 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005331 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5332 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005333 }
5334 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005335 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005336 SKIP(5);
5337 /*
5338 * Element must always be empty.
5339 */
5340 ret = XML_ELEMENT_TYPE_EMPTY;
5341 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5342 (NXT(2) == 'Y')) {
5343 SKIP(3);
5344 /*
5345 * Element is a generic container.
5346 */
5347 ret = XML_ELEMENT_TYPE_ANY;
5348 } else if (RAW == '(') {
5349 ret = xmlParseElementContentDecl(ctxt, name, &content);
5350 } else {
5351 /*
5352 * [ WFC: PEs in Internal Subset ] error handling.
5353 */
5354 if ((RAW == '%') && (ctxt->external == 0) &&
5355 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005356 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005358 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005359 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005360 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5361 }
Owen Taylor3473f882001-02-23 17:55:21 +00005362 return(-1);
5363 }
5364
5365 SKIP_BLANKS;
5366 /*
5367 * Pop-up of finished entities.
5368 */
5369 while ((RAW == 0) && (ctxt->inputNr > 1))
5370 xmlPopInput(ctxt);
5371 SKIP_BLANKS;
5372
5373 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005374 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005375 } else {
5376 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005377 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5378 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005379 }
5380
5381 NEXT;
5382 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5383 (ctxt->sax->elementDecl != NULL))
5384 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5385 content);
5386 }
5387 if (content != NULL) {
5388 xmlFreeElementContent(content);
5389 }
Owen Taylor3473f882001-02-23 17:55:21 +00005390 }
5391 return(ret);
5392}
5393
5394/**
Owen Taylor3473f882001-02-23 17:55:21 +00005395 * xmlParseConditionalSections
5396 * @ctxt: an XML parser context
5397 *
5398 * [61] conditionalSect ::= includeSect | ignoreSect
5399 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5400 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5401 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5402 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5403 */
5404
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005405static void
Owen Taylor3473f882001-02-23 17:55:21 +00005406xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5407 SKIP(3);
5408 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005409 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005410 SKIP(7);
5411 SKIP_BLANKS;
5412 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005413 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005414 } else {
5415 NEXT;
5416 }
5417 if (xmlParserDebugEntities) {
5418 if ((ctxt->input != NULL) && (ctxt->input->filename))
5419 xmlGenericError(xmlGenericErrorContext,
5420 "%s(%d): ", ctxt->input->filename,
5421 ctxt->input->line);
5422 xmlGenericError(xmlGenericErrorContext,
5423 "Entering INCLUDE Conditional Section\n");
5424 }
5425
5426 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5427 (NXT(2) != '>'))) {
5428 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005429 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005430
5431 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5432 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005433 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005434 NEXT;
5435 } else if (RAW == '%') {
5436 xmlParsePEReference(ctxt);
5437 } else
5438 xmlParseMarkupDecl(ctxt);
5439
5440 /*
5441 * Pop-up of finished entities.
5442 */
5443 while ((RAW == 0) && (ctxt->inputNr > 1))
5444 xmlPopInput(ctxt);
5445
Daniel Veillardfdc91562002-07-01 21:52:03 +00005446 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005447 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005448 break;
5449 }
5450 }
5451 if (xmlParserDebugEntities) {
5452 if ((ctxt->input != NULL) && (ctxt->input->filename))
5453 xmlGenericError(xmlGenericErrorContext,
5454 "%s(%d): ", ctxt->input->filename,
5455 ctxt->input->line);
5456 xmlGenericError(xmlGenericErrorContext,
5457 "Leaving INCLUDE Conditional Section\n");
5458 }
5459
Daniel Veillarda07050d2003-10-19 14:46:32 +00005460 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005461 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005462 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005463 int depth = 0;
5464
5465 SKIP(6);
5466 SKIP_BLANKS;
5467 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005468 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005469 } else {
5470 NEXT;
5471 }
5472 if (xmlParserDebugEntities) {
5473 if ((ctxt->input != NULL) && (ctxt->input->filename))
5474 xmlGenericError(xmlGenericErrorContext,
5475 "%s(%d): ", ctxt->input->filename,
5476 ctxt->input->line);
5477 xmlGenericError(xmlGenericErrorContext,
5478 "Entering IGNORE Conditional Section\n");
5479 }
5480
5481 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005482 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005483 * But disable SAX event generating DTD building in the meantime
5484 */
5485 state = ctxt->disableSAX;
5486 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005487 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005488 ctxt->instate = XML_PARSER_IGNORE;
5489
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005490 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005491 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5492 depth++;
5493 SKIP(3);
5494 continue;
5495 }
5496 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5497 if (--depth >= 0) SKIP(3);
5498 continue;
5499 }
5500 NEXT;
5501 continue;
5502 }
5503
5504 ctxt->disableSAX = state;
5505 ctxt->instate = instate;
5506
5507 if (xmlParserDebugEntities) {
5508 if ((ctxt->input != NULL) && (ctxt->input->filename))
5509 xmlGenericError(xmlGenericErrorContext,
5510 "%s(%d): ", ctxt->input->filename,
5511 ctxt->input->line);
5512 xmlGenericError(xmlGenericErrorContext,
5513 "Leaving IGNORE Conditional Section\n");
5514 }
5515
5516 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005517 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519
5520 if (RAW == 0)
5521 SHRINK;
5522
5523 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005524 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005525 } else {
5526 SKIP(3);
5527 }
5528}
5529
5530/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005531 * xmlParseMarkupDecl:
5532 * @ctxt: an XML parser context
5533 *
5534 * parse Markup declarations
5535 *
5536 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5537 * NotationDecl | PI | Comment
5538 *
5539 * [ VC: Proper Declaration/PE Nesting ]
5540 * Parameter-entity replacement text must be properly nested with
5541 * markup declarations. That is to say, if either the first character
5542 * or the last character of a markup declaration (markupdecl above) is
5543 * contained in the replacement text for a parameter-entity reference,
5544 * both must be contained in the same replacement text.
5545 *
5546 * [ WFC: PEs in Internal Subset ]
5547 * In the internal DTD subset, parameter-entity references can occur
5548 * only where markup declarations can occur, not within markup declarations.
5549 * (This does not apply to references that occur in external parameter
5550 * entities or to the external subset.)
5551 */
5552void
5553xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5554 GROW;
5555 xmlParseElementDecl(ctxt);
5556 xmlParseAttributeListDecl(ctxt);
5557 xmlParseEntityDecl(ctxt);
5558 xmlParseNotationDecl(ctxt);
5559 xmlParsePI(ctxt);
5560 xmlParseComment(ctxt);
5561 /*
5562 * This is only for internal subset. On external entities,
5563 * the replacement is done before parsing stage
5564 */
5565 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5566 xmlParsePEReference(ctxt);
5567
5568 /*
5569 * Conditional sections are allowed from entities included
5570 * by PE References in the internal subset.
5571 */
5572 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5573 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5574 xmlParseConditionalSections(ctxt);
5575 }
5576 }
5577
5578 ctxt->instate = XML_PARSER_DTD;
5579}
5580
5581/**
5582 * xmlParseTextDecl:
5583 * @ctxt: an XML parser context
5584 *
5585 * parse an XML declaration header for external entities
5586 *
5587 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5588 *
5589 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5590 */
5591
5592void
5593xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5594 xmlChar *version;
5595
5596 /*
5597 * We know that '<?xml' is here.
5598 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005599 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005600 SKIP(5);
5601 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005602 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005603 return;
5604 }
5605
William M. Brack76e95df2003-10-18 16:20:14 +00005606 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5608 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005609 }
5610 SKIP_BLANKS;
5611
5612 /*
5613 * We may have the VersionInfo here.
5614 */
5615 version = xmlParseVersionInfo(ctxt);
5616 if (version == NULL)
5617 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005618 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005619 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5621 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005622 }
5623 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005624 ctxt->input->version = version;
5625
5626 /*
5627 * We must have the encoding declaration
5628 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005629 xmlParseEncodingDecl(ctxt);
5630 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5631 /*
5632 * The XML REC instructs us to stop parsing right here
5633 */
5634 return;
5635 }
5636
5637 SKIP_BLANKS;
5638 if ((RAW == '?') && (NXT(1) == '>')) {
5639 SKIP(2);
5640 } else if (RAW == '>') {
5641 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005642 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005643 NEXT;
5644 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005645 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005646 MOVETO_ENDTAG(CUR_PTR);
5647 NEXT;
5648 }
5649}
5650
5651/**
Owen Taylor3473f882001-02-23 17:55:21 +00005652 * xmlParseExternalSubset:
5653 * @ctxt: an XML parser context
5654 * @ExternalID: the external identifier
5655 * @SystemID: the system identifier (or URL)
5656 *
5657 * parse Markup declarations from an external subset
5658 *
5659 * [30] extSubset ::= textDecl? extSubsetDecl
5660 *
5661 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5662 */
5663void
5664xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5665 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005666 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005667 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005668 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005669 xmlParseTextDecl(ctxt);
5670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5671 /*
5672 * The XML REC instructs us to stop parsing right here
5673 */
5674 ctxt->instate = XML_PARSER_EOF;
5675 return;
5676 }
5677 }
5678 if (ctxt->myDoc == NULL) {
5679 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5680 }
5681 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5682 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5683
5684 ctxt->instate = XML_PARSER_DTD;
5685 ctxt->external = 1;
5686 while (((RAW == '<') && (NXT(1) == '?')) ||
5687 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005688 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005689 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005690 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005691
5692 GROW;
5693 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5694 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005695 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005696 NEXT;
5697 } else if (RAW == '%') {
5698 xmlParsePEReference(ctxt);
5699 } else
5700 xmlParseMarkupDecl(ctxt);
5701
5702 /*
5703 * Pop-up of finished entities.
5704 */
5705 while ((RAW == 0) && (ctxt->inputNr > 1))
5706 xmlPopInput(ctxt);
5707
Daniel Veillardfdc91562002-07-01 21:52:03 +00005708 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005709 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005710 break;
5711 }
5712 }
5713
5714 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005715 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 }
5717
5718}
5719
5720/**
5721 * xmlParseReference:
5722 * @ctxt: an XML parser context
5723 *
 * Parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
5728 *
5729 * [67] Reference ::= EntityRef | CharRef
5730 */
5731void
5732xmlParseReference(xmlParserCtxtPtr ctxt) {
5733 xmlEntityPtr ent;
5734 xmlChar *val;
5735 if (RAW != '&') return;
5736
5737 if (NXT(1) == '#') {
5738 int i = 0;
5739 xmlChar out[10];
5740 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005741 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005742
5743 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5744 /*
5745 * So we are using non-UTF-8 buffers
5746 * Check that the char fit on 8bits, if not
5747 * generate a CharRef.
5748 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005749 if (value <= 0xFF) {
5750 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005751 out[1] = 0;
5752 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5753 (!ctxt->disableSAX))
5754 ctxt->sax->characters(ctxt->userData, out, 1);
5755 } else {
5756 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005757 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005759 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5761 (!ctxt->disableSAX))
5762 ctxt->sax->reference(ctxt->userData, out);
5763 }
5764 } else {
5765 /*
5766 * Just encode the value in UTF-8
5767 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005768 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005769 out[i] = 0;
5770 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5771 (!ctxt->disableSAX))
5772 ctxt->sax->characters(ctxt->userData, out, i);
5773 }
5774 } else {
5775 ent = xmlParseEntityRef(ctxt);
5776 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005777 if (!ctxt->wellFormed)
5778 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005779 if ((ent->name != NULL) &&
5780 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5781 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005782 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005783
5784
5785 /*
5786 * The first reference to the entity trigger a parsing phase
5787 * where the ent->children is filled with the result from
5788 * the parsing.
5789 */
5790 if (ent->children == NULL) {
5791 xmlChar *value;
5792 value = ent->content;
5793
5794 /*
5795 * Check that this entity is well formed
5796 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005797 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005798 (value[1] == 0) && (value[0] == '<') &&
5799 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5800 /*
5801 * DONE: get definite answer on this !!!
5802 * Lots of entity decls are used to declare a single
5803 * char
5804 * <!ENTITY lt "<">
5805 * Which seems to be valid since
5806 * 2.4: The ampersand character (&) and the left angle
5807 * bracket (<) may appear in their literal form only
5808 * when used ... They are also legal within the literal
5809 * entity value of an internal entity declaration;i
5810 * see "4.3.2 Well-Formed Parsed Entities".
5811 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5812 * Looking at the OASIS test suite and James Clark
5813 * tests, this is broken. However the XML REC uses
5814 * it. Is the XML REC not well-formed ????
5815 * This is a hack to avoid this problem
5816 *
5817 * ANSWER: since lt gt amp .. are already defined,
5818 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005819 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005820 * is lousy but acceptable.
5821 */
5822 list = xmlNewDocText(ctxt->myDoc, value);
5823 if (list != NULL) {
5824 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5825 (ent->children == NULL)) {
5826 ent->children = list;
5827 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005828 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005829 list->parent = (xmlNodePtr) ent;
5830 } else {
5831 xmlFreeNodeList(list);
5832 }
5833 } else if (list != NULL) {
5834 xmlFreeNodeList(list);
5835 }
5836 } else {
5837 /*
5838 * 4.3.2: An internal general parsed entity is well-formed
5839 * if its replacement text matches the production labeled
5840 * content.
5841 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005842
5843 void *user_data;
5844 /*
5845 * This is a bit hackish but this seems the best
5846 * way to make sure both SAX and DOM entity support
5847 * behaves okay.
5848 */
5849 if (ctxt->userData == ctxt)
5850 user_data = NULL;
5851 else
5852 user_data = ctxt->userData;
5853
Owen Taylor3473f882001-02-23 17:55:21 +00005854 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5855 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005856 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5857 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 ctxt->depth--;
5859 } else if (ent->etype ==
5860 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5861 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005862 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005863 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005864 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005865 ctxt->depth--;
5866 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005867 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005868 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5869 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005870 }
5871 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005872 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005873 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005874 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005875 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5876 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005877 (ent->children == NULL)) {
5878 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005879 if (ctxt->replaceEntities) {
5880 /*
5881 * Prune it directly in the generated document
5882 * except for single text nodes.
5883 */
5884 if ((list->type == XML_TEXT_NODE) &&
5885 (list->next == NULL)) {
5886 list->parent = (xmlNodePtr) ent;
5887 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005888 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005889 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005890 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005891 while (list != NULL) {
5892 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005893 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005894 if (list->next == NULL)
5895 ent->last = list;
5896 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005897 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005898 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005899#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005900 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5901 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005902#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005903 }
5904 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005905 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005906 while (list != NULL) {
5907 list->parent = (xmlNodePtr) ent;
5908 if (list->next == NULL)
5909 ent->last = list;
5910 list = list->next;
5911 }
Owen Taylor3473f882001-02-23 17:55:21 +00005912 }
5913 } else {
5914 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005915 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005917 } else if ((ret != XML_ERR_OK) &&
5918 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005919 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005920 } else if (list != NULL) {
5921 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005922 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005923 }
5924 }
5925 }
5926 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5927 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5928 /*
5929 * Create a node.
5930 */
5931 ctxt->sax->reference(ctxt->userData, ent->name);
5932 return;
5933 } else if (ctxt->replaceEntities) {
5934 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5935 /*
5936 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005937 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005938 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005939 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005940 if ((list == NULL) && (ent->owner == 0)) {
5941 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005942 cur = ent->children;
5943 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005944 nw = xmlCopyNode(cur, 1);
5945 if (nw != NULL) {
5946 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005947 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005948 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005949 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005950 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005951 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005952 if (cur == ent->last)
5953 break;
5954 cur = cur->next;
5955 }
Daniel Veillard81273902003-09-30 00:43:48 +00005956#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005957 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005958 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005959#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005960 } else if (list == NULL) {
5961 xmlNodePtr nw = NULL, cur, next, last,
5962 firstChild = NULL;
5963 /*
5964 * Copy the entity child list and make it the new
5965 * entity child list. The goal is to make sure any
5966 * ID or REF referenced will be the one from the
5967 * document content and not the entity copy.
5968 */
5969 cur = ent->children;
5970 ent->children = NULL;
5971 last = ent->last;
5972 ent->last = NULL;
5973 while (cur != NULL) {
5974 next = cur->next;
5975 cur->next = NULL;
5976 cur->parent = NULL;
5977 nw = xmlCopyNode(cur, 1);
5978 if (nw != NULL) {
5979 nw->_private = cur->_private;
5980 if (firstChild == NULL){
5981 firstChild = cur;
5982 }
5983 xmlAddChild((xmlNodePtr) ent, nw);
5984 xmlAddChild(ctxt->node, cur);
5985 }
5986 if (cur == last)
5987 break;
5988 cur = next;
5989 }
5990 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005991#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005992 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5993 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005994#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005995 } else {
5996 /*
5997 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005998 * node with a possible previous text one which
5999 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006000 */
6001 if (ent->children->type == XML_TEXT_NODE)
6002 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
6003 if ((ent->last != ent->children) &&
6004 (ent->last->type == XML_TEXT_NODE))
6005 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
6006 xmlAddChildList(ctxt->node, ent->children);
6007 }
6008
Owen Taylor3473f882001-02-23 17:55:21 +00006009 /*
6010 * This is to avoid a nasty side effect, see
6011 * characters() in SAX.c
6012 */
6013 ctxt->nodemem = 0;
6014 ctxt->nodelen = 0;
6015 return;
6016 } else {
6017 /*
6018 * Probably running in SAX mode
6019 */
6020 xmlParserInputPtr input;
6021
6022 input = xmlNewEntityInputStream(ctxt, ent);
6023 xmlPushInput(ctxt, input);
6024 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006025 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6026 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006027 xmlParseTextDecl(ctxt);
6028 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6029 /*
6030 * The XML REC instructs us to stop parsing right here
6031 */
6032 ctxt->instate = XML_PARSER_EOF;
6033 return;
6034 }
6035 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006036 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6037 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006038 }
6039 }
6040 return;
6041 }
6042 }
6043 } else {
6044 val = ent->content;
6045 if (val == NULL) return;
6046 /*
6047 * inline the entity.
6048 */
6049 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6050 (!ctxt->disableSAX))
6051 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6052 }
6053 }
6054}
6055
6056/**
6057 * xmlParseEntityRef:
6058 * @ctxt: an XML parser context
6059 *
6060 * parse ENTITY references declarations
6061 *
6062 * [68] EntityRef ::= '&' Name ';'
6063 *
6064 * [ WFC: Entity Declared ]
6065 * In a document without any DTD, a document with only an internal DTD
6066 * subset which contains no parameter entity references, or a document
6067 * with "standalone='yes'", the Name given in the entity reference
6068 * must match that in an entity declaration, except that well-formed
6069 * documents need not declare any of the following entities: amp, lt,
6070 * gt, apos, quot. The declaration of a parameter entity must precede
6071 * any reference to it. Similarly, the declaration of a general entity
6072 * must precede any reference to it which appears in a default value in an
6073 * attribute-list declaration. Note that if entities are declared in the
6074 * external subset or in external parameter entities, a non-validating
6075 * processor is not obligated to read and process their declarations;
6076 * for such documents, the rule that an entity must be declared is a
6077 * well-formedness constraint only if standalone='yes'.
6078 *
6079 * [ WFC: Parsed Entity ]
6080 * An entity reference must not contain the name of an unparsed entity
6081 *
6082 * Returns the xmlEntityPtr if found, or NULL otherwise.
6083 */
6084xmlEntityPtr
6085xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006086 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006087 xmlEntityPtr ent = NULL;
6088
6089 GROW;
6090
6091 if (RAW == '&') {
6092 NEXT;
6093 name = xmlParseName(ctxt);
6094 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6096 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006097 } else {
6098 if (RAW == ';') {
6099 NEXT;
6100 /*
6101 * Ask first SAX for entity resolution, otherwise try the
6102 * predefined set.
6103 */
6104 if (ctxt->sax != NULL) {
6105 if (ctxt->sax->getEntity != NULL)
6106 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006107 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006108 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006109 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6110 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006111 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006112 }
Owen Taylor3473f882001-02-23 17:55:21 +00006113 }
6114 /*
6115 * [ WFC: Entity Declared ]
6116 * In a document without any DTD, a document with only an
6117 * internal DTD subset which contains no parameter entity
6118 * references, or a document with "standalone='yes'", the
6119 * Name given in the entity reference must match that in an
6120 * entity declaration, except that well-formed documents
6121 * need not declare any of the following entities: amp, lt,
6122 * gt, apos, quot.
6123 * The declaration of a parameter entity must precede any
6124 * reference to it.
6125 * Similarly, the declaration of a general entity must
6126 * precede any reference to it which appears in a default
6127 * value in an attribute-list declaration. Note that if
6128 * entities are declared in the external subset or in
6129 * external parameter entities, a non-validating processor
6130 * is not obligated to read and process their declarations;
6131 * for such documents, the rule that an entity must be
6132 * declared is a well-formedness constraint only if
6133 * standalone='yes'.
6134 */
6135 if (ent == NULL) {
6136 if ((ctxt->standalone == 1) ||
6137 ((ctxt->hasExternalSubset == 0) &&
6138 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006139 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006140 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006141 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006142 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006143 "Entity '%s' not defined\n", name);
6144 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006145 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006146 }
6147
6148 /*
6149 * [ WFC: Parsed Entity ]
6150 * An entity reference must not contain the name of an
6151 * unparsed entity
6152 */
6153 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006154 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006155 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 }
6157
6158 /*
6159 * [ WFC: No External Entity References ]
6160 * Attribute values cannot contain direct or indirect
6161 * entity references to external entities.
6162 */
6163 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6164 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006165 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6166 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006167 }
6168 /*
6169 * [ WFC: No < in Attribute Values ]
6170 * The replacement text of any entity referred to directly or
6171 * indirectly in an attribute value (other than "&lt;") must
6172 * not contain a <.
6173 */
6174 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6175 (ent != NULL) &&
6176 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6177 (ent->content != NULL) &&
6178 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006179 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006180 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006181 }
6182
6183 /*
6184 * Internal check, no parameter entities here ...
6185 */
6186 else {
6187 switch (ent->etype) {
6188 case XML_INTERNAL_PARAMETER_ENTITY:
6189 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006190 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6191 "Attempt to reference the parameter entity '%s'\n",
6192 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006193 break;
6194 default:
6195 break;
6196 }
6197 }
6198
6199 /*
6200 * [ WFC: No Recursion ]
6201 * A parsed entity must not contain a recursive reference
6202 * to itself, either directly or indirectly.
6203 * Done somewhere else
6204 */
6205
6206 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006207 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006208 }
Owen Taylor3473f882001-02-23 17:55:21 +00006209 }
6210 }
6211 return(ent);
6212}
6213
6214/**
6215 * xmlParseStringEntityRef:
6216 * @ctxt: an XML parser context
6217 * @str: a pointer to an index in the string
6218 *
6219 * parse ENTITY references declarations, but this version parses it from
6220 * a string value.
6221 *
6222 * [68] EntityRef ::= '&' Name ';'
6223 *
6224 * [ WFC: Entity Declared ]
6225 * In a document without any DTD, a document with only an internal DTD
6226 * subset which contains no parameter entity references, or a document
6227 * with "standalone='yes'", the Name given in the entity reference
6228 * must match that in an entity declaration, except that well-formed
6229 * documents need not declare any of the following entities: amp, lt,
6230 * gt, apos, quot. The declaration of a parameter entity must precede
6231 * any reference to it. Similarly, the declaration of a general entity
6232 * must precede any reference to it which appears in a default value in an
6233 * attribute-list declaration. Note that if entities are declared in the
6234 * external subset or in external parameter entities, a non-validating
6235 * processor is not obligated to read and process their declarations;
6236 * for such documents, the rule that an entity must be declared is a
6237 * well-formedness constraint only if standalone='yes'.
6238 *
6239 * [ WFC: Parsed Entity ]
6240 * An entity reference must not contain the name of an unparsed entity
6241 *
6242 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6243 * is updated to the current location in the string.
6244 */
6245xmlEntityPtr
6246xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6247 xmlChar *name;
6248 const xmlChar *ptr;
6249 xmlChar cur;
6250 xmlEntityPtr ent = NULL;
6251
6252 if ((str == NULL) || (*str == NULL))
6253 return(NULL);
6254 ptr = *str;
6255 cur = *ptr;
6256 if (cur == '&') {
6257 ptr++;
6258 cur = *ptr;
6259 name = xmlParseStringName(ctxt, &ptr);
6260 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006261 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6262 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006263 } else {
6264 if (*ptr == ';') {
6265 ptr++;
6266 /*
6267 * Ask first SAX for entity resolution, otherwise try the
6268 * predefined set.
6269 */
6270 if (ctxt->sax != NULL) {
6271 if (ctxt->sax->getEntity != NULL)
6272 ent = ctxt->sax->getEntity(ctxt->userData, name);
6273 if (ent == NULL)
6274 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006275 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006276 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006277 }
Owen Taylor3473f882001-02-23 17:55:21 +00006278 }
6279 /*
6280 * [ WFC: Entity Declared ]
6281 * In a document without any DTD, a document with only an
6282 * internal DTD subset which contains no parameter entity
6283 * references, or a document with "standalone='yes'", the
6284 * Name given in the entity reference must match that in an
6285 * entity declaration, except that well-formed documents
6286 * need not declare any of the following entities: amp, lt,
6287 * gt, apos, quot.
6288 * The declaration of a parameter entity must precede any
6289 * reference to it.
6290 * Similarly, the declaration of a general entity must
6291 * precede any reference to it which appears in a default
6292 * value in an attribute-list declaration. Note that if
6293 * entities are declared in the external subset or in
6294 * external parameter entities, a non-validating processor
6295 * is not obligated to read and process their declarations;
6296 * for such documents, the rule that an entity must be
6297 * declared is a well-formedness constraint only if
6298 * standalone='yes'.
6299 */
6300 if (ent == NULL) {
6301 if ((ctxt->standalone == 1) ||
6302 ((ctxt->hasExternalSubset == 0) &&
6303 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006304 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006305 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006307 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006308 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006309 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006310 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006311 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006312 }
6313
6314 /*
6315 * [ WFC: Parsed Entity ]
6316 * An entity reference must not contain the name of an
6317 * unparsed entity
6318 */
6319 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006320 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006321 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006322 }
6323
6324 /*
6325 * [ WFC: No External Entity References ]
6326 * Attribute values cannot contain direct or indirect
6327 * entity references to external entities.
6328 */
6329 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6330 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006331 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006332 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006333 }
6334 /*
6335 * [ WFC: No < in Attribute Values ]
6336 * The replacement text of any entity referred to directly or
6337 * indirectly in an attribute value (other than "&lt;") must
6338 * not contain a <.
6339 */
6340 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6341 (ent != NULL) &&
6342 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6343 (ent->content != NULL) &&
6344 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006345 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6346 "'<' in entity '%s' is not allowed in attributes values\n",
6347 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006348 }
6349
6350 /*
6351 * Internal check, no parameter entities here ...
6352 */
6353 else {
6354 switch (ent->etype) {
6355 case XML_INTERNAL_PARAMETER_ENTITY:
6356 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006357 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6358 "Attempt to reference the parameter entity '%s'\n",
6359 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006360 break;
6361 default:
6362 break;
6363 }
6364 }
6365
6366 /*
6367 * [ WFC: No Recursion ]
6368 * A parsed entity must not contain a recursive reference
6369 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006370 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006371 */
6372
6373 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006374 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006375 }
6376 xmlFree(name);
6377 }
6378 }
6379 *str = ptr;
6380 return(ent);
6381}
6382
6383/**
6384 * xmlParsePEReference:
6385 * @ctxt: an XML parser context
6386 *
6387 * parse PEReference declarations
6388 * The entity content is handled directly by pushing it's content as
6389 * a new input stream.
6390 *
6391 * [69] PEReference ::= '%' Name ';'
6392 *
6393 * [ WFC: No Recursion ]
6394 * A parsed entity must not contain a recursive
6395 * reference to itself, either directly or indirectly.
6396 *
6397 * [ WFC: Entity Declared ]
6398 * In a document without any DTD, a document with only an internal DTD
6399 * subset which contains no parameter entity references, or a document
6400 * with "standalone='yes'", ... ... The declaration of a parameter
6401 * entity must precede any reference to it...
6402 *
6403 * [ VC: Entity Declared ]
6404 * In a document with an external subset or external parameter entities
6405 * with "standalone='no'", ... ... The declaration of a parameter entity
6406 * must precede any reference to it...
6407 *
6408 * [ WFC: In DTD ]
6409 * Parameter-entity references may only appear in the DTD.
6410 * NOTE: misleading but this is handled.
6411 */
6412void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006413xmlParsePEReference(xmlParserCtxtPtr ctxt)
6414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006415 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006416 xmlEntityPtr entity = NULL;
6417 xmlParserInputPtr input;
6418
6419 if (RAW == '%') {
6420 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006421 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006422 if (name == NULL) {
6423 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6424 "xmlParsePEReference: no name\n");
6425 } else {
6426 if (RAW == ';') {
6427 NEXT;
6428 if ((ctxt->sax != NULL) &&
6429 (ctxt->sax->getParameterEntity != NULL))
6430 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6431 name);
6432 if (entity == NULL) {
6433 /*
6434 * [ WFC: Entity Declared ]
6435 * In a document without any DTD, a document with only an
6436 * internal DTD subset which contains no parameter entity
6437 * references, or a document with "standalone='yes'", ...
6438 * ... The declaration of a parameter entity must precede
6439 * any reference to it...
6440 */
6441 if ((ctxt->standalone == 1) ||
6442 ((ctxt->hasExternalSubset == 0) &&
6443 (ctxt->hasPErefs == 0))) {
6444 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6445 "PEReference: %%%s; not found\n",
6446 name);
6447 } else {
6448 /*
6449 * [ VC: Entity Declared ]
6450 * In a document with an external subset or external
6451 * parameter entities with "standalone='no'", ...
6452 * ... The declaration of a parameter entity must
6453 * precede any reference to it...
6454 */
6455 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6456 "PEReference: %%%s; not found\n",
6457 name, NULL);
6458 ctxt->valid = 0;
6459 }
6460 } else {
6461 /*
6462 * Internal checking in case the entity quest barfed
6463 */
6464 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6465 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6466 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6467 "Internal: %%%s; is not a parameter entity\n",
6468 name, NULL);
6469 } else if (ctxt->input->free != deallocblankswrapper) {
6470 input =
6471 xmlNewBlanksWrapperInputStream(ctxt, entity);
6472 xmlPushInput(ctxt, input);
6473 } else {
6474 /*
6475 * TODO !!!
6476 * handle the extra spaces added before and after
6477 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6478 */
6479 input = xmlNewEntityInputStream(ctxt, entity);
6480 xmlPushInput(ctxt, input);
6481 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006482 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006483 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006484 xmlParseTextDecl(ctxt);
6485 if (ctxt->errNo ==
6486 XML_ERR_UNSUPPORTED_ENCODING) {
6487 /*
6488 * The XML REC instructs us to stop parsing
6489 * right here
6490 */
6491 ctxt->instate = XML_PARSER_EOF;
6492 return;
6493 }
6494 }
6495 }
6496 }
6497 ctxt->hasPErefs = 1;
6498 } else {
6499 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6500 }
6501 }
Owen Taylor3473f882001-02-23 17:55:21 +00006502 }
6503}
6504
6505/**
6506 * xmlParseStringPEReference:
6507 * @ctxt: an XML parser context
6508 * @str: a pointer to an index in the string
6509 *
6510 * parse PEReference declarations
6511 *
6512 * [69] PEReference ::= '%' Name ';'
6513 *
6514 * [ WFC: No Recursion ]
6515 * A parsed entity must not contain a recursive
6516 * reference to itself, either directly or indirectly.
6517 *
6518 * [ WFC: Entity Declared ]
6519 * In a document without any DTD, a document with only an internal DTD
6520 * subset which contains no parameter entity references, or a document
6521 * with "standalone='yes'", ... ... The declaration of a parameter
6522 * entity must precede any reference to it...
6523 *
6524 * [ VC: Entity Declared ]
6525 * In a document with an external subset or external parameter entities
6526 * with "standalone='no'", ... ... The declaration of a parameter entity
6527 * must precede any reference to it...
6528 *
6529 * [ WFC: In DTD ]
6530 * Parameter-entity references may only appear in the DTD.
6531 * NOTE: misleading but this is handled.
6532 *
6533 * Returns the string of the entity content.
6534 * str is updated to the current value of the index
6535 */
6536xmlEntityPtr
6537xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6538 const xmlChar *ptr;
6539 xmlChar cur;
6540 xmlChar *name;
6541 xmlEntityPtr entity = NULL;
6542
6543 if ((str == NULL) || (*str == NULL)) return(NULL);
6544 ptr = *str;
6545 cur = *ptr;
6546 if (cur == '%') {
6547 ptr++;
6548 cur = *ptr;
6549 name = xmlParseStringName(ctxt, &ptr);
6550 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006551 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6552 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006553 } else {
6554 cur = *ptr;
6555 if (cur == ';') {
6556 ptr++;
6557 cur = *ptr;
6558 if ((ctxt->sax != NULL) &&
6559 (ctxt->sax->getParameterEntity != NULL))
6560 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6561 name);
6562 if (entity == NULL) {
6563 /*
6564 * [ WFC: Entity Declared ]
6565 * In a document without any DTD, a document with only an
6566 * internal DTD subset which contains no parameter entity
6567 * references, or a document with "standalone='yes'", ...
6568 * ... The declaration of a parameter entity must precede
6569 * any reference to it...
6570 */
6571 if ((ctxt->standalone == 1) ||
6572 ((ctxt->hasExternalSubset == 0) &&
6573 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006574 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006575 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006576 } else {
6577 /*
6578 * [ VC: Entity Declared ]
6579 * In a document with an external subset or external
6580 * parameter entities with "standalone='no'", ...
6581 * ... The declaration of a parameter entity must
6582 * precede any reference to it...
6583 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006584 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6585 "PEReference: %%%s; not found\n",
6586 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006587 ctxt->valid = 0;
6588 }
6589 } else {
6590 /*
6591 * Internal checking in case the entity quest barfed
6592 */
6593 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6594 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006595 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6596 "%%%s; is not a parameter entity\n",
6597 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006598 }
6599 }
6600 ctxt->hasPErefs = 1;
6601 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006602 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604 xmlFree(name);
6605 }
6606 }
6607 *str = ptr;
6608 return(entity);
6609}
6610
6611/**
6612 * xmlParseDocTypeDecl:
6613 * @ctxt: an XML parser context
6614 *
6615 * parse a DOCTYPE declaration
6616 *
6617 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6618 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6619 *
6620 * [ VC: Root Element Type ]
6621 * The Name in the document type declaration must match the element
6622 * type of the root element.
6623 */
6624
6625void
6626xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006627 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006628 xmlChar *ExternalID = NULL;
6629 xmlChar *URI = NULL;
6630
6631 /*
6632 * We know that '<!DOCTYPE' has been detected.
6633 */
6634 SKIP(9);
6635
6636 SKIP_BLANKS;
6637
6638 /*
6639 * Parse the DOCTYPE name.
6640 */
6641 name = xmlParseName(ctxt);
6642 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006643 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6644 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006645 }
6646 ctxt->intSubName = name;
6647
6648 SKIP_BLANKS;
6649
6650 /*
6651 * Check for SystemID and ExternalID
6652 */
6653 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6654
6655 if ((URI != NULL) || (ExternalID != NULL)) {
6656 ctxt->hasExternalSubset = 1;
6657 }
6658 ctxt->extSubURI = URI;
6659 ctxt->extSubSystem = ExternalID;
6660
6661 SKIP_BLANKS;
6662
6663 /*
6664 * Create and update the internal subset.
6665 */
6666 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6667 (!ctxt->disableSAX))
6668 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6669
6670 /*
6671 * Is there any internal subset declarations ?
6672 * they are handled separately in xmlParseInternalSubset()
6673 */
6674 if (RAW == '[')
6675 return;
6676
6677 /*
6678 * We should be at the end of the DOCTYPE declaration.
6679 */
6680 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006681 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006682 }
6683 NEXT;
6684}
6685
6686/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006687 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006688 * @ctxt: an XML parser context
6689 *
6690 * parse the internal subset declaration
6691 *
6692 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6693 */
6694
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006695static void
Owen Taylor3473f882001-02-23 17:55:21 +00006696xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6697 /*
6698 * Is there any DTD definition ?
6699 */
6700 if (RAW == '[') {
6701 ctxt->instate = XML_PARSER_DTD;
6702 NEXT;
6703 /*
6704 * Parse the succession of Markup declarations and
6705 * PEReferences.
6706 * Subsequence (markupdecl | PEReference | S)*
6707 */
6708 while (RAW != ']') {
6709 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006710 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006711
6712 SKIP_BLANKS;
6713 xmlParseMarkupDecl(ctxt);
6714 xmlParsePEReference(ctxt);
6715
6716 /*
6717 * Pop-up of finished entities.
6718 */
6719 while ((RAW == 0) && (ctxt->inputNr > 1))
6720 xmlPopInput(ctxt);
6721
6722 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006723 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006724 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006725 break;
6726 }
6727 }
6728 if (RAW == ']') {
6729 NEXT;
6730 SKIP_BLANKS;
6731 }
6732 }
6733
6734 /*
6735 * We should be at the end of the DOCTYPE declaration.
6736 */
6737 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006738 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006739 }
6740 NEXT;
6741}
6742
Daniel Veillard81273902003-09-30 00:43:48 +00006743#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006744/**
6745 * xmlParseAttribute:
6746 * @ctxt: an XML parser context
6747 * @value: a xmlChar ** used to store the value of the attribute
6748 *
6749 * parse an attribute
6750 *
6751 * [41] Attribute ::= Name Eq AttValue
6752 *
6753 * [ WFC: No External Entity References ]
6754 * Attribute values cannot contain direct or indirect entity references
6755 * to external entities.
6756 *
6757 * [ WFC: No < in Attribute Values ]
6758 * The replacement text of any entity referred to directly or indirectly in
6759 * an attribute value (other than "&lt;") must not contain a <.
6760 *
6761 * [ VC: Attribute Value Type ]
6762 * The attribute must have been declared; the value must be of the type
6763 * declared for it.
6764 *
6765 * [25] Eq ::= S? '=' S?
6766 *
6767 * With namespace:
6768 *
6769 * [NS 11] Attribute ::= QName Eq AttValue
6770 *
6771 * Also the case QName == xmlns:??? is handled independently as a namespace
6772 * definition.
6773 *
6774 * Returns the attribute name, and the value in *value.
6775 */
6776
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006777const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006778xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006779 const xmlChar *name;
6780 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006781
6782 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006783 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006784 name = xmlParseName(ctxt);
6785 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006786 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006787 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006788 return(NULL);
6789 }
6790
6791 /*
6792 * read the value
6793 */
6794 SKIP_BLANKS;
6795 if (RAW == '=') {
6796 NEXT;
6797 SKIP_BLANKS;
6798 val = xmlParseAttValue(ctxt);
6799 ctxt->instate = XML_PARSER_CONTENT;
6800 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006801 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006802 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006803 return(NULL);
6804 }
6805
6806 /*
6807 * Check that xml:lang conforms to the specification
6808 * No more registered as an error, just generate a warning now
6809 * since this was deprecated in XML second edition
6810 */
6811 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6812 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006813 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6814 "Malformed value for xml:lang : %s\n",
6815 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006816 }
6817 }
6818
6819 /*
6820 * Check that xml:space conforms to the specification
6821 */
6822 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6823 if (xmlStrEqual(val, BAD_CAST "default"))
6824 *(ctxt->space) = 0;
6825 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6826 *(ctxt->space) = 1;
6827 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006828 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006829"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006830 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 }
6832 }
6833
6834 *value = val;
6835 return(name);
6836}
6837
6838/**
6839 * xmlParseStartTag:
6840 * @ctxt: an XML parser context
6841 *
6842 * parse a start of tag either for rule element or
6843 * EmptyElement. In both case we don't parse the tag closing chars.
6844 *
6845 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6846 *
6847 * [ WFC: Unique Att Spec ]
6848 * No attribute name may appear more than once in the same start-tag or
6849 * empty-element tag.
6850 *
6851 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6852 *
6853 * [ WFC: Unique Att Spec ]
6854 * No attribute name may appear more than once in the same start-tag or
6855 * empty-element tag.
6856 *
6857 * With namespace:
6858 *
6859 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6860 *
6861 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6862 *
6863 * Returns the element name parsed
6864 */
6865
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006866const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006867xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006868 const xmlChar *name;
6869 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006870 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006871 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006872 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006873 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006874 int i;
6875
6876 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006877 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006878
6879 name = xmlParseName(ctxt);
6880 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006881 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006882 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006883 return(NULL);
6884 }
6885
6886 /*
6887 * Now parse the attributes, it ends up with the ending
6888 *
6889 * (S Attribute)* S?
6890 */
6891 SKIP_BLANKS;
6892 GROW;
6893
Daniel Veillard21a0f912001-02-25 19:54:14 +00006894 while ((RAW != '>') &&
6895 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006896 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006897 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006898 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006899
6900 attname = xmlParseAttribute(ctxt, &attvalue);
6901 if ((attname != NULL) && (attvalue != NULL)) {
6902 /*
6903 * [ WFC: Unique Att Spec ]
6904 * No attribute name may appear more than once in the same
6905 * start-tag or empty-element tag.
6906 */
6907 for (i = 0; i < nbatts;i += 2) {
6908 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006909 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006910 xmlFree(attvalue);
6911 goto failed;
6912 }
6913 }
Owen Taylor3473f882001-02-23 17:55:21 +00006914 /*
6915 * Add the pair to atts
6916 */
6917 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006918 maxatts = 22; /* allow for 10 attrs by default */
6919 atts = (const xmlChar **)
6920 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006921 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006922 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006923 if (attvalue != NULL)
6924 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006925 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006926 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006927 ctxt->atts = atts;
6928 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006929 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006930 const xmlChar **n;
6931
Owen Taylor3473f882001-02-23 17:55:21 +00006932 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006933 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006934 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006935 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006936 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006937 if (attvalue != NULL)
6938 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006939 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006940 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006941 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006942 ctxt->atts = atts;
6943 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006944 }
6945 atts[nbatts++] = attname;
6946 atts[nbatts++] = attvalue;
6947 atts[nbatts] = NULL;
6948 atts[nbatts + 1] = NULL;
6949 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006950 if (attvalue != NULL)
6951 xmlFree(attvalue);
6952 }
6953
6954failed:
6955
Daniel Veillard3772de32002-12-17 10:31:45 +00006956 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006957 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6958 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006959 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006960 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6961 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006962 }
6963 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006964 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6965 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006966 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6967 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006968 break;
6969 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006970 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006971 GROW;
6972 }
6973
6974 /*
6975 * SAX: Start of Element !
6976 */
6977 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006978 (!ctxt->disableSAX)) {
6979 if (nbatts > 0)
6980 ctxt->sax->startElement(ctxt->userData, name, atts);
6981 else
6982 ctxt->sax->startElement(ctxt->userData, name, NULL);
6983 }
Owen Taylor3473f882001-02-23 17:55:21 +00006984
6985 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006986 /* Free only the content strings */
6987 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006988 if (atts[i] != NULL)
6989 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006990 }
6991 return(name);
6992}
6993
6994/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006995 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006996 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006997 * @line: line of the start tag
6998 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006999 *
7000 * parse an end of tag
7001 *
7002 * [42] ETag ::= '</' Name S? '>'
7003 *
7004 * With namespace
7005 *
7006 * [NS 9] ETag ::= '</' QName S? '>'
7007 */
7008
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007009static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007010xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007011 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007012
7013 GROW;
7014 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007015 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007016 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007017 return;
7018 }
7019 SKIP(2);
7020
Daniel Veillard46de64e2002-05-29 08:21:33 +00007021 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007022
7023 /*
7024 * We should definitely be at the ending "S? '>'" part
7025 */
7026 GROW;
7027 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007028 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007029 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007030 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007031 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007032
7033 /*
7034 * [ WFC: Element Type Match ]
7035 * The Name in an element's end-tag must match the element type in the
7036 * start-tag.
7037 *
7038 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007039 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007040 if (name == NULL) name = BAD_CAST "unparseable";
7041 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007042 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007043 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007044 }
7045
7046 /*
7047 * SAX: End of Tag
7048 */
7049 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7050 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007051 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007052
Daniel Veillarde57ec792003-09-10 10:50:59 +00007053 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007054 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007055 return;
7056}
7057
7058/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007059 * xmlParseEndTag:
7060 * @ctxt: an XML parser context
7061 *
7062 * parse an end of tag
7063 *
7064 * [42] ETag ::= '</' Name S? '>'
7065 *
7066 * With namespace
7067 *
7068 * [NS 9] ETag ::= '</' QName S? '>'
7069 */
7070
void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    /* Thin wrapper: forwards to xmlParseEndTag1() with a line argument of 0. */
    xmlParseEndTag1(ctxt, 0);
}
Daniel Veillard81273902003-09-30 00:43:48 +00007075#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007076
7077/************************************************************************
7078 * *
7079 * SAX 2 specific operations *
7080 * *
7081 ************************************************************************/
7082
7083static const xmlChar *
7084xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7085 int len = 0, l;
7086 int c;
7087 int count = 0;
7088
7089 /*
7090 * Handler for more complex cases
7091 */
7092 GROW;
7093 c = CUR_CHAR(l);
7094 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007095 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007096 return(NULL);
7097 }
7098
7099 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007100 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007101 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007102 (IS_COMBINING(c)) ||
7103 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007104 if (count++ > 100) {
7105 count = 0;
7106 GROW;
7107 }
7108 len += l;
7109 NEXTL(l);
7110 c = CUR_CHAR(l);
7111 }
7112 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7113}
7114
7115/*
7116 * xmlGetNamespace:
7117 * @ctxt: an XML parser context
7118 * @prefix: the prefix to lookup
7119 *
 * Look up the namespace name for the @prefix (which can be NULL).
 * The prefix must come from the @ctxt->dict dictionary.
7122 *
7123 * Returns the namespace name or NULL if not bound
7124 */
7125static const xmlChar *
7126xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7127 int i;
7128
Daniel Veillarde57ec792003-09-10 10:50:59 +00007129 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007130 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007131 if (ctxt->nsTab[i] == prefix) {
7132 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7133 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007134 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007135 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007136 return(NULL);
7137}
7138
7139/**
7140 * xmlParseNCName:
7141 * @ctxt: an XML parser context
7142 *
7143 * parse an XML name.
7144 *
7145 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7146 * CombiningChar | Extender
7147 *
7148 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7149 *
7150 * Returns the Name parsed or NULL
7151 */
7152
7153static const xmlChar *
7154xmlParseNCName(xmlParserCtxtPtr ctxt) {
7155 const xmlChar *in;
7156 const xmlChar *ret;
7157 int count = 0;
7158
7159 /*
7160 * Accelerator for simple ASCII names
7161 */
7162 in = ctxt->input->cur;
7163 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7164 ((*in >= 0x41) && (*in <= 0x5A)) ||
7165 (*in == '_')) {
7166 in++;
7167 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7168 ((*in >= 0x41) && (*in <= 0x5A)) ||
7169 ((*in >= 0x30) && (*in <= 0x39)) ||
7170 (*in == '_') || (*in == '-') ||
7171 (*in == '.'))
7172 in++;
7173 if ((*in > 0) && (*in < 0x80)) {
7174 count = in - ctxt->input->cur;
7175 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7176 ctxt->input->cur = in;
7177 ctxt->nbChars += count;
7178 ctxt->input->col += count;
7179 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007180 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007181 }
7182 return(ret);
7183 }
7184 }
7185 return(xmlParseNCNameComplex(ctxt));
7186}
7187
7188/**
7189 * xmlParseQName:
7190 * @ctxt: an XML parser context
7191 * @prefix: pointer to store the prefix part
7192 *
7193 * parse an XML Namespace QName
7194 *
7195 * [6] QName ::= (Prefix ':')? LocalPart
7196 * [7] Prefix ::= NCName
7197 * [8] LocalPart ::= NCName
7198 *
7199 * Returns the Name parsed or NULL
7200 */
7201
7202static const xmlChar *
7203xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7204 const xmlChar *l, *p;
7205
7206 GROW;
7207
7208 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007209 if (l == NULL) {
7210 if (CUR == ':') {
7211 l = xmlParseName(ctxt);
7212 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007213 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7214 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007215 *prefix = NULL;
7216 return(l);
7217 }
7218 }
7219 return(NULL);
7220 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007221 if (CUR == ':') {
7222 NEXT;
7223 p = l;
7224 l = xmlParseNCName(ctxt);
7225 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007226 xmlChar *tmp;
7227
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007228 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7229 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007230 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7231 p = xmlDictLookup(ctxt->dict, tmp, -1);
7232 if (tmp != NULL) xmlFree(tmp);
7233 *prefix = NULL;
7234 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007235 }
7236 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007237 xmlChar *tmp;
7238
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007239 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7240 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007241 NEXT;
7242 tmp = (xmlChar *) xmlParseName(ctxt);
7243 if (tmp != NULL) {
7244 tmp = xmlBuildQName(tmp, l, NULL, 0);
7245 l = xmlDictLookup(ctxt->dict, tmp, -1);
7246 if (tmp != NULL) xmlFree(tmp);
7247 *prefix = p;
7248 return(l);
7249 }
7250 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7251 l = xmlDictLookup(ctxt->dict, tmp, -1);
7252 if (tmp != NULL) xmlFree(tmp);
7253 *prefix = p;
7254 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007255 }
7256 *prefix = p;
7257 } else
7258 *prefix = NULL;
7259 return(l);
7260}
7261
7262/**
7263 * xmlParseQNameAndCompare:
7264 * @ctxt: an XML parser context
7265 * @name: the localname
7266 * @prefix: the prefix, if any.
7267 *
7268 * parse an XML name and compares for match
7269 * (specialized for endtag parsing)
7270 *
7271 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7272 * and the name for mismatch
7273 */
7274
7275static const xmlChar *
7276xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7277 xmlChar const *prefix) {
7278 const xmlChar *cmp = name;
7279 const xmlChar *in;
7280 const xmlChar *ret;
7281 const xmlChar *prefix2;
7282
7283 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7284
7285 GROW;
7286 in = ctxt->input->cur;
7287
7288 cmp = prefix;
7289 while (*in != 0 && *in == *cmp) {
7290 ++in;
7291 ++cmp;
7292 }
7293 if ((*cmp == 0) && (*in == ':')) {
7294 in++;
7295 cmp = name;
7296 while (*in != 0 && *in == *cmp) {
7297 ++in;
7298 ++cmp;
7299 }
William M. Brack76e95df2003-10-18 16:20:14 +00007300 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007301 /* success */
7302 ctxt->input->cur = in;
7303 return((const xmlChar*) 1);
7304 }
7305 }
7306 /*
7307 * all strings coms from the dictionary, equality can be done directly
7308 */
7309 ret = xmlParseQName (ctxt, &prefix2);
7310 if ((ret == name) && (prefix == prefix2))
7311 return((const xmlChar*) 1);
7312 return ret;
7313}
7314
7315/**
7316 * xmlParseAttValueInternal:
7317 * @ctxt: an XML parser context
7318 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007319 * @alloc: whether the attribute was reallocated as a new string
7320 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007321 *
7322 * parse a value for an attribute.
7323 * NOTE: if no normalization is needed, the routine will return pointers
7324 * directly from the data buffer.
7325 *
7326 * 3.3.3 Attribute-Value Normalization:
7327 * Before the value of an attribute is passed to the application or
7328 * checked for validity, the XML processor must normalize it as follows:
7329 * - a character reference is processed by appending the referenced
7330 * character to the attribute value
7331 * - an entity reference is processed by recursively processing the
7332 * replacement text of the entity
7333 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7334 * appending #x20 to the normalized value, except that only a single
7335 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7336 * parsed entity or the literal entity value of an internal parsed entity
7337 * - other characters are processed by appending them to the normalized value
7338 * If the declared value is not CDATA, then the XML processor must further
7339 * process the normalized attribute value by discarding any leading and
7340 * trailing space (#x20) characters, and by replacing sequences of space
7341 * (#x20) characters by a single space (#x20) character.
7342 * All attributes for which no declaration has been read should be treated
7343 * by a non-validating parser as if declared CDATA.
7344 *
7345 * Returns the AttValue parsed or NULL. The value has to be freed by the
7346 * caller if it was copied, this can be detected by val[*len] == 0.
7347 */
7348
7349static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007350xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7351 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007352{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007353 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007354 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007355 xmlChar *ret = NULL;
7356
7357 GROW;
7358 in = (xmlChar *) CUR_PTR;
7359 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007360 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007361 return (NULL);
7362 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007363 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007364
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007365 /*
7366 * try to handle in this routine the most common case where no
7367 * allocation of a new string is required and where content is
7368 * pure ASCII.
7369 */
7370 limit = *in++;
7371 end = ctxt->input->end;
7372 start = in;
7373 if (in >= end) {
7374 const xmlChar *oldbase = ctxt->input->base;
7375 GROW;
7376 if (oldbase != ctxt->input->base) {
7377 long delta = ctxt->input->base - oldbase;
7378 start = start + delta;
7379 in = in + delta;
7380 }
7381 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007382 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007383 if (normalize) {
7384 /*
7385 * Skip any leading spaces
7386 */
7387 while ((in < end) && (*in != limit) &&
7388 ((*in == 0x20) || (*in == 0x9) ||
7389 (*in == 0xA) || (*in == 0xD))) {
7390 in++;
7391 start = in;
7392 if (in >= end) {
7393 const xmlChar *oldbase = ctxt->input->base;
7394 GROW;
7395 if (oldbase != ctxt->input->base) {
7396 long delta = ctxt->input->base - oldbase;
7397 start = start + delta;
7398 in = in + delta;
7399 }
7400 end = ctxt->input->end;
7401 }
7402 }
7403 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7404 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7405 if ((*in++ == 0x20) && (*in == 0x20)) break;
7406 if (in >= end) {
7407 const xmlChar *oldbase = ctxt->input->base;
7408 GROW;
7409 if (oldbase != ctxt->input->base) {
7410 long delta = ctxt->input->base - oldbase;
7411 start = start + delta;
7412 in = in + delta;
7413 }
7414 end = ctxt->input->end;
7415 }
7416 }
7417 last = in;
7418 /*
7419 * skip the trailing blanks
7420 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007421 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007422 while ((in < end) && (*in != limit) &&
7423 ((*in == 0x20) || (*in == 0x9) ||
7424 (*in == 0xA) || (*in == 0xD))) {
7425 in++;
7426 if (in >= end) {
7427 const xmlChar *oldbase = ctxt->input->base;
7428 GROW;
7429 if (oldbase != ctxt->input->base) {
7430 long delta = ctxt->input->base - oldbase;
7431 start = start + delta;
7432 in = in + delta;
7433 last = last + delta;
7434 }
7435 end = ctxt->input->end;
7436 }
7437 }
7438 if (*in != limit) goto need_complex;
7439 } else {
7440 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7441 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7442 in++;
7443 if (in >= end) {
7444 const xmlChar *oldbase = ctxt->input->base;
7445 GROW;
7446 if (oldbase != ctxt->input->base) {
7447 long delta = ctxt->input->base - oldbase;
7448 start = start + delta;
7449 in = in + delta;
7450 }
7451 end = ctxt->input->end;
7452 }
7453 }
7454 last = in;
7455 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007456 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007457 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007458 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007459 *len = last - start;
7460 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007462 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007463 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007464 }
7465 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007466 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007467 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007468need_complex:
7469 if (alloc) *alloc = 1;
7470 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007471}
7472
7473/**
7474 * xmlParseAttribute2:
7475 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007476 * @pref: the element prefix
7477 * @elem: the element name
7478 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007479 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007480 * @len: an int * to save the length of the attribute
7481 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007482 *
7483 * parse an attribute in the new SAX2 framework.
7484 *
7485 * Returns the attribute name, and the value in *value, .
7486 */
7487
7488static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007489xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7490 const xmlChar *pref, const xmlChar *elem,
7491 const xmlChar **prefix, xmlChar **value,
7492 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007493 const xmlChar *name;
7494 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007495 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007496
7497 *value = NULL;
7498 GROW;
7499 name = xmlParseQName(ctxt, prefix);
7500 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007501 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7502 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007503 return(NULL);
7504 }
7505
7506 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007507 * get the type if needed
7508 */
7509 if (ctxt->attsSpecial != NULL) {
7510 int type;
7511
7512 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7513 pref, elem, *prefix, name);
7514 if (type != 0) normalize = 1;
7515 }
7516
7517 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 * read the value
7519 */
7520 SKIP_BLANKS;
7521 if (RAW == '=') {
7522 NEXT;
7523 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007524 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007525 ctxt->instate = XML_PARSER_CONTENT;
7526 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007527 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007528 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007529 return(NULL);
7530 }
7531
7532 /*
7533 * Check that xml:lang conforms to the specification
7534 * No more registered as an error, just generate a warning now
7535 * since this was deprecated in XML second edition
7536 */
7537 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7538 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007539 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7540 "Malformed value for xml:lang : %s\n",
7541 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007542 }
7543 }
7544
7545 /*
7546 * Check that xml:space conforms to the specification
7547 */
7548 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7549 if (xmlStrEqual(val, BAD_CAST "default"))
7550 *(ctxt->space) = 0;
7551 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7552 *(ctxt->space) = 1;
7553 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007554 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007555"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7556 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 }
7558 }
7559
7560 *value = val;
7561 return(name);
7562}
7563
7564/**
7565 * xmlParseStartTag2:
7566 * @ctxt: an XML parser context
7567 *
7568 * parse a start of tag either for rule element or
7569 * EmptyElement. In both case we don't parse the tag closing chars.
7570 * This routine is called when running SAX2 parsing
7571 *
7572 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7573 *
7574 * [ WFC: Unique Att Spec ]
7575 * No attribute name may appear more than once in the same start-tag or
7576 * empty-element tag.
7577 *
7578 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7579 *
7580 * [ WFC: Unique Att Spec ]
7581 * No attribute name may appear more than once in the same start-tag or
7582 * empty-element tag.
7583 *
7584 * With namespace:
7585 *
7586 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7587 *
7588 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7589 *
7590 * Returns the element name parsed
7591 */
7592
7593static const xmlChar *
7594xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7595 const xmlChar **URI) {
7596 const xmlChar *localname;
7597 const xmlChar *prefix;
7598 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007599 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007600 const xmlChar *nsname;
7601 xmlChar *attvalue;
7602 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007603 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007604 int nratts, nbatts, nbdef;
7605 int i, j, nbNs, attval;
7606 const xmlChar *base;
7607 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007608
7609 if (RAW != '<') return(NULL);
7610 NEXT1;
7611
7612 /*
7613 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7614 * point since the attribute values may be stored as pointers to
7615 * the buffer and calling SHRINK would destroy them !
7616 * The Shrinking is only possible once the full set of attribute
7617 * callbacks have been done.
7618 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007619reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007620 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007621 base = ctxt->input->base;
7622 cur = ctxt->input->cur - ctxt->input->base;
7623 nbatts = 0;
7624 nratts = 0;
7625 nbdef = 0;
7626 nbNs = 0;
7627 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007628
7629 localname = xmlParseQName(ctxt, &prefix);
7630 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007631 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7632 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633 return(NULL);
7634 }
7635
7636 /*
7637 * Now parse the attributes, it ends up with the ending
7638 *
7639 * (S Attribute)* S?
7640 */
7641 SKIP_BLANKS;
7642 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007643 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007644
7645 while ((RAW != '>') &&
7646 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007647 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007648 const xmlChar *q = CUR_PTR;
7649 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007650 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007651
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007652 attname = xmlParseAttribute2(ctxt, prefix, localname,
7653 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007654 if ((attname != NULL) && (attvalue != NULL)) {
7655 if (len < 0) len = xmlStrlen(attvalue);
7656 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007657 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7658 xmlURIPtr uri;
7659
7660 if (*URL != 0) {
7661 uri = xmlParseURI((const char *) URL);
7662 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007663 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7664 "xmlns: %s not a valid URI\n",
7665 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007666 } else {
7667 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007668 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7669 "xmlns: URI %s is not absolute\n",
7670 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007671 }
7672 xmlFreeURI(uri);
7673 }
7674 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007675 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007676 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007677 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007678 for (j = 1;j <= nbNs;j++)
7679 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7680 break;
7681 if (j <= nbNs)
7682 xmlErrAttributeDup(ctxt, NULL, attname);
7683 else
7684 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007685 if (alloc != 0) xmlFree(attvalue);
7686 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007687 continue;
7688 }
7689 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007690 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7691 xmlURIPtr uri;
7692
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007693 if (attname == ctxt->str_xml) {
7694 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007695 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7696 "xml namespace prefix mapped to wrong URI\n",
7697 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007698 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007699 /*
7700 * Do not keep a namespace definition node
7701 */
7702 if (alloc != 0) xmlFree(attvalue);
7703 SKIP_BLANKS;
7704 continue;
7705 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007706 uri = xmlParseURI((const char *) URL);
7707 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007708 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7709 "xmlns:%s: '%s' is not a valid URI\n",
7710 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007711 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007712 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007713 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7714 "xmlns:%s: URI %s is not absolute\n",
7715 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007716 }
7717 xmlFreeURI(uri);
7718 }
7719
Daniel Veillard0fb18932003-09-07 09:14:37 +00007720 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007721 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007722 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007723 for (j = 1;j <= nbNs;j++)
7724 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7725 break;
7726 if (j <= nbNs)
7727 xmlErrAttributeDup(ctxt, aprefix, attname);
7728 else
7729 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007730 if (alloc != 0) xmlFree(attvalue);
7731 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007732 continue;
7733 }
7734
7735 /*
7736 * Add the pair to atts
7737 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007738 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7739 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007740 if (attvalue[len] == 0)
7741 xmlFree(attvalue);
7742 goto failed;
7743 }
7744 maxatts = ctxt->maxatts;
7745 atts = ctxt->atts;
7746 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007747 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007748 atts[nbatts++] = attname;
7749 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007750 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007751 atts[nbatts++] = attvalue;
7752 attvalue += len;
7753 atts[nbatts++] = attvalue;
7754 /*
7755 * tag if some deallocation is needed
7756 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007757 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007758 } else {
7759 if ((attvalue != NULL) && (attvalue[len] == 0))
7760 xmlFree(attvalue);
7761 }
7762
7763failed:
7764
7765 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007766 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007767 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7768 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007769 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007770 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7771 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007772 }
7773 SKIP_BLANKS;
7774 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7775 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007776 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007777 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007778 break;
7779 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007780 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007781 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 }
7783
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007785 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007786 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007787 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007788 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7789 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007790 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007791 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007792 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007794 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007795 /*
7796 * [ WFC: Unique Att Spec ]
7797 * No attribute name may appear more than once in the same
7798 * start-tag or empty-element tag.
7799 * As extended by the Namespace in XML REC.
7800 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007801 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007802 if (atts[i] == atts[j]) {
7803 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007804 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007805 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007806 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007807 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007808 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007809 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007810 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007811 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812 }
7813 }
7814 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007815 }
7816
7817 /*
7818 * The attributes defaulting
7819 */
7820 if (ctxt->attsDefault != NULL) {
7821 xmlDefAttrsPtr defaults;
7822
7823 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7824 if (defaults != NULL) {
7825 for (i = 0;i < defaults->nbAttrs;i++) {
7826 attname = defaults->values[4 * i];
7827 aprefix = defaults->values[4 * i + 1];
7828
7829 /*
7830 * special work for namespaces defaulted defs
7831 */
7832 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7833 /*
7834 * check that it's not a defined namespace
7835 */
7836 for (j = 1;j <= nbNs;j++)
7837 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7838 break;
7839 if (j <= nbNs) continue;
7840
7841 nsname = xmlGetNamespace(ctxt, NULL);
7842 if (nsname != defaults->values[4 * i + 2]) {
7843 if (nsPush(ctxt, NULL,
7844 defaults->values[4 * i + 2]) > 0)
7845 nbNs++;
7846 }
7847 } else if (aprefix == ctxt->str_xmlns) {
7848 /*
7849 * check that it's not a defined namespace
7850 */
7851 for (j = 1;j <= nbNs;j++)
7852 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7853 break;
7854 if (j <= nbNs) continue;
7855
7856 nsname = xmlGetNamespace(ctxt, attname);
7857 if (nsname != defaults->values[2]) {
7858 if (nsPush(ctxt, attname,
7859 defaults->values[4 * i + 2]) > 0)
7860 nbNs++;
7861 }
7862 } else {
7863 /*
7864 * check that it's not a defined attribute
7865 */
7866 for (j = 0;j < nbatts;j+=5) {
7867 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7868 break;
7869 }
7870 if (j < nbatts) continue;
7871
7872 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7873 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007874 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007875 }
7876 maxatts = ctxt->maxatts;
7877 atts = ctxt->atts;
7878 }
7879 atts[nbatts++] = attname;
7880 atts[nbatts++] = aprefix;
7881 if (aprefix == NULL)
7882 atts[nbatts++] = NULL;
7883 else
7884 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7885 atts[nbatts++] = defaults->values[4 * i + 2];
7886 atts[nbatts++] = defaults->values[4 * i + 3];
7887 nbdef++;
7888 }
7889 }
7890 }
7891 }
7892
7893 nsname = xmlGetNamespace(ctxt, prefix);
7894 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007895 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7896 "Namespace prefix %s on %s is not defined\n",
7897 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007898 }
7899 *pref = prefix;
7900 *URI = nsname;
7901
7902 /*
7903 * SAX: Start of Element !
7904 */
7905 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7906 (!ctxt->disableSAX)) {
7907 if (nbNs > 0)
7908 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7909 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7910 nbatts / 5, nbdef, atts);
7911 else
7912 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7913 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7914 }
7915
7916 /*
7917 * Free up attribute allocated strings if needed
7918 */
7919 if (attval != 0) {
7920 for (i = 3,j = 0; j < nratts;i += 5,j++)
7921 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7922 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007923 }
7924
7925 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007926
7927base_changed:
7928 /*
7929 * the attribute strings are valid iif the base didn't changed
7930 */
7931 if (attval != 0) {
7932 for (i = 3,j = 0; j < nratts;i += 5,j++)
7933 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7934 xmlFree((xmlChar *) atts[i]);
7935 }
7936 ctxt->input->cur = ctxt->input->base + cur;
7937 if (ctxt->wellFormed == 1) {
7938 goto reparse;
7939 }
7940 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007941}
7942
7943/**
7944 * xmlParseEndTag2:
7945 * @ctxt: an XML parser context
7946 * @line: line of the start tag
7947 * @nsNr: number of namespaces on the start tag
7948 *
7949 * parse an end of tag
7950 *
7951 * [42] ETag ::= '</' Name S? '>'
7952 *
7953 * With namespace
7954 *
7955 * [NS 9] ETag ::= '</' QName S? '>'
7956 */
7957
7958static void
7959xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7960 const xmlChar *URI, int line, int nsNr) {
7961 const xmlChar *name;
7962
7963 GROW;
7964 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007965 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966 return;
7967 }
7968 SKIP(2);
7969
7970 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7971
7972 /*
7973 * We should definitely be at the ending "S? '>'" part
7974 */
7975 GROW;
7976 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007977 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007978 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007979 } else
7980 NEXT1;
7981
7982 /*
7983 * [ WFC: Element Type Match ]
7984 * The Name in an element's end-tag must match the element type in the
7985 * start-tag.
7986 *
7987 */
7988 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007989 if (name == NULL) name = BAD_CAST "unparseable";
7990 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007991 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007992 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007993 }
7994
7995 /*
7996 * SAX: End of Tag
7997 */
7998 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7999 (!ctxt->disableSAX))
8000 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8001
Daniel Veillard0fb18932003-09-07 09:14:37 +00008002 spacePop(ctxt);
8003 if (nsNr != 0)
8004 nsPop(ctxt, nsNr);
8005 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008006}
8007
8008/**
Owen Taylor3473f882001-02-23 17:55:21 +00008009 * xmlParseCDSect:
8010 * @ctxt: an XML parser context
8011 *
8012 * Parse escaped pure raw content.
8013 *
8014 * [18] CDSect ::= CDStart CData CDEnd
8015 *
8016 * [19] CDStart ::= '<![CDATA['
8017 *
8018 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8019 *
8020 * [21] CDEnd ::= ']]>'
8021 */
8022void
8023xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8024 xmlChar *buf = NULL;
8025 int len = 0;
8026 int size = XML_PARSER_BUFFER_SIZE;
8027 int r, rl;
8028 int s, sl;
8029 int cur, l;
8030 int count = 0;
8031
Daniel Veillard8f597c32003-10-06 08:19:27 +00008032 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008033 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008034 SKIP(9);
8035 } else
8036 return;
8037
8038 ctxt->instate = XML_PARSER_CDATA_SECTION;
8039 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008040 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008041 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008042 ctxt->instate = XML_PARSER_CONTENT;
8043 return;
8044 }
8045 NEXTL(rl);
8046 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008047 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008048 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008049 ctxt->instate = XML_PARSER_CONTENT;
8050 return;
8051 }
8052 NEXTL(sl);
8053 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008054 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008055 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008056 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008057 return;
8058 }
William M. Brack871611b2003-10-18 04:53:14 +00008059 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008060 ((r != ']') || (s != ']') || (cur != '>'))) {
8061 if (len + 5 >= size) {
8062 size *= 2;
8063 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8064 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008065 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008066 return;
8067 }
8068 }
8069 COPY_BUF(rl,buf,len,r);
8070 r = s;
8071 rl = sl;
8072 s = cur;
8073 sl = l;
8074 count++;
8075 if (count > 50) {
8076 GROW;
8077 count = 0;
8078 }
8079 NEXTL(l);
8080 cur = CUR_CHAR(l);
8081 }
8082 buf[len] = 0;
8083 ctxt->instate = XML_PARSER_CONTENT;
8084 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008085 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008086 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008087 xmlFree(buf);
8088 return;
8089 }
8090 NEXTL(l);
8091
8092 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008093 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008094 */
8095 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8096 if (ctxt->sax->cdataBlock != NULL)
8097 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008098 else if (ctxt->sax->characters != NULL)
8099 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008100 }
8101 xmlFree(buf);
8102}
8103
8104/**
8105 * xmlParseContent:
8106 * @ctxt: an XML parser context
8107 *
8108 * Parse a content:
8109 *
8110 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8111 */
8112
8113void
8114xmlParseContent(xmlParserCtxtPtr ctxt) {
8115 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008116 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008117 ((RAW != '<') || (NXT(1) != '/'))) {
8118 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008119 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008120 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008121
8122 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008123 * First case : a Processing Instruction.
8124 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008125 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008126 xmlParsePI(ctxt);
8127 }
8128
8129 /*
8130 * Second case : a CDSection
8131 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008132 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008133 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008134 xmlParseCDSect(ctxt);
8135 }
8136
8137 /*
8138 * Third case : a comment
8139 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008140 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008141 (NXT(2) == '-') && (NXT(3) == '-')) {
8142 xmlParseComment(ctxt);
8143 ctxt->instate = XML_PARSER_CONTENT;
8144 }
8145
8146 /*
8147 * Fourth case : a sub-element.
8148 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008149 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008150 xmlParseElement(ctxt);
8151 }
8152
8153 /*
8154 * Fifth case : a reference. If if has not been resolved,
8155 * parsing returns it's Name, create the node
8156 */
8157
Daniel Veillard21a0f912001-02-25 19:54:14 +00008158 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008159 xmlParseReference(ctxt);
8160 }
8161
8162 /*
8163 * Last case, text. Note that References are handled directly.
8164 */
8165 else {
8166 xmlParseCharData(ctxt, 0);
8167 }
8168
8169 GROW;
8170 /*
8171 * Pop-up of finished entities.
8172 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008173 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008174 xmlPopInput(ctxt);
8175 SHRINK;
8176
Daniel Veillardfdc91562002-07-01 21:52:03 +00008177 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008178 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8179 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008180 ctxt->instate = XML_PARSER_EOF;
8181 break;
8182 }
8183 }
8184}
8185
8186/**
8187 * xmlParseElement:
8188 * @ctxt: an XML parser context
8189 *
8190 * parse an XML element, this is highly recursive
8191 *
8192 * [39] element ::= EmptyElemTag | STag content ETag
8193 *
8194 * [ WFC: Element Type Match ]
8195 * The Name in an element's end-tag must match the element type in the
8196 * start-tag.
8197 *
Owen Taylor3473f882001-02-23 17:55:21 +00008198 */
8199
8200void
8201xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008202 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008203 const xmlChar *prefix;
8204 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008205 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008206 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008207 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008208 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008209
8210 /* Capture start position */
8211 if (ctxt->record_info) {
8212 node_info.begin_pos = ctxt->input->consumed +
8213 (CUR_PTR - ctxt->input->base);
8214 node_info.begin_line = ctxt->input->line;
8215 }
8216
8217 if (ctxt->spaceNr == 0)
8218 spacePush(ctxt, -1);
8219 else
8220 spacePush(ctxt, *ctxt->space);
8221
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008222 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008223#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008224 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008225#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008226 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008227#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008228 else
8229 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008230#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008231 if (name == NULL) {
8232 spacePop(ctxt);
8233 return;
8234 }
8235 namePush(ctxt, name);
8236 ret = ctxt->node;
8237
Daniel Veillard4432df22003-09-28 18:58:27 +00008238#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008239 /*
8240 * [ VC: Root Element Type ]
8241 * The Name in the document type declaration must match the element
8242 * type of the root element.
8243 */
8244 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8245 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8246 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008247#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008248
8249 /*
8250 * Check for an Empty Element.
8251 */
8252 if ((RAW == '/') && (NXT(1) == '>')) {
8253 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008254 if (ctxt->sax2) {
8255 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8256 (!ctxt->disableSAX))
8257 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008258#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008259 } else {
8260 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8261 (!ctxt->disableSAX))
8262 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008263#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008264 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008265 namePop(ctxt);
8266 spacePop(ctxt);
8267 if (nsNr != ctxt->nsNr)
8268 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008269 if ( ret != NULL && ctxt->record_info ) {
8270 node_info.end_pos = ctxt->input->consumed +
8271 (CUR_PTR - ctxt->input->base);
8272 node_info.end_line = ctxt->input->line;
8273 node_info.node = ret;
8274 xmlParserAddNodeInfo(ctxt, &node_info);
8275 }
8276 return;
8277 }
8278 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008279 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008280 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008281 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8282 "Couldn't find end of Start Tag %s line %d\n",
8283 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008284
8285 /*
8286 * end of parsing of this node.
8287 */
8288 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008289 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008290 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008291 if (nsNr != ctxt->nsNr)
8292 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008293
8294 /*
8295 * Capture end position and add node
8296 */
8297 if ( ret != NULL && ctxt->record_info ) {
8298 node_info.end_pos = ctxt->input->consumed +
8299 (CUR_PTR - ctxt->input->base);
8300 node_info.end_line = ctxt->input->line;
8301 node_info.node = ret;
8302 xmlParserAddNodeInfo(ctxt, &node_info);
8303 }
8304 return;
8305 }
8306
8307 /*
8308 * Parse the content of the element:
8309 */
8310 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008311 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008312 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008313 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008314 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008315
8316 /*
8317 * end of parsing of this node.
8318 */
8319 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008320 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008321 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008322 if (nsNr != ctxt->nsNr)
8323 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008324 return;
8325 }
8326
8327 /*
8328 * parse the end of tag: '</' should be here.
8329 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008330 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008331 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008332 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008333 }
8334#ifdef LIBXML_SAX1_ENABLED
8335 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008336 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008337#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008338
8339 /*
8340 * Capture end position and add node
8341 */
8342 if ( ret != NULL && ctxt->record_info ) {
8343 node_info.end_pos = ctxt->input->consumed +
8344 (CUR_PTR - ctxt->input->base);
8345 node_info.end_line = ctxt->input->line;
8346 node_info.node = ret;
8347 xmlParserAddNodeInfo(ctxt, &node_info);
8348 }
8349}
8350
8351/**
8352 * xmlParseVersionNum:
8353 * @ctxt: an XML parser context
8354 *
8355 * parse the XML version value.
8356 *
8357 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8358 *
8359 * Returns the string giving the XML version number, or NULL
8360 */
8361xmlChar *
8362xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8363 xmlChar *buf = NULL;
8364 int len = 0;
8365 int size = 10;
8366 xmlChar cur;
8367
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008368 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008369 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008370 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008371 return(NULL);
8372 }
8373 cur = CUR;
8374 while (((cur >= 'a') && (cur <= 'z')) ||
8375 ((cur >= 'A') && (cur <= 'Z')) ||
8376 ((cur >= '0') && (cur <= '9')) ||
8377 (cur == '_') || (cur == '.') ||
8378 (cur == ':') || (cur == '-')) {
8379 if (len + 1 >= size) {
8380 size *= 2;
8381 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8382 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008383 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008384 return(NULL);
8385 }
8386 }
8387 buf[len++] = cur;
8388 NEXT;
8389 cur=CUR;
8390 }
8391 buf[len] = 0;
8392 return(buf);
8393}
8394
8395/**
8396 * xmlParseVersionInfo:
8397 * @ctxt: an XML parser context
8398 *
8399 * parse the XML version.
8400 *
8401 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8402 *
8403 * [25] Eq ::= S? '=' S?
8404 *
8405 * Returns the version string, e.g. "1.0"
8406 */
8407
8408xmlChar *
8409xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8410 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008411
Daniel Veillarda07050d2003-10-19 14:46:32 +00008412 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008413 SKIP(7);
8414 SKIP_BLANKS;
8415 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008416 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008417 return(NULL);
8418 }
8419 NEXT;
8420 SKIP_BLANKS;
8421 if (RAW == '"') {
8422 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008423 version = xmlParseVersionNum(ctxt);
8424 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008425 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008426 } else
8427 NEXT;
8428 } else if (RAW == '\''){
8429 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008430 version = xmlParseVersionNum(ctxt);
8431 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008432 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008433 } else
8434 NEXT;
8435 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008436 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008437 }
8438 }
8439 return(version);
8440}
8441
8442/**
8443 * xmlParseEncName:
8444 * @ctxt: an XML parser context
8445 *
8446 * parse the XML encoding name
8447 *
8448 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8449 *
8450 * Returns the encoding name value or NULL
8451 */
8452xmlChar *
8453xmlParseEncName(xmlParserCtxtPtr ctxt) {
8454 xmlChar *buf = NULL;
8455 int len = 0;
8456 int size = 10;
8457 xmlChar cur;
8458
8459 cur = CUR;
8460 if (((cur >= 'a') && (cur <= 'z')) ||
8461 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008462 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008463 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008464 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008465 return(NULL);
8466 }
8467
8468 buf[len++] = cur;
8469 NEXT;
8470 cur = CUR;
8471 while (((cur >= 'a') && (cur <= 'z')) ||
8472 ((cur >= 'A') && (cur <= 'Z')) ||
8473 ((cur >= '0') && (cur <= '9')) ||
8474 (cur == '.') || (cur == '_') ||
8475 (cur == '-')) {
8476 if (len + 1 >= size) {
8477 size *= 2;
8478 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8479 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008480 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008481 return(NULL);
8482 }
8483 }
8484 buf[len++] = cur;
8485 NEXT;
8486 cur = CUR;
8487 if (cur == 0) {
8488 SHRINK;
8489 GROW;
8490 cur = CUR;
8491 }
8492 }
8493 buf[len] = 0;
8494 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008495 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008496 }
8497 return(buf);
8498}
8499
8500/**
8501 * xmlParseEncodingDecl:
8502 * @ctxt: an XML parser context
8503 *
8504 * parse the XML encoding declaration
8505 *
8506 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8507 *
8508 * this setups the conversion filters.
8509 *
8510 * Returns the encoding value or NULL
8511 */
8512
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008513const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008514xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8515 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008516
8517 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008518 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008519 SKIP(8);
8520 SKIP_BLANKS;
8521 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008522 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008523 return(NULL);
8524 }
8525 NEXT;
8526 SKIP_BLANKS;
8527 if (RAW == '"') {
8528 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008529 encoding = xmlParseEncName(ctxt);
8530 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008531 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008532 } else
8533 NEXT;
8534 } else if (RAW == '\''){
8535 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008536 encoding = xmlParseEncName(ctxt);
8537 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008538 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008539 } else
8540 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008541 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008542 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008543 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008544 /*
8545 * UTF-16 encoding stwich has already taken place at this stage,
8546 * more over the little-endian/big-endian selection is already done
8547 */
8548 if ((encoding != NULL) &&
8549 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8550 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008551 if (ctxt->encoding != NULL)
8552 xmlFree((xmlChar *) ctxt->encoding);
8553 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008554 }
8555 /*
8556 * UTF-8 encoding is handled natively
8557 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008558 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008559 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8560 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008561 if (ctxt->encoding != NULL)
8562 xmlFree((xmlChar *) ctxt->encoding);
8563 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008564 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008565 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008566 xmlCharEncodingHandlerPtr handler;
8567
8568 if (ctxt->input->encoding != NULL)
8569 xmlFree((xmlChar *) ctxt->input->encoding);
8570 ctxt->input->encoding = encoding;
8571
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008572 handler = xmlFindCharEncodingHandler((const char *) encoding);
8573 if (handler != NULL) {
8574 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008575 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008576 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008577 "Unsupported encoding %s\n", encoding);
8578 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008579 }
8580 }
8581 }
8582 return(encoding);
8583}
8584
8585/**
8586 * xmlParseSDDecl:
8587 * @ctxt: an XML parser context
8588 *
8589 * parse the XML standalone declaration
8590 *
8591 * [32] SDDecl ::= S 'standalone' Eq
8592 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8593 *
8594 * [ VC: Standalone Document Declaration ]
8595 * TODO The standalone document declaration must have the value "no"
8596 * if any external markup declarations contain declarations of:
8597 * - attributes with default values, if elements to which these
8598 * attributes apply appear in the document without specifications
8599 * of values for these attributes, or
8600 * - entities (other than amp, lt, gt, apos, quot), if references
8601 * to those entities appear in the document, or
8602 * - attributes with values subject to normalization, where the
8603 * attribute appears in the document with a value which will change
8604 * as a result of normalization, or
8605 * - element types with element content, if white space occurs directly
8606 * within any instance of those types.
8607 *
8608 * Returns 1 if standalone, 0 otherwise
8609 */
8610
8611int
8612xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8613 int standalone = -1;
8614
8615 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008616 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008617 SKIP(10);
8618 SKIP_BLANKS;
8619 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008620 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008621 return(standalone);
8622 }
8623 NEXT;
8624 SKIP_BLANKS;
8625 if (RAW == '\''){
8626 NEXT;
8627 if ((RAW == 'n') && (NXT(1) == 'o')) {
8628 standalone = 0;
8629 SKIP(2);
8630 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8631 (NXT(2) == 's')) {
8632 standalone = 1;
8633 SKIP(3);
8634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008635 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008636 }
8637 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008638 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008639 } else
8640 NEXT;
8641 } else if (RAW == '"'){
8642 NEXT;
8643 if ((RAW == 'n') && (NXT(1) == 'o')) {
8644 standalone = 0;
8645 SKIP(2);
8646 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8647 (NXT(2) == 's')) {
8648 standalone = 1;
8649 SKIP(3);
8650 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008651 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008652 }
8653 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008654 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008655 } else
8656 NEXT;
8657 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008658 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008659 }
8660 }
8661 return(standalone);
8662}
8663
8664/**
8665 * xmlParseXMLDecl:
8666 * @ctxt: an XML parser context
8667 *
8668 * parse an XML declaration header
8669 *
8670 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8671 */
8672
8673void
8674xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8675 xmlChar *version;
8676
8677 /*
8678 * We know that '<?xml' is here.
8679 */
8680 SKIP(5);
8681
William M. Brack76e95df2003-10-18 16:20:14 +00008682 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8684 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008685 }
8686 SKIP_BLANKS;
8687
8688 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008689 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008690 */
8691 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008692 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008693 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008694 } else {
8695 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8696 /*
8697 * TODO: Blueberry should be detected here
8698 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008699 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8700 "Unsupported version '%s'\n",
8701 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008702 }
8703 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008704 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008705 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008706 }
Owen Taylor3473f882001-02-23 17:55:21 +00008707
8708 /*
8709 * We may have the encoding declaration
8710 */
William M. Brack76e95df2003-10-18 16:20:14 +00008711 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008712 if ((RAW == '?') && (NXT(1) == '>')) {
8713 SKIP(2);
8714 return;
8715 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008717 }
8718 xmlParseEncodingDecl(ctxt);
8719 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8720 /*
8721 * The XML REC instructs us to stop parsing right here
8722 */
8723 return;
8724 }
8725
8726 /*
8727 * We may have the standalone status.
8728 */
William M. Brack76e95df2003-10-18 16:20:14 +00008729 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008730 if ((RAW == '?') && (NXT(1) == '>')) {
8731 SKIP(2);
8732 return;
8733 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008735 }
8736 SKIP_BLANKS;
8737 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8738
8739 SKIP_BLANKS;
8740 if ((RAW == '?') && (NXT(1) == '>')) {
8741 SKIP(2);
8742 } else if (RAW == '>') {
8743 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008744 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008745 NEXT;
8746 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008747 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008748 MOVETO_ENDTAG(CUR_PTR);
8749 NEXT;
8750 }
8751}
8752
8753/**
8754 * xmlParseMisc:
8755 * @ctxt: an XML parser context
8756 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008757 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008758 *
8759 * [27] Misc ::= Comment | PI | S
8760 */
8761
8762void
8763xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008764 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008765 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008766 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008767 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008768 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008769 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008770 NEXT;
8771 } else
8772 xmlParseComment(ctxt);
8773 }
8774}
8775
8776/**
8777 * xmlParseDocument:
8778 * @ctxt: an XML parser context
8779 *
8780 * parse an XML document (and build a tree if using the standard SAX
8781 * interface).
8782 *
8783 * [1] document ::= prolog element Misc*
8784 *
8785 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8786 *
8787 * Returns 0, -1 in case of error. the parser context is augmented
8788 * as a result of the parsing.
8789 */
8790
8791int
8792xmlParseDocument(xmlParserCtxtPtr ctxt) {
8793 xmlChar start[4];
8794 xmlCharEncoding enc;
8795
8796 xmlInitParser();
8797
8798 GROW;
8799
8800 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008801 * SAX: detecting the level.
8802 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008803 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008804
8805 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008806 * SAX: beginning of the document processing.
8807 */
8808 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8809 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8810
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008811 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8812 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008813 /*
8814 * Get the 4 first bytes and decode the charset
8815 * if enc != XML_CHAR_ENCODING_NONE
8816 * plug some encoding conversion routines.
8817 */
8818 start[0] = RAW;
8819 start[1] = NXT(1);
8820 start[2] = NXT(2);
8821 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008822 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008823 if (enc != XML_CHAR_ENCODING_NONE) {
8824 xmlSwitchEncoding(ctxt, enc);
8825 }
Owen Taylor3473f882001-02-23 17:55:21 +00008826 }
8827
8828
8829 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008830 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008831 }
8832
8833 /*
8834 * Check for the XMLDecl in the Prolog.
8835 */
8836 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008837 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008838
8839 /*
8840 * Note that we will switch encoding on the fly.
8841 */
8842 xmlParseXMLDecl(ctxt);
8843 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8844 /*
8845 * The XML REC instructs us to stop parsing right here
8846 */
8847 return(-1);
8848 }
8849 ctxt->standalone = ctxt->input->standalone;
8850 SKIP_BLANKS;
8851 } else {
8852 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8853 }
8854 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8855 ctxt->sax->startDocument(ctxt->userData);
8856
8857 /*
8858 * The Misc part of the Prolog
8859 */
8860 GROW;
8861 xmlParseMisc(ctxt);
8862
8863 /*
8864 * Then possibly doc type declaration(s) and more Misc
8865 * (doctypedecl Misc*)?
8866 */
8867 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008868 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008869
8870 ctxt->inSubset = 1;
8871 xmlParseDocTypeDecl(ctxt);
8872 if (RAW == '[') {
8873 ctxt->instate = XML_PARSER_DTD;
8874 xmlParseInternalSubset(ctxt);
8875 }
8876
8877 /*
8878 * Create and update the external subset.
8879 */
8880 ctxt->inSubset = 2;
8881 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8882 (!ctxt->disableSAX))
8883 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8884 ctxt->extSubSystem, ctxt->extSubURI);
8885 ctxt->inSubset = 0;
8886
8887
8888 ctxt->instate = XML_PARSER_PROLOG;
8889 xmlParseMisc(ctxt);
8890 }
8891
8892 /*
8893 * Time to start parsing the tree itself
8894 */
8895 GROW;
8896 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008897 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8898 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008899 } else {
8900 ctxt->instate = XML_PARSER_CONTENT;
8901 xmlParseElement(ctxt);
8902 ctxt->instate = XML_PARSER_EPILOG;
8903
8904
8905 /*
8906 * The Misc part at the end
8907 */
8908 xmlParseMisc(ctxt);
8909
Daniel Veillard561b7f82002-03-20 21:55:57 +00008910 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008911 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008912 }
8913 ctxt->instate = XML_PARSER_EOF;
8914 }
8915
8916 /*
8917 * SAX: end of the document processing.
8918 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008919 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008920 ctxt->sax->endDocument(ctxt->userData);
8921
Daniel Veillard5997aca2002-03-18 18:36:20 +00008922 /*
8923 * Remove locally kept entity definitions if the tree was not built
8924 */
8925 if ((ctxt->myDoc != NULL) &&
8926 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8927 xmlFreeDoc(ctxt->myDoc);
8928 ctxt->myDoc = NULL;
8929 }
8930
Daniel Veillardc7612992002-02-17 22:47:37 +00008931 if (! ctxt->wellFormed) {
8932 ctxt->valid = 0;
8933 return(-1);
8934 }
Owen Taylor3473f882001-02-23 17:55:21 +00008935 return(0);
8936}
8937
8938/**
8939 * xmlParseExtParsedEnt:
8940 * @ctxt: an XML parser context
8941 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008942 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008943 * An external general parsed entity is well-formed if it matches the
8944 * production labeled extParsedEnt.
8945 *
8946 * [78] extParsedEnt ::= TextDecl? content
8947 *
8948 * Returns 0, -1 in case of error. the parser context is augmented
8949 * as a result of the parsing.
8950 */
8951
8952int
8953xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8954 xmlChar start[4];
8955 xmlCharEncoding enc;
8956
8957 xmlDefaultSAXHandlerInit();
8958
Daniel Veillard309f81d2003-09-23 09:02:53 +00008959 xmlDetectSAX2(ctxt);
8960
Owen Taylor3473f882001-02-23 17:55:21 +00008961 GROW;
8962
8963 /*
8964 * SAX: beginning of the document processing.
8965 */
8966 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8967 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8968
8969 /*
8970 * Get the 4 first bytes and decode the charset
8971 * if enc != XML_CHAR_ENCODING_NONE
8972 * plug some encoding conversion routines.
8973 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008974 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8975 start[0] = RAW;
8976 start[1] = NXT(1);
8977 start[2] = NXT(2);
8978 start[3] = NXT(3);
8979 enc = xmlDetectCharEncoding(start, 4);
8980 if (enc != XML_CHAR_ENCODING_NONE) {
8981 xmlSwitchEncoding(ctxt, enc);
8982 }
Owen Taylor3473f882001-02-23 17:55:21 +00008983 }
8984
8985
8986 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008987 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008988 }
8989
8990 /*
8991 * Check for the XMLDecl in the Prolog.
8992 */
8993 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008994 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008995
8996 /*
8997 * Note that we will switch encoding on the fly.
8998 */
8999 xmlParseXMLDecl(ctxt);
9000 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9001 /*
9002 * The XML REC instructs us to stop parsing right here
9003 */
9004 return(-1);
9005 }
9006 SKIP_BLANKS;
9007 } else {
9008 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9009 }
9010 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9011 ctxt->sax->startDocument(ctxt->userData);
9012
9013 /*
9014 * Doing validity checking on chunk doesn't make sense
9015 */
9016 ctxt->instate = XML_PARSER_CONTENT;
9017 ctxt->validate = 0;
9018 ctxt->loadsubset = 0;
9019 ctxt->depth = 0;
9020
9021 xmlParseContent(ctxt);
9022
9023 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009024 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009025 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009026 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009027 }
9028
9029 /*
9030 * SAX: end of the document processing.
9031 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009032 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009033 ctxt->sax->endDocument(ctxt->userData);
9034
9035 if (! ctxt->wellFormed) return(-1);
9036 return(0);
9037}
9038
Daniel Veillard73b013f2003-09-30 12:36:01 +00009039#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009040/************************************************************************
9041 * *
9042 * Progressive parsing interfaces *
9043 * *
9044 ************************************************************************/
9045
9046/**
9047 * xmlParseLookupSequence:
9048 * @ctxt: an XML parser context
9049 * @first: the first char to lookup
9050 * @next: the next char to lookup or zero
9051 * @third: the next char to lookup or zero
9052 *
9053 * Try to find if a sequence (first, next, third) or just (first next) or
9054 * (first) is available in the input stream.
9055 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9056 * to avoid rescanning sequences of bytes, it DOES change the state of the
9057 * parser, do not use liberally.
9058 *
9059 * Returns the index to the current parsing point if the full sequence
9060 * is available, -1 otherwise.
9061 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009062static int
Owen Taylor3473f882001-02-23 17:55:21 +00009063xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9064 xmlChar next, xmlChar third) {
9065 int base, len;
9066 xmlParserInputPtr in;
9067 const xmlChar *buf;
9068
9069 in = ctxt->input;
9070 if (in == NULL) return(-1);
9071 base = in->cur - in->base;
9072 if (base < 0) return(-1);
9073 if (ctxt->checkIndex > base)
9074 base = ctxt->checkIndex;
9075 if (in->buf == NULL) {
9076 buf = in->base;
9077 len = in->length;
9078 } else {
9079 buf = in->buf->buffer->content;
9080 len = in->buf->buffer->use;
9081 }
9082 /* take into account the sequence length */
9083 if (third) len -= 2;
9084 else if (next) len --;
9085 for (;base < len;base++) {
9086 if (buf[base] == first) {
9087 if (third != 0) {
9088 if ((buf[base + 1] != next) ||
9089 (buf[base + 2] != third)) continue;
9090 } else if (next != 0) {
9091 if (buf[base + 1] != next) continue;
9092 }
9093 ctxt->checkIndex = 0;
9094#ifdef DEBUG_PUSH
9095 if (next == 0)
9096 xmlGenericError(xmlGenericErrorContext,
9097 "PP: lookup '%c' found at %d\n",
9098 first, base);
9099 else if (third == 0)
9100 xmlGenericError(xmlGenericErrorContext,
9101 "PP: lookup '%c%c' found at %d\n",
9102 first, next, base);
9103 else
9104 xmlGenericError(xmlGenericErrorContext,
9105 "PP: lookup '%c%c%c' found at %d\n",
9106 first, next, third, base);
9107#endif
9108 return(base - (in->cur - in->base));
9109 }
9110 }
9111 ctxt->checkIndex = base;
9112#ifdef DEBUG_PUSH
9113 if (next == 0)
9114 xmlGenericError(xmlGenericErrorContext,
9115 "PP: lookup '%c' failed\n", first);
9116 else if (third == 0)
9117 xmlGenericError(xmlGenericErrorContext,
9118 "PP: lookup '%c%c' failed\n", first, next);
9119 else
9120 xmlGenericError(xmlGenericErrorContext,
9121 "PP: lookup '%c%c%c' failed\n", first, next, third);
9122#endif
9123 return(-1);
9124}
9125
9126/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009127 * xmlParseGetLasts:
9128 * @ctxt: an XML parser context
9129 * @lastlt: pointer to store the last '<' from the input
9130 * @lastgt: pointer to store the last '>' from the input
9131 *
9132 * Lookup the last < and > in the current chunk
9133 */
9134static void
9135xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9136 const xmlChar **lastgt) {
9137 const xmlChar *tmp;
9138
9139 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9140 xmlGenericError(xmlGenericErrorContext,
9141 "Internal error: xmlParseGetLasts\n");
9142 return;
9143 }
9144 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9145 tmp = ctxt->input->end;
9146 tmp--;
9147 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9148 (*tmp != '>')) tmp--;
9149 if (tmp < ctxt->input->base) {
9150 *lastlt = NULL;
9151 *lastgt = NULL;
9152 } else if (*tmp == '<') {
9153 *lastlt = tmp;
9154 tmp--;
9155 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9156 if (tmp < ctxt->input->base)
9157 *lastgt = NULL;
9158 else
9159 *lastgt = tmp;
9160 } else {
9161 *lastgt = tmp;
9162 tmp--;
9163 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9164 if (tmp < ctxt->input->base)
9165 *lastlt = NULL;
9166 else
9167 *lastlt = tmp;
9168 }
9169
9170 } else {
9171 *lastlt = NULL;
9172 *lastgt = NULL;
9173 }
9174}
9175/**
Owen Taylor3473f882001-02-23 17:55:21 +00009176 * xmlParseTryOrFinish:
9177 * @ctxt: an XML parser context
9178 * @terminate: last chunk indicator
9179 *
9180 * Try to progress on parsing
9181 *
9182 * Returns zero if no parsing was possible
9183 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009184static int
Owen Taylor3473f882001-02-23 17:55:21 +00009185xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9186 int ret = 0;
9187 int avail;
9188 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009189 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009190
9191#ifdef DEBUG_PUSH
9192 switch (ctxt->instate) {
9193 case XML_PARSER_EOF:
9194 xmlGenericError(xmlGenericErrorContext,
9195 "PP: try EOF\n"); break;
9196 case XML_PARSER_START:
9197 xmlGenericError(xmlGenericErrorContext,
9198 "PP: try START\n"); break;
9199 case XML_PARSER_MISC:
9200 xmlGenericError(xmlGenericErrorContext,
9201 "PP: try MISC\n");break;
9202 case XML_PARSER_COMMENT:
9203 xmlGenericError(xmlGenericErrorContext,
9204 "PP: try COMMENT\n");break;
9205 case XML_PARSER_PROLOG:
9206 xmlGenericError(xmlGenericErrorContext,
9207 "PP: try PROLOG\n");break;
9208 case XML_PARSER_START_TAG:
9209 xmlGenericError(xmlGenericErrorContext,
9210 "PP: try START_TAG\n");break;
9211 case XML_PARSER_CONTENT:
9212 xmlGenericError(xmlGenericErrorContext,
9213 "PP: try CONTENT\n");break;
9214 case XML_PARSER_CDATA_SECTION:
9215 xmlGenericError(xmlGenericErrorContext,
9216 "PP: try CDATA_SECTION\n");break;
9217 case XML_PARSER_END_TAG:
9218 xmlGenericError(xmlGenericErrorContext,
9219 "PP: try END_TAG\n");break;
9220 case XML_PARSER_ENTITY_DECL:
9221 xmlGenericError(xmlGenericErrorContext,
9222 "PP: try ENTITY_DECL\n");break;
9223 case XML_PARSER_ENTITY_VALUE:
9224 xmlGenericError(xmlGenericErrorContext,
9225 "PP: try ENTITY_VALUE\n");break;
9226 case XML_PARSER_ATTRIBUTE_VALUE:
9227 xmlGenericError(xmlGenericErrorContext,
9228 "PP: try ATTRIBUTE_VALUE\n");break;
9229 case XML_PARSER_DTD:
9230 xmlGenericError(xmlGenericErrorContext,
9231 "PP: try DTD\n");break;
9232 case XML_PARSER_EPILOG:
9233 xmlGenericError(xmlGenericErrorContext,
9234 "PP: try EPILOG\n");break;
9235 case XML_PARSER_PI:
9236 xmlGenericError(xmlGenericErrorContext,
9237 "PP: try PI\n");break;
9238 case XML_PARSER_IGNORE:
9239 xmlGenericError(xmlGenericErrorContext,
9240 "PP: try IGNORE\n");break;
9241 }
9242#endif
9243
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009244 if ((ctxt->input != NULL) &&
9245 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009246 xmlSHRINK(ctxt);
9247 ctxt->checkIndex = 0;
9248 }
9249 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009250
Daniel Veillarda880b122003-04-21 21:36:41 +00009251 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009252 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9253 return(0);
9254
9255
Owen Taylor3473f882001-02-23 17:55:21 +00009256 /*
9257 * Pop-up of finished entities.
9258 */
9259 while ((RAW == 0) && (ctxt->inputNr > 1))
9260 xmlPopInput(ctxt);
9261
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009262 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009263 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009264 avail = ctxt->input->length -
9265 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009266 else {
9267 /*
9268 * If we are operating on converted input, try to flush
9269 * remainng chars to avoid them stalling in the non-converted
9270 * buffer.
9271 */
9272 if ((ctxt->input->buf->raw != NULL) &&
9273 (ctxt->input->buf->raw->use > 0)) {
9274 int base = ctxt->input->base -
9275 ctxt->input->buf->buffer->content;
9276 int current = ctxt->input->cur - ctxt->input->base;
9277
9278 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9279 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9280 ctxt->input->cur = ctxt->input->base + current;
9281 ctxt->input->end =
9282 &ctxt->input->buf->buffer->content[
9283 ctxt->input->buf->buffer->use];
9284 }
9285 avail = ctxt->input->buf->buffer->use -
9286 (ctxt->input->cur - ctxt->input->base);
9287 }
Owen Taylor3473f882001-02-23 17:55:21 +00009288 if (avail < 1)
9289 goto done;
9290 switch (ctxt->instate) {
9291 case XML_PARSER_EOF:
9292 /*
9293 * Document parsing is done !
9294 */
9295 goto done;
9296 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009297 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9298 xmlChar start[4];
9299 xmlCharEncoding enc;
9300
9301 /*
9302 * Very first chars read from the document flow.
9303 */
9304 if (avail < 4)
9305 goto done;
9306
9307 /*
9308 * Get the 4 first bytes and decode the charset
9309 * if enc != XML_CHAR_ENCODING_NONE
9310 * plug some encoding conversion routines.
9311 */
9312 start[0] = RAW;
9313 start[1] = NXT(1);
9314 start[2] = NXT(2);
9315 start[3] = NXT(3);
9316 enc = xmlDetectCharEncoding(start, 4);
9317 if (enc != XML_CHAR_ENCODING_NONE) {
9318 xmlSwitchEncoding(ctxt, enc);
9319 }
9320 break;
9321 }
Owen Taylor3473f882001-02-23 17:55:21 +00009322
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009323 if (avail < 2)
9324 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009325 cur = ctxt->input->cur[0];
9326 next = ctxt->input->cur[1];
9327 if (cur == 0) {
9328 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9329 ctxt->sax->setDocumentLocator(ctxt->userData,
9330 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009331 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009332 ctxt->instate = XML_PARSER_EOF;
9333#ifdef DEBUG_PUSH
9334 xmlGenericError(xmlGenericErrorContext,
9335 "PP: entering EOF\n");
9336#endif
9337 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9338 ctxt->sax->endDocument(ctxt->userData);
9339 goto done;
9340 }
9341 if ((cur == '<') && (next == '?')) {
9342 /* PI or XML decl */
9343 if (avail < 5) return(ret);
9344 if ((!terminate) &&
9345 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9346 return(ret);
9347 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9348 ctxt->sax->setDocumentLocator(ctxt->userData,
9349 &xmlDefaultSAXLocator);
9350 if ((ctxt->input->cur[2] == 'x') &&
9351 (ctxt->input->cur[3] == 'm') &&
9352 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009353 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009354 ret += 5;
9355#ifdef DEBUG_PUSH
9356 xmlGenericError(xmlGenericErrorContext,
9357 "PP: Parsing XML Decl\n");
9358#endif
9359 xmlParseXMLDecl(ctxt);
9360 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9361 /*
9362 * The XML REC instructs us to stop parsing right
9363 * here
9364 */
9365 ctxt->instate = XML_PARSER_EOF;
9366 return(0);
9367 }
9368 ctxt->standalone = ctxt->input->standalone;
9369 if ((ctxt->encoding == NULL) &&
9370 (ctxt->input->encoding != NULL))
9371 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9372 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9373 (!ctxt->disableSAX))
9374 ctxt->sax->startDocument(ctxt->userData);
9375 ctxt->instate = XML_PARSER_MISC;
9376#ifdef DEBUG_PUSH
9377 xmlGenericError(xmlGenericErrorContext,
9378 "PP: entering MISC\n");
9379#endif
9380 } else {
9381 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9382 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9383 (!ctxt->disableSAX))
9384 ctxt->sax->startDocument(ctxt->userData);
9385 ctxt->instate = XML_PARSER_MISC;
9386#ifdef DEBUG_PUSH
9387 xmlGenericError(xmlGenericErrorContext,
9388 "PP: entering MISC\n");
9389#endif
9390 }
9391 } else {
9392 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9393 ctxt->sax->setDocumentLocator(ctxt->userData,
9394 &xmlDefaultSAXLocator);
9395 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9396 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9397 (!ctxt->disableSAX))
9398 ctxt->sax->startDocument(ctxt->userData);
9399 ctxt->instate = XML_PARSER_MISC;
9400#ifdef DEBUG_PUSH
9401 xmlGenericError(xmlGenericErrorContext,
9402 "PP: entering MISC\n");
9403#endif
9404 }
9405 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009406 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009407 const xmlChar *name;
9408 const xmlChar *prefix;
9409 const xmlChar *URI;
9410 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009411
9412 if ((avail < 2) && (ctxt->inputNr == 1))
9413 goto done;
9414 cur = ctxt->input->cur[0];
9415 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009416 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009417 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009418 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9419 ctxt->sax->endDocument(ctxt->userData);
9420 goto done;
9421 }
9422 if (!terminate) {
9423 if (ctxt->progressive) {
9424 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9425 goto done;
9426 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9427 goto done;
9428 }
9429 }
9430 if (ctxt->spaceNr == 0)
9431 spacePush(ctxt, -1);
9432 else
9433 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009434#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009435 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009436#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009437 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009438#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009439 else
9440 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009441#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009442 if (name == NULL) {
9443 spacePop(ctxt);
9444 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009445 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9446 ctxt->sax->endDocument(ctxt->userData);
9447 goto done;
9448 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009449#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009450 /*
9451 * [ VC: Root Element Type ]
9452 * The Name in the document type declaration must match
9453 * the element type of the root element.
9454 */
9455 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9456 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9457 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009458#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009459
9460 /*
9461 * Check for an Empty Element.
9462 */
9463 if ((RAW == '/') && (NXT(1) == '>')) {
9464 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009465
9466 if (ctxt->sax2) {
9467 if ((ctxt->sax != NULL) &&
9468 (ctxt->sax->endElementNs != NULL) &&
9469 (!ctxt->disableSAX))
9470 ctxt->sax->endElementNs(ctxt->userData, name,
9471 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009472#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009473 } else {
9474 if ((ctxt->sax != NULL) &&
9475 (ctxt->sax->endElement != NULL) &&
9476 (!ctxt->disableSAX))
9477 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009478#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009479 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009480 spacePop(ctxt);
9481 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009482 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009483 } else {
9484 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009485 }
9486 break;
9487 }
9488 if (RAW == '>') {
9489 NEXT;
9490 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009491 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009492 "Couldn't find end of Start Tag %s\n",
9493 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009494 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009495 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009496 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009497 if (ctxt->sax2)
9498 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009499#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009500 else
9501 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009502#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009503
Daniel Veillarda880b122003-04-21 21:36:41 +00009504 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009505 break;
9506 }
9507 case XML_PARSER_CONTENT: {
9508 const xmlChar *test;
9509 unsigned int cons;
9510 if ((avail < 2) && (ctxt->inputNr == 1))
9511 goto done;
9512 cur = ctxt->input->cur[0];
9513 next = ctxt->input->cur[1];
9514
9515 test = CUR_PTR;
9516 cons = ctxt->input->consumed;
9517 if ((cur == '<') && (next == '/')) {
9518 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009519 break;
9520 } else if ((cur == '<') && (next == '?')) {
9521 if ((!terminate) &&
9522 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9523 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009524 xmlParsePI(ctxt);
9525 } else if ((cur == '<') && (next != '!')) {
9526 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009527 break;
9528 } else if ((cur == '<') && (next == '!') &&
9529 (ctxt->input->cur[2] == '-') &&
9530 (ctxt->input->cur[3] == '-')) {
9531 if ((!terminate) &&
9532 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9533 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009534 xmlParseComment(ctxt);
9535 ctxt->instate = XML_PARSER_CONTENT;
9536 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9537 (ctxt->input->cur[2] == '[') &&
9538 (ctxt->input->cur[3] == 'C') &&
9539 (ctxt->input->cur[4] == 'D') &&
9540 (ctxt->input->cur[5] == 'A') &&
9541 (ctxt->input->cur[6] == 'T') &&
9542 (ctxt->input->cur[7] == 'A') &&
9543 (ctxt->input->cur[8] == '[')) {
9544 SKIP(9);
9545 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009546 break;
9547 } else if ((cur == '<') && (next == '!') &&
9548 (avail < 9)) {
9549 goto done;
9550 } else if (cur == '&') {
9551 if ((!terminate) &&
9552 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9553 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 xmlParseReference(ctxt);
9555 } else {
9556 /* TODO Avoid the extra copy, handle directly !!! */
9557 /*
9558 * Goal of the following test is:
9559 * - minimize calls to the SAX 'character' callback
9560 * when they are mergeable
9561 * - handle an problem for isBlank when we only parse
9562 * a sequence of blank chars and the next one is
9563 * not available to check against '<' presence.
9564 * - tries to homogenize the differences in SAX
9565 * callbacks between the push and pull versions
9566 * of the parser.
9567 */
9568 if ((ctxt->inputNr == 1) &&
9569 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9570 if (!terminate) {
9571 if (ctxt->progressive) {
9572 if ((lastlt == NULL) ||
9573 (ctxt->input->cur > lastlt))
9574 goto done;
9575 } else if (xmlParseLookupSequence(ctxt,
9576 '<', 0, 0) < 0) {
9577 goto done;
9578 }
9579 }
9580 }
9581 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009582 xmlParseCharData(ctxt, 0);
9583 }
9584 /*
9585 * Pop-up of finished entities.
9586 */
9587 while ((RAW == 0) && (ctxt->inputNr > 1))
9588 xmlPopInput(ctxt);
9589 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009590 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9591 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009592 ctxt->instate = XML_PARSER_EOF;
9593 break;
9594 }
9595 break;
9596 }
9597 case XML_PARSER_END_TAG:
9598 if (avail < 2)
9599 goto done;
9600 if (!terminate) {
9601 if (ctxt->progressive) {
9602 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9603 goto done;
9604 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9605 goto done;
9606 }
9607 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009608 if (ctxt->sax2) {
9609 xmlParseEndTag2(ctxt,
9610 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9611 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9612 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9613 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009614 }
9615#ifdef LIBXML_SAX1_ENABLED
9616 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009617 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009618#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009619 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009620 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009621 } else {
9622 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009623 }
9624 break;
9625 case XML_PARSER_CDATA_SECTION: {
9626 /*
9627 * The Push mode need to have the SAX callback for
9628 * cdataBlock merge back contiguous callbacks.
9629 */
9630 int base;
9631
9632 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9633 if (base < 0) {
9634 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9635 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9636 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009637 ctxt->sax->cdataBlock(ctxt->userData,
9638 ctxt->input->cur,
9639 XML_PARSER_BIG_BUFFER_SIZE);
9640 else if (ctxt->sax->characters != NULL)
9641 ctxt->sax->characters(ctxt->userData,
9642 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009643 XML_PARSER_BIG_BUFFER_SIZE);
9644 }
9645 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9646 ctxt->checkIndex = 0;
9647 }
9648 goto done;
9649 } else {
9650 if ((ctxt->sax != NULL) && (base > 0) &&
9651 (!ctxt->disableSAX)) {
9652 if (ctxt->sax->cdataBlock != NULL)
9653 ctxt->sax->cdataBlock(ctxt->userData,
9654 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009655 else if (ctxt->sax->characters != NULL)
9656 ctxt->sax->characters(ctxt->userData,
9657 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009658 }
9659 SKIP(base + 3);
9660 ctxt->checkIndex = 0;
9661 ctxt->instate = XML_PARSER_CONTENT;
9662#ifdef DEBUG_PUSH
9663 xmlGenericError(xmlGenericErrorContext,
9664 "PP: entering CONTENT\n");
9665#endif
9666 }
9667 break;
9668 }
Owen Taylor3473f882001-02-23 17:55:21 +00009669 case XML_PARSER_MISC:
9670 SKIP_BLANKS;
9671 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009672 avail = ctxt->input->length -
9673 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009674 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009675 avail = ctxt->input->buf->buffer->use -
9676 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009677 if (avail < 2)
9678 goto done;
9679 cur = ctxt->input->cur[0];
9680 next = ctxt->input->cur[1];
9681 if ((cur == '<') && (next == '?')) {
9682 if ((!terminate) &&
9683 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9684 goto done;
9685#ifdef DEBUG_PUSH
9686 xmlGenericError(xmlGenericErrorContext,
9687 "PP: Parsing PI\n");
9688#endif
9689 xmlParsePI(ctxt);
9690 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009691 (ctxt->input->cur[2] == '-') &&
9692 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009693 if ((!terminate) &&
9694 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9695 goto done;
9696#ifdef DEBUG_PUSH
9697 xmlGenericError(xmlGenericErrorContext,
9698 "PP: Parsing Comment\n");
9699#endif
9700 xmlParseComment(ctxt);
9701 ctxt->instate = XML_PARSER_MISC;
9702 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009703 (ctxt->input->cur[2] == 'D') &&
9704 (ctxt->input->cur[3] == 'O') &&
9705 (ctxt->input->cur[4] == 'C') &&
9706 (ctxt->input->cur[5] == 'T') &&
9707 (ctxt->input->cur[6] == 'Y') &&
9708 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009709 (ctxt->input->cur[8] == 'E')) {
9710 if ((!terminate) &&
9711 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9712 goto done;
9713#ifdef DEBUG_PUSH
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: Parsing internal subset\n");
9716#endif
9717 ctxt->inSubset = 1;
9718 xmlParseDocTypeDecl(ctxt);
9719 if (RAW == '[') {
9720 ctxt->instate = XML_PARSER_DTD;
9721#ifdef DEBUG_PUSH
9722 xmlGenericError(xmlGenericErrorContext,
9723 "PP: entering DTD\n");
9724#endif
9725 } else {
9726 /*
9727 * Create and update the external subset.
9728 */
9729 ctxt->inSubset = 2;
9730 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9731 (ctxt->sax->externalSubset != NULL))
9732 ctxt->sax->externalSubset(ctxt->userData,
9733 ctxt->intSubName, ctxt->extSubSystem,
9734 ctxt->extSubURI);
9735 ctxt->inSubset = 0;
9736 ctxt->instate = XML_PARSER_PROLOG;
9737#ifdef DEBUG_PUSH
9738 xmlGenericError(xmlGenericErrorContext,
9739 "PP: entering PROLOG\n");
9740#endif
9741 }
9742 } else if ((cur == '<') && (next == '!') &&
9743 (avail < 9)) {
9744 goto done;
9745 } else {
9746 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009747 ctxt->progressive = 1;
9748 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009749#ifdef DEBUG_PUSH
9750 xmlGenericError(xmlGenericErrorContext,
9751 "PP: entering START_TAG\n");
9752#endif
9753 }
9754 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009755 case XML_PARSER_PROLOG:
9756 SKIP_BLANKS;
9757 if (ctxt->input->buf == NULL)
9758 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9759 else
9760 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9761 if (avail < 2)
9762 goto done;
9763 cur = ctxt->input->cur[0];
9764 next = ctxt->input->cur[1];
9765 if ((cur == '<') && (next == '?')) {
9766 if ((!terminate) &&
9767 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9768 goto done;
9769#ifdef DEBUG_PUSH
9770 xmlGenericError(xmlGenericErrorContext,
9771 "PP: Parsing PI\n");
9772#endif
9773 xmlParsePI(ctxt);
9774 } else if ((cur == '<') && (next == '!') &&
9775 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9776 if ((!terminate) &&
9777 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9778 goto done;
9779#ifdef DEBUG_PUSH
9780 xmlGenericError(xmlGenericErrorContext,
9781 "PP: Parsing Comment\n");
9782#endif
9783 xmlParseComment(ctxt);
9784 ctxt->instate = XML_PARSER_PROLOG;
9785 } else if ((cur == '<') && (next == '!') &&
9786 (avail < 4)) {
9787 goto done;
9788 } else {
9789 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009790 ctxt->progressive = 1;
9791 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009792#ifdef DEBUG_PUSH
9793 xmlGenericError(xmlGenericErrorContext,
9794 "PP: entering START_TAG\n");
9795#endif
9796 }
9797 break;
9798 case XML_PARSER_EPILOG:
9799 SKIP_BLANKS;
9800 if (ctxt->input->buf == NULL)
9801 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9802 else
9803 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9804 if (avail < 2)
9805 goto done;
9806 cur = ctxt->input->cur[0];
9807 next = ctxt->input->cur[1];
9808 if ((cur == '<') && (next == '?')) {
9809 if ((!terminate) &&
9810 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9811 goto done;
9812#ifdef DEBUG_PUSH
9813 xmlGenericError(xmlGenericErrorContext,
9814 "PP: Parsing PI\n");
9815#endif
9816 xmlParsePI(ctxt);
9817 ctxt->instate = XML_PARSER_EPILOG;
9818 } else if ((cur == '<') && (next == '!') &&
9819 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9820 if ((!terminate) &&
9821 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9822 goto done;
9823#ifdef DEBUG_PUSH
9824 xmlGenericError(xmlGenericErrorContext,
9825 "PP: Parsing Comment\n");
9826#endif
9827 xmlParseComment(ctxt);
9828 ctxt->instate = XML_PARSER_EPILOG;
9829 } else if ((cur == '<') && (next == '!') &&
9830 (avail < 4)) {
9831 goto done;
9832 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009833 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009834 ctxt->instate = XML_PARSER_EOF;
9835#ifdef DEBUG_PUSH
9836 xmlGenericError(xmlGenericErrorContext,
9837 "PP: entering EOF\n");
9838#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009839 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009840 ctxt->sax->endDocument(ctxt->userData);
9841 goto done;
9842 }
9843 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009844 case XML_PARSER_DTD: {
9845 /*
9846 * Sorry but progressive parsing of the internal subset
9847 * is not expected to be supported. We first check that
9848 * the full content of the internal subset is available and
9849 * the parsing is launched only at that point.
9850 * Internal subset ends up with "']' S? '>'" in an unescaped
9851 * section and not in a ']]>' sequence which are conditional
9852 * sections (whoever argued to keep that crap in XML deserve
9853 * a place in hell !).
9854 */
9855 int base, i;
9856 xmlChar *buf;
9857 xmlChar quote = 0;
9858
9859 base = ctxt->input->cur - ctxt->input->base;
9860 if (base < 0) return(0);
9861 if (ctxt->checkIndex > base)
9862 base = ctxt->checkIndex;
9863 buf = ctxt->input->buf->buffer->content;
9864 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9865 base++) {
9866 if (quote != 0) {
9867 if (buf[base] == quote)
9868 quote = 0;
9869 continue;
9870 }
9871 if (buf[base] == '"') {
9872 quote = '"';
9873 continue;
9874 }
9875 if (buf[base] == '\'') {
9876 quote = '\'';
9877 continue;
9878 }
9879 if (buf[base] == ']') {
9880 if ((unsigned int) base +1 >=
9881 ctxt->input->buf->buffer->use)
9882 break;
9883 if (buf[base + 1] == ']') {
9884 /* conditional crap, skip both ']' ! */
9885 base++;
9886 continue;
9887 }
9888 for (i = 0;
9889 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9890 i++) {
9891 if (buf[base + i] == '>')
9892 goto found_end_int_subset;
9893 }
9894 break;
9895 }
9896 }
9897 /*
9898 * We didn't found the end of the Internal subset
9899 */
9900 if (quote == 0)
9901 ctxt->checkIndex = base;
9902#ifdef DEBUG_PUSH
9903 if (next == 0)
9904 xmlGenericError(xmlGenericErrorContext,
9905 "PP: lookup of int subset end filed\n");
9906#endif
9907 goto done;
9908
9909found_end_int_subset:
9910 xmlParseInternalSubset(ctxt);
9911 ctxt->inSubset = 2;
9912 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9913 (ctxt->sax->externalSubset != NULL))
9914 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9915 ctxt->extSubSystem, ctxt->extSubURI);
9916 ctxt->inSubset = 0;
9917 ctxt->instate = XML_PARSER_PROLOG;
9918 ctxt->checkIndex = 0;
9919#ifdef DEBUG_PUSH
9920 xmlGenericError(xmlGenericErrorContext,
9921 "PP: entering PROLOG\n");
9922#endif
9923 break;
9924 }
9925 case XML_PARSER_COMMENT:
9926 xmlGenericError(xmlGenericErrorContext,
9927 "PP: internal error, state == COMMENT\n");
9928 ctxt->instate = XML_PARSER_CONTENT;
9929#ifdef DEBUG_PUSH
9930 xmlGenericError(xmlGenericErrorContext,
9931 "PP: entering CONTENT\n");
9932#endif
9933 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009934 case XML_PARSER_IGNORE:
9935 xmlGenericError(xmlGenericErrorContext,
9936 "PP: internal error, state == IGNORE");
9937 ctxt->instate = XML_PARSER_DTD;
9938#ifdef DEBUG_PUSH
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: entering DTD\n");
9941#endif
9942 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009943 case XML_PARSER_PI:
9944 xmlGenericError(xmlGenericErrorContext,
9945 "PP: internal error, state == PI\n");
9946 ctxt->instate = XML_PARSER_CONTENT;
9947#ifdef DEBUG_PUSH
9948 xmlGenericError(xmlGenericErrorContext,
9949 "PP: entering CONTENT\n");
9950#endif
9951 break;
9952 case XML_PARSER_ENTITY_DECL:
9953 xmlGenericError(xmlGenericErrorContext,
9954 "PP: internal error, state == ENTITY_DECL\n");
9955 ctxt->instate = XML_PARSER_DTD;
9956#ifdef DEBUG_PUSH
9957 xmlGenericError(xmlGenericErrorContext,
9958 "PP: entering DTD\n");
9959#endif
9960 break;
9961 case XML_PARSER_ENTITY_VALUE:
9962 xmlGenericError(xmlGenericErrorContext,
9963 "PP: internal error, state == ENTITY_VALUE\n");
9964 ctxt->instate = XML_PARSER_CONTENT;
9965#ifdef DEBUG_PUSH
9966 xmlGenericError(xmlGenericErrorContext,
9967 "PP: entering DTD\n");
9968#endif
9969 break;
9970 case XML_PARSER_ATTRIBUTE_VALUE:
9971 xmlGenericError(xmlGenericErrorContext,
9972 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9973 ctxt->instate = XML_PARSER_START_TAG;
9974#ifdef DEBUG_PUSH
9975 xmlGenericError(xmlGenericErrorContext,
9976 "PP: entering START_TAG\n");
9977#endif
9978 break;
9979 case XML_PARSER_SYSTEM_LITERAL:
9980 xmlGenericError(xmlGenericErrorContext,
9981 "PP: internal error, state == SYSTEM_LITERAL\n");
9982 ctxt->instate = XML_PARSER_START_TAG;
9983#ifdef DEBUG_PUSH
9984 xmlGenericError(xmlGenericErrorContext,
9985 "PP: entering START_TAG\n");
9986#endif
9987 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009988 case XML_PARSER_PUBLIC_LITERAL:
9989 xmlGenericError(xmlGenericErrorContext,
9990 "PP: internal error, state == PUBLIC_LITERAL\n");
9991 ctxt->instate = XML_PARSER_START_TAG;
9992#ifdef DEBUG_PUSH
9993 xmlGenericError(xmlGenericErrorContext,
9994 "PP: entering START_TAG\n");
9995#endif
9996 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009997 }
9998 }
9999done:
10000#ifdef DEBUG_PUSH
10001 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10002#endif
10003 return(ret);
10004}
10005
10006/**
Owen Taylor3473f882001-02-23 17:55:21 +000010007 * xmlParseChunk:
10008 * @ctxt: an XML parser context
10009 * @chunk: an char array
10010 * @size: the size in byte of the chunk
10011 * @terminate: last chunk indicator
10012 *
10013 * Parse a Chunk of memory
10014 *
10015 * Returns zero if no error, the xmlParserErrors otherwise.
10016 */
10017int
10018xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10019 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010020 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10021 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010022 if (ctxt->instate == XML_PARSER_START)
10023 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010024 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10025 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10026 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10027 int cur = ctxt->input->cur - ctxt->input->base;
10028
10029 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10030 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10031 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010032 ctxt->input->end =
10033 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010034#ifdef DEBUG_PUSH
10035 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10036#endif
10037
Owen Taylor3473f882001-02-23 17:55:21 +000010038 } else if (ctxt->instate != XML_PARSER_EOF) {
10039 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10040 xmlParserInputBufferPtr in = ctxt->input->buf;
10041 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10042 (in->raw != NULL)) {
10043 int nbchars;
10044
10045 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10046 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010047 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010048 xmlGenericError(xmlGenericErrorContext,
10049 "xmlParseChunk: encoder error\n");
10050 return(XML_ERR_INVALID_ENCODING);
10051 }
10052 }
10053 }
10054 }
10055 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010056 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10057 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010058 if (terminate) {
10059 /*
10060 * Check for termination
10061 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010062 int avail = 0;
10063 if (ctxt->input->buf == NULL)
10064 avail = ctxt->input->length -
10065 (ctxt->input->cur - ctxt->input->base);
10066 else
10067 avail = ctxt->input->buf->buffer->use -
10068 (ctxt->input->cur - ctxt->input->base);
10069
Owen Taylor3473f882001-02-23 17:55:21 +000010070 if ((ctxt->instate != XML_PARSER_EOF) &&
10071 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010072 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010073 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010074 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010075 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010076 }
Owen Taylor3473f882001-02-23 17:55:21 +000010077 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010078 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010079 ctxt->sax->endDocument(ctxt->userData);
10080 }
10081 ctxt->instate = XML_PARSER_EOF;
10082 }
10083 return((xmlParserErrors) ctxt->errNo);
10084}
10085
10086/************************************************************************
10087 * *
10088 * I/O front end functions to the parser *
10089 * *
10090 ************************************************************************/
10091
10092/**
10093 * xmlStopParser:
10094 * @ctxt: an XML parser context
10095 *
10096 * Blocks further parser processing
10097 */
10098void
10099xmlStopParser(xmlParserCtxtPtr ctxt) {
10100 ctxt->instate = XML_PARSER_EOF;
10101 if (ctxt->input != NULL)
10102 ctxt->input->cur = BAD_CAST"";
10103}
10104
10105/**
10106 * xmlCreatePushParserCtxt:
10107 * @sax: a SAX handler
10108 * @user_data: The user data returned on SAX callbacks
10109 * @chunk: a pointer to an array of chars
10110 * @size: number of chars in the array
10111 * @filename: an optional file name or URI
10112 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010113 * Create a parser context for using the XML parser in push mode.
10114 * If @buffer and @size are non-NULL, the data is used to detect
10115 * the encoding. The remaining characters will be parsed so they
10116 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010117 * To allow content encoding detection, @size should be >= 4
10118 * The value of @filename is used for fetching external entities
10119 * and error/warning reports.
10120 *
10121 * Returns the new parser context or NULL
10122 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010123
Owen Taylor3473f882001-02-23 17:55:21 +000010124xmlParserCtxtPtr
10125xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10126 const char *chunk, int size, const char *filename) {
10127 xmlParserCtxtPtr ctxt;
10128 xmlParserInputPtr inputStream;
10129 xmlParserInputBufferPtr buf;
10130 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10131
10132 /*
10133 * plug some encoding conversion routines
10134 */
10135 if ((chunk != NULL) && (size >= 4))
10136 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10137
10138 buf = xmlAllocParserInputBuffer(enc);
10139 if (buf == NULL) return(NULL);
10140
10141 ctxt = xmlNewParserCtxt();
10142 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010143 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010144 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010145 return(NULL);
10146 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010147 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10148 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010149 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010150 xmlFreeParserInputBuffer(buf);
10151 xmlFreeParserCtxt(ctxt);
10152 return(NULL);
10153 }
Owen Taylor3473f882001-02-23 17:55:21 +000010154 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010155#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010156 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010157#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010158 xmlFree(ctxt->sax);
10159 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10160 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010161 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010162 xmlFreeParserInputBuffer(buf);
10163 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010164 return(NULL);
10165 }
10166 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10167 if (user_data != NULL)
10168 ctxt->userData = user_data;
10169 }
10170 if (filename == NULL) {
10171 ctxt->directory = NULL;
10172 } else {
10173 ctxt->directory = xmlParserGetDirectory(filename);
10174 }
10175
10176 inputStream = xmlNewInputStream(ctxt);
10177 if (inputStream == NULL) {
10178 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010179 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010180 return(NULL);
10181 }
10182
10183 if (filename == NULL)
10184 inputStream->filename = NULL;
10185 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010186 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010187 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010188 inputStream->buf = buf;
10189 inputStream->base = inputStream->buf->buffer->content;
10190 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010191 inputStream->end =
10192 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010193
10194 inputPush(ctxt, inputStream);
10195
10196 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10197 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010198 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10199 int cur = ctxt->input->cur - ctxt->input->base;
10200
Owen Taylor3473f882001-02-23 17:55:21 +000010201 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010202
10203 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10204 ctxt->input->cur = ctxt->input->base + cur;
10205 ctxt->input->end =
10206 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010207#ifdef DEBUG_PUSH
10208 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10209#endif
10210 }
10211
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010212 if (enc != XML_CHAR_ENCODING_NONE) {
10213 xmlSwitchEncoding(ctxt, enc);
10214 }
10215
Owen Taylor3473f882001-02-23 17:55:21 +000010216 return(ctxt);
10217}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010218#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010219
10220/**
10221 * xmlCreateIOParserCtxt:
10222 * @sax: a SAX handler
10223 * @user_data: The user data returned on SAX callbacks
10224 * @ioread: an I/O read function
10225 * @ioclose: an I/O close function
10226 * @ioctx: an I/O handler
10227 * @enc: the charset encoding if known
10228 *
10229 * Create a parser context for using the XML parser with an existing
10230 * I/O stream
10231 *
10232 * Returns the new parser context or NULL
10233 */
10234xmlParserCtxtPtr
10235xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10236 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10237 void *ioctx, xmlCharEncoding enc) {
10238 xmlParserCtxtPtr ctxt;
10239 xmlParserInputPtr inputStream;
10240 xmlParserInputBufferPtr buf;
10241
10242 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10243 if (buf == NULL) return(NULL);
10244
10245 ctxt = xmlNewParserCtxt();
10246 if (ctxt == NULL) {
10247 xmlFree(buf);
10248 return(NULL);
10249 }
10250 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010251#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010252 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010253#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010254 xmlFree(ctxt->sax);
10255 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10256 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010257 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010258 xmlFree(ctxt);
10259 return(NULL);
10260 }
10261 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10262 if (user_data != NULL)
10263 ctxt->userData = user_data;
10264 }
10265
10266 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10267 if (inputStream == NULL) {
10268 xmlFreeParserCtxt(ctxt);
10269 return(NULL);
10270 }
10271 inputPush(ctxt, inputStream);
10272
10273 return(ctxt);
10274}
10275
Daniel Veillard4432df22003-09-28 18:58:27 +000010276#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010277/************************************************************************
10278 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010279 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010280 * *
10281 ************************************************************************/
10282
10283/**
10284 * xmlIOParseDTD:
10285 * @sax: the SAX handler block or NULL
10286 * @input: an Input Buffer
10287 * @enc: the charset encoding if known
10288 *
10289 * Load and parse a DTD
10290 *
10291 * Returns the resulting xmlDtdPtr or NULL in case of error.
10292 * @input will be freed at parsing end.
10293 */
10294
10295xmlDtdPtr
10296xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10297 xmlCharEncoding enc) {
10298 xmlDtdPtr ret = NULL;
10299 xmlParserCtxtPtr ctxt;
10300 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010301 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010302
10303 if (input == NULL)
10304 return(NULL);
10305
10306 ctxt = xmlNewParserCtxt();
10307 if (ctxt == NULL) {
10308 return(NULL);
10309 }
10310
10311 /*
10312 * Set-up the SAX context
10313 */
10314 if (sax != NULL) {
10315 if (ctxt->sax != NULL)
10316 xmlFree(ctxt->sax);
10317 ctxt->sax = sax;
10318 ctxt->userData = NULL;
10319 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010320 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010321
10322 /*
10323 * generate a parser input from the I/O handler
10324 */
10325
10326 pinput = xmlNewIOInputStream(ctxt, input, enc);
10327 if (pinput == NULL) {
10328 if (sax != NULL) ctxt->sax = NULL;
10329 xmlFreeParserCtxt(ctxt);
10330 return(NULL);
10331 }
10332
10333 /*
10334 * plug some encoding conversion routines here.
10335 */
10336 xmlPushInput(ctxt, pinput);
10337
10338 pinput->filename = NULL;
10339 pinput->line = 1;
10340 pinput->col = 1;
10341 pinput->base = ctxt->input->cur;
10342 pinput->cur = ctxt->input->cur;
10343 pinput->free = NULL;
10344
10345 /*
10346 * let's parse that entity knowing it's an external subset.
10347 */
10348 ctxt->inSubset = 2;
10349 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10350 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10351 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010352
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010353 if ((enc == XML_CHAR_ENCODING_NONE) &&
10354 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010355 /*
10356 * Get the 4 first bytes and decode the charset
10357 * if enc != XML_CHAR_ENCODING_NONE
10358 * plug some encoding conversion routines.
10359 */
10360 start[0] = RAW;
10361 start[1] = NXT(1);
10362 start[2] = NXT(2);
10363 start[3] = NXT(3);
10364 enc = xmlDetectCharEncoding(start, 4);
10365 if (enc != XML_CHAR_ENCODING_NONE) {
10366 xmlSwitchEncoding(ctxt, enc);
10367 }
10368 }
10369
Owen Taylor3473f882001-02-23 17:55:21 +000010370 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10371
10372 if (ctxt->myDoc != NULL) {
10373 if (ctxt->wellFormed) {
10374 ret = ctxt->myDoc->extSubset;
10375 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010376 if (ret != NULL) {
10377 xmlNodePtr tmp;
10378
10379 ret->doc = NULL;
10380 tmp = ret->children;
10381 while (tmp != NULL) {
10382 tmp->doc = NULL;
10383 tmp = tmp->next;
10384 }
10385 }
Owen Taylor3473f882001-02-23 17:55:21 +000010386 } else {
10387 ret = NULL;
10388 }
10389 xmlFreeDoc(ctxt->myDoc);
10390 ctxt->myDoc = NULL;
10391 }
10392 if (sax != NULL) ctxt->sax = NULL;
10393 xmlFreeParserCtxt(ctxt);
10394
10395 return(ret);
10396}
10397
10398/**
10399 * xmlSAXParseDTD:
10400 * @sax: the SAX handler block
10401 * @ExternalID: a NAME* containing the External ID of the DTD
10402 * @SystemID: a NAME* containing the URL to the DTD
10403 *
10404 * Load and parse an external subset.
10405 *
10406 * Returns the resulting xmlDtdPtr or NULL in case of error.
10407 */
10408
10409xmlDtdPtr
10410xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10411 const xmlChar *SystemID) {
10412 xmlDtdPtr ret = NULL;
10413 xmlParserCtxtPtr ctxt;
10414 xmlParserInputPtr input = NULL;
10415 xmlCharEncoding enc;
10416
10417 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10418
10419 ctxt = xmlNewParserCtxt();
10420 if (ctxt == NULL) {
10421 return(NULL);
10422 }
10423
10424 /*
10425 * Set-up the SAX context
10426 */
10427 if (sax != NULL) {
10428 if (ctxt->sax != NULL)
10429 xmlFree(ctxt->sax);
10430 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010431 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010432 }
10433
10434 /*
10435 * Ask the Entity resolver to load the damn thing
10436 */
10437
10438 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010439 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010440 if (input == NULL) {
10441 if (sax != NULL) ctxt->sax = NULL;
10442 xmlFreeParserCtxt(ctxt);
10443 return(NULL);
10444 }
10445
10446 /*
10447 * plug some encoding conversion routines here.
10448 */
10449 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010450 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10451 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10452 xmlSwitchEncoding(ctxt, enc);
10453 }
Owen Taylor3473f882001-02-23 17:55:21 +000010454
10455 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010456 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010457 input->line = 1;
10458 input->col = 1;
10459 input->base = ctxt->input->cur;
10460 input->cur = ctxt->input->cur;
10461 input->free = NULL;
10462
10463 /*
10464 * let's parse that entity knowing it's an external subset.
10465 */
10466 ctxt->inSubset = 2;
10467 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10468 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10469 ExternalID, SystemID);
10470 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10471
10472 if (ctxt->myDoc != NULL) {
10473 if (ctxt->wellFormed) {
10474 ret = ctxt->myDoc->extSubset;
10475 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010476 if (ret != NULL) {
10477 xmlNodePtr tmp;
10478
10479 ret->doc = NULL;
10480 tmp = ret->children;
10481 while (tmp != NULL) {
10482 tmp->doc = NULL;
10483 tmp = tmp->next;
10484 }
10485 }
Owen Taylor3473f882001-02-23 17:55:21 +000010486 } else {
10487 ret = NULL;
10488 }
10489 xmlFreeDoc(ctxt->myDoc);
10490 ctxt->myDoc = NULL;
10491 }
10492 if (sax != NULL) ctxt->sax = NULL;
10493 xmlFreeParserCtxt(ctxt);
10494
10495 return(ret);
10496}
10497
Daniel Veillard4432df22003-09-28 18:58:27 +000010498
Owen Taylor3473f882001-02-23 17:55:21 +000010499/**
10500 * xmlParseDTD:
10501 * @ExternalID: a NAME* containing the External ID of the DTD
10502 * @SystemID: a NAME* containing the URL to the DTD
10503 *
10504 * Load and parse an external subset.
10505 *
10506 * Returns the resulting xmlDtdPtr or NULL in case of error.
10507 */
10508
10509xmlDtdPtr
10510xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10511 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10512}
Daniel Veillard4432df22003-09-28 18:58:27 +000010513#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010514
10515/************************************************************************
10516 * *
10517 * Front ends when parsing an Entity *
10518 * *
10519 ************************************************************************/
10520
10521/**
Owen Taylor3473f882001-02-23 17:55:21 +000010522 * xmlParseCtxtExternalEntity:
10523 * @ctx: the existing parsing context
10524 * @URL: the URL for the entity to load
10525 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010526 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010527 *
10528 * Parse an external general entity within an existing parsing context
10529 * An external general parsed entity is well-formed if it matches the
10530 * production labeled extParsedEnt.
10531 *
10532 * [78] extParsedEnt ::= TextDecl? content
10533 *
10534 * Returns 0 if the entity is well formed, -1 in case of args problem and
10535 * the parser error code otherwise
10536 */
10537
10538int
10539xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010540 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010541 xmlParserCtxtPtr ctxt;
10542 xmlDocPtr newDoc;
10543 xmlSAXHandlerPtr oldsax = NULL;
10544 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010545 xmlChar start[4];
10546 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010547
10548 if (ctx->depth > 40) {
10549 return(XML_ERR_ENTITY_LOOP);
10550 }
10551
Daniel Veillardcda96922001-08-21 10:56:31 +000010552 if (lst != NULL)
10553 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010554 if ((URL == NULL) && (ID == NULL))
10555 return(-1);
10556 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10557 return(-1);
10558
10559
10560 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10561 if (ctxt == NULL) return(-1);
10562 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010563 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010564 oldsax = ctxt->sax;
10565 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010566 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010567 newDoc = xmlNewDoc(BAD_CAST "1.0");
10568 if (newDoc == NULL) {
10569 xmlFreeParserCtxt(ctxt);
10570 return(-1);
10571 }
10572 if (ctx->myDoc != NULL) {
10573 newDoc->intSubset = ctx->myDoc->intSubset;
10574 newDoc->extSubset = ctx->myDoc->extSubset;
10575 }
10576 if (ctx->myDoc->URL != NULL) {
10577 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10578 }
10579 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10580 if (newDoc->children == NULL) {
10581 ctxt->sax = oldsax;
10582 xmlFreeParserCtxt(ctxt);
10583 newDoc->intSubset = NULL;
10584 newDoc->extSubset = NULL;
10585 xmlFreeDoc(newDoc);
10586 return(-1);
10587 }
10588 nodePush(ctxt, newDoc->children);
10589 if (ctx->myDoc == NULL) {
10590 ctxt->myDoc = newDoc;
10591 } else {
10592 ctxt->myDoc = ctx->myDoc;
10593 newDoc->children->doc = ctx->myDoc;
10594 }
10595
Daniel Veillard87a764e2001-06-20 17:41:10 +000010596 /*
10597 * Get the 4 first bytes and decode the charset
10598 * if enc != XML_CHAR_ENCODING_NONE
10599 * plug some encoding conversion routines.
10600 */
10601 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010602 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10603 start[0] = RAW;
10604 start[1] = NXT(1);
10605 start[2] = NXT(2);
10606 start[3] = NXT(3);
10607 enc = xmlDetectCharEncoding(start, 4);
10608 if (enc != XML_CHAR_ENCODING_NONE) {
10609 xmlSwitchEncoding(ctxt, enc);
10610 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010611 }
10612
Owen Taylor3473f882001-02-23 17:55:21 +000010613 /*
10614 * Parse a possible text declaration first
10615 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010616 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010617 xmlParseTextDecl(ctxt);
10618 }
10619
10620 /*
10621 * Doing validity checking on chunk doesn't make sense
10622 */
10623 ctxt->instate = XML_PARSER_CONTENT;
10624 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010625 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010626 ctxt->loadsubset = ctx->loadsubset;
10627 ctxt->depth = ctx->depth + 1;
10628 ctxt->replaceEntities = ctx->replaceEntities;
10629 if (ctxt->validate) {
10630 ctxt->vctxt.error = ctx->vctxt.error;
10631 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010632 } else {
10633 ctxt->vctxt.error = NULL;
10634 ctxt->vctxt.warning = NULL;
10635 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010636 ctxt->vctxt.nodeTab = NULL;
10637 ctxt->vctxt.nodeNr = 0;
10638 ctxt->vctxt.nodeMax = 0;
10639 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010640
10641 xmlParseContent(ctxt);
10642
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010643 ctx->validate = ctxt->validate;
10644 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010645 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010646 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010647 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010648 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010649 }
10650 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010651 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010652 }
10653
10654 if (!ctxt->wellFormed) {
10655 if (ctxt->errNo == 0)
10656 ret = 1;
10657 else
10658 ret = ctxt->errNo;
10659 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010660 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010661 xmlNodePtr cur;
10662
10663 /*
10664 * Return the newly created nodeset after unlinking it from
10665 * they pseudo parent.
10666 */
10667 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010668 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010669 while (cur != NULL) {
10670 cur->parent = NULL;
10671 cur = cur->next;
10672 }
10673 newDoc->children->children = NULL;
10674 }
10675 ret = 0;
10676 }
10677 ctxt->sax = oldsax;
10678 xmlFreeParserCtxt(ctxt);
10679 newDoc->intSubset = NULL;
10680 newDoc->extSubset = NULL;
10681 xmlFreeDoc(newDoc);
10682
10683 return(ret);
10684}
10685
10686/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010687 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010688 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010689 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010690 * @sax: the SAX handler bloc (possibly NULL)
10691 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10692 * @depth: Used for loop detection, use 0
10693 * @URL: the URL for the entity to load
10694 * @ID: the System ID for the entity to load
10695 * @list: the return value for the set of parsed nodes
10696 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010697 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010698 *
10699 * Returns 0 if the entity is well formed, -1 in case of args problem and
10700 * the parser error code otherwise
10701 */
10702
Daniel Veillard7d515752003-09-26 19:12:37 +000010703static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010704xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10705 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010706 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010707 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010708 xmlParserCtxtPtr ctxt;
10709 xmlDocPtr newDoc;
10710 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010711 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010712 xmlChar start[4];
10713 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010714
10715 if (depth > 40) {
10716 return(XML_ERR_ENTITY_LOOP);
10717 }
10718
10719
10720
10721 if (list != NULL)
10722 *list = NULL;
10723 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010724 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010725 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010726 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010727
10728
10729 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010730 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010731 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010732 if (oldctxt != NULL) {
10733 ctxt->_private = oldctxt->_private;
10734 ctxt->loadsubset = oldctxt->loadsubset;
10735 ctxt->validate = oldctxt->validate;
10736 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010737 ctxt->record_info = oldctxt->record_info;
10738 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10739 ctxt->node_seq.length = oldctxt->node_seq.length;
10740 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010741 } else {
10742 /*
10743 * Doing validity checking on chunk without context
10744 * doesn't make sense
10745 */
10746 ctxt->_private = NULL;
10747 ctxt->validate = 0;
10748 ctxt->external = 2;
10749 ctxt->loadsubset = 0;
10750 }
Owen Taylor3473f882001-02-23 17:55:21 +000010751 if (sax != NULL) {
10752 oldsax = ctxt->sax;
10753 ctxt->sax = sax;
10754 if (user_data != NULL)
10755 ctxt->userData = user_data;
10756 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010757 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010758 newDoc = xmlNewDoc(BAD_CAST "1.0");
10759 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010760 ctxt->node_seq.maximum = 0;
10761 ctxt->node_seq.length = 0;
10762 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010763 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010764 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010765 }
10766 if (doc != NULL) {
10767 newDoc->intSubset = doc->intSubset;
10768 newDoc->extSubset = doc->extSubset;
10769 }
10770 if (doc->URL != NULL) {
10771 newDoc->URL = xmlStrdup(doc->URL);
10772 }
10773 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10774 if (newDoc->children == NULL) {
10775 if (sax != NULL)
10776 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010777 ctxt->node_seq.maximum = 0;
10778 ctxt->node_seq.length = 0;
10779 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010780 xmlFreeParserCtxt(ctxt);
10781 newDoc->intSubset = NULL;
10782 newDoc->extSubset = NULL;
10783 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010784 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010785 }
10786 nodePush(ctxt, newDoc->children);
10787 if (doc == NULL) {
10788 ctxt->myDoc = newDoc;
10789 } else {
10790 ctxt->myDoc = doc;
10791 newDoc->children->doc = doc;
10792 }
10793
Daniel Veillard87a764e2001-06-20 17:41:10 +000010794 /*
10795 * Get the 4 first bytes and decode the charset
10796 * if enc != XML_CHAR_ENCODING_NONE
10797 * plug some encoding conversion routines.
10798 */
10799 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010800 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10801 start[0] = RAW;
10802 start[1] = NXT(1);
10803 start[2] = NXT(2);
10804 start[3] = NXT(3);
10805 enc = xmlDetectCharEncoding(start, 4);
10806 if (enc != XML_CHAR_ENCODING_NONE) {
10807 xmlSwitchEncoding(ctxt, enc);
10808 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010809 }
10810
Owen Taylor3473f882001-02-23 17:55:21 +000010811 /*
10812 * Parse a possible text declaration first
10813 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010814 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010815 xmlParseTextDecl(ctxt);
10816 }
10817
Owen Taylor3473f882001-02-23 17:55:21 +000010818 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010819 ctxt->depth = depth;
10820
10821 xmlParseContent(ctxt);
10822
Daniel Veillard561b7f82002-03-20 21:55:57 +000010823 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010824 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010825 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010826 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010827 }
10828 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010829 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010830 }
10831
10832 if (!ctxt->wellFormed) {
10833 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010834 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010835 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010836 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010837 } else {
10838 if (list != NULL) {
10839 xmlNodePtr cur;
10840
10841 /*
10842 * Return the newly created nodeset after unlinking it from
10843 * they pseudo parent.
10844 */
10845 cur = newDoc->children->children;
10846 *list = cur;
10847 while (cur != NULL) {
10848 cur->parent = NULL;
10849 cur = cur->next;
10850 }
10851 newDoc->children->children = NULL;
10852 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010853 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010854 }
10855 if (sax != NULL)
10856 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010857 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10858 oldctxt->node_seq.length = ctxt->node_seq.length;
10859 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010860 ctxt->node_seq.maximum = 0;
10861 ctxt->node_seq.length = 0;
10862 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010863 xmlFreeParserCtxt(ctxt);
10864 newDoc->intSubset = NULL;
10865 newDoc->extSubset = NULL;
10866 xmlFreeDoc(newDoc);
10867
10868 return(ret);
10869}
10870
Daniel Veillard81273902003-09-30 00:43:48 +000010871#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010872/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010873 * xmlParseExternalEntity:
10874 * @doc: the document the chunk pertains to
10875 * @sax: the SAX handler bloc (possibly NULL)
10876 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10877 * @depth: Used for loop detection, use 0
10878 * @URL: the URL for the entity to load
10879 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010880 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010881 *
10882 * Parse an external general entity
10883 * An external general parsed entity is well-formed if it matches the
10884 * production labeled extParsedEnt.
10885 *
10886 * [78] extParsedEnt ::= TextDecl? content
10887 *
10888 * Returns 0 if the entity is well formed, -1 in case of args problem and
10889 * the parser error code otherwise
10890 */
10891
10892int
10893xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010894 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010895 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010896 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010897}
10898
10899/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010900 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010901 * @doc: the document the chunk pertains to
10902 * @sax: the SAX handler bloc (possibly NULL)
10903 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10904 * @depth: Used for loop detection, use 0
10905 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010906 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010907 *
10908 * Parse a well-balanced chunk of an XML document
10909 * called by the parser
10910 * The allowed sequence for the Well Balanced Chunk is the one defined by
10911 * the content production in the XML grammar:
10912 *
10913 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10914 *
10915 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10916 * the parser error code otherwise
10917 */
10918
10919int
10920xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010921 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010922 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10923 depth, string, lst, 0 );
10924}
Daniel Veillard81273902003-09-30 00:43:48 +000010925#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010926
10927/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010928 * xmlParseBalancedChunkMemoryInternal:
10929 * @oldctxt: the existing parsing context
10930 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10931 * @user_data: the user data field for the parser context
10932 * @lst: the return value for the set of parsed nodes
10933 *
10934 *
10935 * Parse a well-balanced chunk of an XML document
10936 * called by the parser
10937 * The allowed sequence for the Well Balanced Chunk is the one defined by
10938 * the content production in the XML grammar:
10939 *
10940 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10941 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010942 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10943 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010944 *
10945 * In case recover is set to 1, the nodelist will not be empty even if
10946 * the parsed chunk is not well balanced.
10947 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010948static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010949xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10950 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10951 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010952 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010953 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010954 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010955 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010956 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010957
10958 if (oldctxt->depth > 40) {
10959 return(XML_ERR_ENTITY_LOOP);
10960 }
10961
10962
10963 if (lst != NULL)
10964 *lst = NULL;
10965 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010966 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010967
10968 size = xmlStrlen(string);
10969
10970 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010971 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010972 if (user_data != NULL)
10973 ctxt->userData = user_data;
10974 else
10975 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010976 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10977 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010978
10979 oldsax = ctxt->sax;
10980 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010981 xmlDetectSAX2(ctxt);
10982
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010983 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010984 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010985 newDoc = xmlNewDoc(BAD_CAST "1.0");
10986 if (newDoc == NULL) {
10987 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010988 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010989 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010990 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010991 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010992 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010993 } else {
10994 ctxt->myDoc = oldctxt->myDoc;
10995 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010996 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010997 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010998 BAD_CAST "pseudoroot", NULL);
10999 if (ctxt->myDoc->children == NULL) {
11000 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011001 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011002 xmlFreeParserCtxt(ctxt);
11003 if (newDoc != NULL)
11004 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000011005 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011006 }
11007 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011008 ctxt->instate = XML_PARSER_CONTENT;
11009 ctxt->depth = oldctxt->depth + 1;
11010
Daniel Veillard328f48c2002-11-15 15:24:34 +000011011 ctxt->validate = 0;
11012 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011013 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11014 /*
11015 * ID/IDREF registration will be done in xmlValidateElement below
11016 */
11017 ctxt->loadsubset |= XML_SKIP_IDS;
11018 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011019 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011020
Daniel Veillard68e9e742002-11-16 15:35:11 +000011021 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011022 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011023 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011024 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011025 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011026 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011027 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011028 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011029 }
11030
11031 if (!ctxt->wellFormed) {
11032 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011033 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011034 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011035 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011036 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011037 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011038 }
11039
William M. Brack7b9154b2003-09-27 19:23:50 +000011040 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011041 xmlNodePtr cur;
11042
11043 /*
11044 * Return the newly created nodeset after unlinking it from
11045 * they pseudo parent.
11046 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011047 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011048 *lst = cur;
11049 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011050#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011051 if (oldctxt->validate && oldctxt->wellFormed &&
11052 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11053 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11054 oldctxt->myDoc, cur);
11055 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011056#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011057 cur->parent = NULL;
11058 cur = cur->next;
11059 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011060 ctxt->myDoc->children->children = NULL;
11061 }
11062 if (ctxt->myDoc != NULL) {
11063 xmlFreeNode(ctxt->myDoc->children);
11064 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011065 }
11066
11067 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011068 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011069 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011070 if (newDoc != NULL)
11071 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011072
11073 return(ret);
11074}
11075
Daniel Veillard81273902003-09-30 00:43:48 +000011076#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011077/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011078 * xmlParseBalancedChunkMemoryRecover:
11079 * @doc: the document the chunk pertains to
11080 * @sax: the SAX handler bloc (possibly NULL)
11081 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11082 * @depth: Used for loop detection, use 0
11083 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11084 * @lst: the return value for the set of parsed nodes
11085 * @recover: return nodes even if the data is broken (use 0)
11086 *
11087 *
11088 * Parse a well-balanced chunk of an XML document
11089 * called by the parser
11090 * The allowed sequence for the Well Balanced Chunk is the one defined by
11091 * the content production in the XML grammar:
11092 *
11093 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11094 *
11095 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11096 * the parser error code otherwise
11097 *
11098 * In case recover is set to 1, the nodelist will not be empty even if
11099 * the parsed chunk is not well balanced.
11100 */
11101int
11102xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11103 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11104 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011105 xmlParserCtxtPtr ctxt;
11106 xmlDocPtr newDoc;
11107 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011108 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011109 int size;
11110 int ret = 0;
11111
11112 if (depth > 40) {
11113 return(XML_ERR_ENTITY_LOOP);
11114 }
11115
11116
Daniel Veillardcda96922001-08-21 10:56:31 +000011117 if (lst != NULL)
11118 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011119 if (string == NULL)
11120 return(-1);
11121
11122 size = xmlStrlen(string);
11123
11124 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11125 if (ctxt == NULL) return(-1);
11126 ctxt->userData = ctxt;
11127 if (sax != NULL) {
11128 oldsax = ctxt->sax;
11129 ctxt->sax = sax;
11130 if (user_data != NULL)
11131 ctxt->userData = user_data;
11132 }
11133 newDoc = xmlNewDoc(BAD_CAST "1.0");
11134 if (newDoc == NULL) {
11135 xmlFreeParserCtxt(ctxt);
11136 return(-1);
11137 }
11138 if (doc != NULL) {
11139 newDoc->intSubset = doc->intSubset;
11140 newDoc->extSubset = doc->extSubset;
11141 }
11142 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11143 if (newDoc->children == NULL) {
11144 if (sax != NULL)
11145 ctxt->sax = oldsax;
11146 xmlFreeParserCtxt(ctxt);
11147 newDoc->intSubset = NULL;
11148 newDoc->extSubset = NULL;
11149 xmlFreeDoc(newDoc);
11150 return(-1);
11151 }
11152 nodePush(ctxt, newDoc->children);
11153 if (doc == NULL) {
11154 ctxt->myDoc = newDoc;
11155 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011156 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011157 newDoc->children->doc = doc;
11158 }
11159 ctxt->instate = XML_PARSER_CONTENT;
11160 ctxt->depth = depth;
11161
11162 /*
11163 * Doing validity checking on chunk doesn't make sense
11164 */
11165 ctxt->validate = 0;
11166 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011167 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011168
Daniel Veillardb39bc392002-10-26 19:29:51 +000011169 if ( doc != NULL ){
11170 content = doc->children;
11171 doc->children = NULL;
11172 xmlParseContent(ctxt);
11173 doc->children = content;
11174 }
11175 else {
11176 xmlParseContent(ctxt);
11177 }
Owen Taylor3473f882001-02-23 17:55:21 +000011178 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011179 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011180 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011181 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011182 }
11183 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011184 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011185 }
11186
11187 if (!ctxt->wellFormed) {
11188 if (ctxt->errNo == 0)
11189 ret = 1;
11190 else
11191 ret = ctxt->errNo;
11192 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011193 ret = 0;
11194 }
11195
11196 if (lst != NULL && (ret == 0 || recover == 1)) {
11197 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011198
11199 /*
11200 * Return the newly created nodeset after unlinking it from
11201 * they pseudo parent.
11202 */
11203 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011204 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011205 while (cur != NULL) {
11206 cur->parent = NULL;
11207 cur = cur->next;
11208 }
11209 newDoc->children->children = NULL;
11210 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011211
Owen Taylor3473f882001-02-23 17:55:21 +000011212 if (sax != NULL)
11213 ctxt->sax = oldsax;
11214 xmlFreeParserCtxt(ctxt);
11215 newDoc->intSubset = NULL;
11216 newDoc->extSubset = NULL;
11217 xmlFreeDoc(newDoc);
11218
11219 return(ret);
11220}
11221
11222/**
11223 * xmlSAXParseEntity:
11224 * @sax: the SAX handler block
11225 * @filename: the filename
11226 *
11227 * parse an XML external entity out of context and build a tree.
11228 * It use the given SAX function block to handle the parsing callback.
11229 * If sax is NULL, fallback to the default DOM tree building routines.
11230 *
11231 * [78] extParsedEnt ::= TextDecl? content
11232 *
11233 * This correspond to a "Well Balanced" chunk
11234 *
11235 * Returns the resulting document tree
11236 */
11237
11238xmlDocPtr
11239xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11240 xmlDocPtr ret;
11241 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011242
11243 ctxt = xmlCreateFileParserCtxt(filename);
11244 if (ctxt == NULL) {
11245 return(NULL);
11246 }
11247 if (sax != NULL) {
11248 if (ctxt->sax != NULL)
11249 xmlFree(ctxt->sax);
11250 ctxt->sax = sax;
11251 ctxt->userData = NULL;
11252 }
11253
Owen Taylor3473f882001-02-23 17:55:21 +000011254 xmlParseExtParsedEnt(ctxt);
11255
11256 if (ctxt->wellFormed)
11257 ret = ctxt->myDoc;
11258 else {
11259 ret = NULL;
11260 xmlFreeDoc(ctxt->myDoc);
11261 ctxt->myDoc = NULL;
11262 }
11263 if (sax != NULL)
11264 ctxt->sax = NULL;
11265 xmlFreeParserCtxt(ctxt);
11266
11267 return(ret);
11268}
11269
11270/**
11271 * xmlParseEntity:
11272 * @filename: the filename
11273 *
11274 * parse an XML external entity out of context and build a tree.
11275 *
11276 * [78] extParsedEnt ::= TextDecl? content
11277 *
11278 * This correspond to a "Well Balanced" chunk
11279 *
11280 * Returns the resulting document tree
11281 */
11282
11283xmlDocPtr
11284xmlParseEntity(const char *filename) {
11285 return(xmlSAXParseEntity(NULL, filename));
11286}
Daniel Veillard81273902003-09-30 00:43:48 +000011287#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011288
11289/**
11290 * xmlCreateEntityParserCtxt:
11291 * @URL: the entity URL
11292 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011293 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011294 *
11295 * Create a parser context for an external entity
11296 * Automatic support for ZLIB/Compress compressed document is provided
11297 * by default if found at compile-time.
11298 *
11299 * Returns the new parser context or NULL
11300 */
11301xmlParserCtxtPtr
11302xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11303 const xmlChar *base) {
11304 xmlParserCtxtPtr ctxt;
11305 xmlParserInputPtr inputStream;
11306 char *directory = NULL;
11307 xmlChar *uri;
11308
11309 ctxt = xmlNewParserCtxt();
11310 if (ctxt == NULL) {
11311 return(NULL);
11312 }
11313
11314 uri = xmlBuildURI(URL, base);
11315
11316 if (uri == NULL) {
11317 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11318 if (inputStream == NULL) {
11319 xmlFreeParserCtxt(ctxt);
11320 return(NULL);
11321 }
11322
11323 inputPush(ctxt, inputStream);
11324
11325 if ((ctxt->directory == NULL) && (directory == NULL))
11326 directory = xmlParserGetDirectory((char *)URL);
11327 if ((ctxt->directory == NULL) && (directory != NULL))
11328 ctxt->directory = directory;
11329 } else {
11330 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11331 if (inputStream == NULL) {
11332 xmlFree(uri);
11333 xmlFreeParserCtxt(ctxt);
11334 return(NULL);
11335 }
11336
11337 inputPush(ctxt, inputStream);
11338
11339 if ((ctxt->directory == NULL) && (directory == NULL))
11340 directory = xmlParserGetDirectory((char *)uri);
11341 if ((ctxt->directory == NULL) && (directory != NULL))
11342 ctxt->directory = directory;
11343 xmlFree(uri);
11344 }
Owen Taylor3473f882001-02-23 17:55:21 +000011345 return(ctxt);
11346}
11347
11348/************************************************************************
11349 * *
11350 * Front ends when parsing from a file *
11351 * *
11352 ************************************************************************/
11353
11354/**
11355 * xmlCreateFileParserCtxt:
11356 * @filename: the filename
11357 *
11358 * Create a parser context for a file content.
11359 * Automatic support for ZLIB/Compress compressed document is provided
11360 * by default if found at compile-time.
11361 *
11362 * Returns the new parser context or NULL
11363 */
11364xmlParserCtxtPtr
11365xmlCreateFileParserCtxt(const char *filename)
11366{
11367 xmlParserCtxtPtr ctxt;
11368 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011369 char *directory = NULL;
11370
Owen Taylor3473f882001-02-23 17:55:21 +000011371 ctxt = xmlNewParserCtxt();
11372 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011373 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011374 return(NULL);
11375 }
11376
Igor Zlatkovicce076162003-02-23 13:39:39 +000011377
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011378 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011379 if (inputStream == NULL) {
11380 xmlFreeParserCtxt(ctxt);
11381 return(NULL);
11382 }
11383
Owen Taylor3473f882001-02-23 17:55:21 +000011384 inputPush(ctxt, inputStream);
11385 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011386 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011387 if ((ctxt->directory == NULL) && (directory != NULL))
11388 ctxt->directory = directory;
11389
11390 return(ctxt);
11391}
11392
Daniel Veillard81273902003-09-30 00:43:48 +000011393#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011394/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011395 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011396 * @sax: the SAX handler block
11397 * @filename: the filename
11398 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11399 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011400 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011401 *
11402 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11403 * compressed document is provided by default if found at compile-time.
11404 * It use the given SAX function block to handle the parsing callback.
11405 * If sax is NULL, fallback to the default DOM tree building routines.
11406 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011407 * User data (void *) is stored within the parser context in the
11408 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011409 *
Owen Taylor3473f882001-02-23 17:55:21 +000011410 * Returns the resulting document tree
11411 */
11412
11413xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011414xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11415 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011416 xmlDocPtr ret;
11417 xmlParserCtxtPtr ctxt;
11418 char *directory = NULL;
11419
Daniel Veillard635ef722001-10-29 11:48:19 +000011420 xmlInitParser();
11421
Owen Taylor3473f882001-02-23 17:55:21 +000011422 ctxt = xmlCreateFileParserCtxt(filename);
11423 if (ctxt == NULL) {
11424 return(NULL);
11425 }
11426 if (sax != NULL) {
11427 if (ctxt->sax != NULL)
11428 xmlFree(ctxt->sax);
11429 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011430 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011431 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011432 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011433 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011434 }
Owen Taylor3473f882001-02-23 17:55:21 +000011435
11436 if ((ctxt->directory == NULL) && (directory == NULL))
11437 directory = xmlParserGetDirectory(filename);
11438 if ((ctxt->directory == NULL) && (directory != NULL))
11439 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11440
Daniel Veillarddad3f682002-11-17 16:47:27 +000011441 ctxt->recovery = recovery;
11442
Owen Taylor3473f882001-02-23 17:55:21 +000011443 xmlParseDocument(ctxt);
11444
William M. Brackc07329e2003-09-08 01:57:30 +000011445 if ((ctxt->wellFormed) || recovery) {
11446 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011447 if (ret != NULL) {
11448 if (ctxt->input->buf->compressed > 0)
11449 ret->compression = 9;
11450 else
11451 ret->compression = ctxt->input->buf->compressed;
11452 }
William M. Brackc07329e2003-09-08 01:57:30 +000011453 }
Owen Taylor3473f882001-02-23 17:55:21 +000011454 else {
11455 ret = NULL;
11456 xmlFreeDoc(ctxt->myDoc);
11457 ctxt->myDoc = NULL;
11458 }
11459 if (sax != NULL)
11460 ctxt->sax = NULL;
11461 xmlFreeParserCtxt(ctxt);
11462
11463 return(ret);
11464}
11465
11466/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011467 * xmlSAXParseFile:
11468 * @sax: the SAX handler block
11469 * @filename: the filename
11470 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11471 * documents
11472 *
11473 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11474 * compressed document is provided by default if found at compile-time.
11475 * It use the given SAX function block to handle the parsing callback.
11476 * If sax is NULL, fallback to the default DOM tree building routines.
11477 *
11478 * Returns the resulting document tree
11479 */
11480
11481xmlDocPtr
11482xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11483 int recovery) {
11484 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11485}
11486
11487/**
Owen Taylor3473f882001-02-23 17:55:21 +000011488 * xmlRecoverDoc:
11489 * @cur: a pointer to an array of xmlChar
11490 *
11491 * parse an XML in-memory document and build a tree.
11492 * In the case the document is not Well Formed, a tree is built anyway
11493 *
11494 * Returns the resulting document tree
11495 */
11496
11497xmlDocPtr
11498xmlRecoverDoc(xmlChar *cur) {
11499 return(xmlSAXParseDoc(NULL, cur, 1));
11500}
11501
11502/**
11503 * xmlParseFile:
11504 * @filename: the filename
11505 *
11506 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11507 * compressed document is provided by default if found at compile-time.
11508 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011509 * Returns the resulting document tree if the file was wellformed,
11510 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011511 */
11512
11513xmlDocPtr
11514xmlParseFile(const char *filename) {
11515 return(xmlSAXParseFile(NULL, filename, 0));
11516}
11517
11518/**
11519 * xmlRecoverFile:
11520 * @filename: the filename
11521 *
11522 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11523 * compressed document is provided by default if found at compile-time.
11524 * In the case the document is not Well Formed, a tree is built anyway
11525 *
11526 * Returns the resulting document tree
11527 */
11528
11529xmlDocPtr
11530xmlRecoverFile(const char *filename) {
11531 return(xmlSAXParseFile(NULL, filename, 1));
11532}
11533
11534
11535/**
11536 * xmlSetupParserForBuffer:
11537 * @ctxt: an XML parser context
11538 * @buffer: a xmlChar * buffer
11539 * @filename: a file name
11540 *
11541 * Setup the parser context to parse a new buffer; Clears any prior
11542 * contents from the parser context. The buffer parameter must not be
11543 * NULL, but the filename parameter can be
11544 */
11545void
11546xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11547 const char* filename)
11548{
11549 xmlParserInputPtr input;
11550
11551 input = xmlNewInputStream(ctxt);
11552 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011553 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011554 xmlFree(ctxt);
11555 return;
11556 }
11557
11558 xmlClearParserCtxt(ctxt);
11559 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011560 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011561 input->base = buffer;
11562 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011563 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011564 inputPush(ctxt, input);
11565}
11566
11567/**
11568 * xmlSAXUserParseFile:
11569 * @sax: a SAX handler
11570 * @user_data: The user data returned on SAX callbacks
11571 * @filename: a file name
11572 *
11573 * parse an XML file and call the given SAX handler routines.
11574 * Automatic support for ZLIB/Compress compressed document is provided
11575 *
11576 * Returns 0 in case of success or a error number otherwise
11577 */
11578int
11579xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11580 const char *filename) {
11581 int ret = 0;
11582 xmlParserCtxtPtr ctxt;
11583
11584 ctxt = xmlCreateFileParserCtxt(filename);
11585 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011586#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011587 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011588#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011589 xmlFree(ctxt->sax);
11590 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011591 xmlDetectSAX2(ctxt);
11592
Owen Taylor3473f882001-02-23 17:55:21 +000011593 if (user_data != NULL)
11594 ctxt->userData = user_data;
11595
11596 xmlParseDocument(ctxt);
11597
11598 if (ctxt->wellFormed)
11599 ret = 0;
11600 else {
11601 if (ctxt->errNo != 0)
11602 ret = ctxt->errNo;
11603 else
11604 ret = -1;
11605 }
11606 if (sax != NULL)
11607 ctxt->sax = NULL;
11608 xmlFreeParserCtxt(ctxt);
11609
11610 return ret;
11611}
Daniel Veillard81273902003-09-30 00:43:48 +000011612#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011613
11614/************************************************************************
11615 * *
11616 * Front ends when parsing from memory *
11617 * *
11618 ************************************************************************/
11619
11620/**
11621 * xmlCreateMemoryParserCtxt:
11622 * @buffer: a pointer to a char array
11623 * @size: the size of the array
11624 *
11625 * Create a parser context for an XML in-memory document.
11626 *
11627 * Returns the new parser context or NULL
11628 */
11629xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011630xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011631 xmlParserCtxtPtr ctxt;
11632 xmlParserInputPtr input;
11633 xmlParserInputBufferPtr buf;
11634
11635 if (buffer == NULL)
11636 return(NULL);
11637 if (size <= 0)
11638 return(NULL);
11639
11640 ctxt = xmlNewParserCtxt();
11641 if (ctxt == NULL)
11642 return(NULL);
11643
Daniel Veillard53350552003-09-18 13:35:51 +000011644 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011645 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011646 if (buf == NULL) {
11647 xmlFreeParserCtxt(ctxt);
11648 return(NULL);
11649 }
Owen Taylor3473f882001-02-23 17:55:21 +000011650
11651 input = xmlNewInputStream(ctxt);
11652 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011653 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011654 xmlFreeParserCtxt(ctxt);
11655 return(NULL);
11656 }
11657
11658 input->filename = NULL;
11659 input->buf = buf;
11660 input->base = input->buf->buffer->content;
11661 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011662 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011663
11664 inputPush(ctxt, input);
11665 return(ctxt);
11666}
11667
Daniel Veillard81273902003-09-30 00:43:48 +000011668#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011669/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011670 * xmlSAXParseMemoryWithData:
11671 * @sax: the SAX handler block
11672 * @buffer: an pointer to a char array
11673 * @size: the size of the array
11674 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11675 * documents
11676 * @data: the userdata
11677 *
11678 * parse an XML in-memory block and use the given SAX function block
11679 * to handle the parsing callback. If sax is NULL, fallback to the default
11680 * DOM tree building routines.
11681 *
11682 * User data (void *) is stored within the parser context in the
11683 * context's _private member, so it is available nearly everywhere in libxml
11684 *
11685 * Returns the resulting document tree
11686 */
11687
11688xmlDocPtr
11689xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11690 int size, int recovery, void *data) {
11691 xmlDocPtr ret;
11692 xmlParserCtxtPtr ctxt;
11693
11694 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11695 if (ctxt == NULL) return(NULL);
11696 if (sax != NULL) {
11697 if (ctxt->sax != NULL)
11698 xmlFree(ctxt->sax);
11699 ctxt->sax = sax;
11700 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011701 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011702 if (data!=NULL) {
11703 ctxt->_private=data;
11704 }
11705
Daniel Veillardadba5f12003-04-04 16:09:01 +000011706 ctxt->recovery = recovery;
11707
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011708 xmlParseDocument(ctxt);
11709
11710 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11711 else {
11712 ret = NULL;
11713 xmlFreeDoc(ctxt->myDoc);
11714 ctxt->myDoc = NULL;
11715 }
11716 if (sax != NULL)
11717 ctxt->sax = NULL;
11718 xmlFreeParserCtxt(ctxt);
11719
11720 return(ret);
11721}
11722
11723/**
Owen Taylor3473f882001-02-23 17:55:21 +000011724 * xmlSAXParseMemory:
11725 * @sax: the SAX handler block
11726 * @buffer: an pointer to a char array
11727 * @size: the size of the array
11728 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11729 * documents
11730 *
11731 * parse an XML in-memory block and use the given SAX function block
11732 * to handle the parsing callback. If sax is NULL, fallback to the default
11733 * DOM tree building routines.
11734 *
11735 * Returns the resulting document tree
11736 */
11737xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011738xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11739 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011740 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011741}
11742
11743/**
11744 * xmlParseMemory:
11745 * @buffer: an pointer to a char array
11746 * @size: the size of the array
11747 *
11748 * parse an XML in-memory block and build a tree.
11749 *
11750 * Returns the resulting document tree
11751 */
11752
Daniel Veillard50822cb2001-07-26 20:05:51 +000011753xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011754 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11755}
11756
11757/**
11758 * xmlRecoverMemory:
11759 * @buffer: an pointer to a char array
11760 * @size: the size of the array
11761 *
11762 * parse an XML in-memory block and build a tree.
11763 * In the case the document is not Well Formed, a tree is built anyway
11764 *
11765 * Returns the resulting document tree
11766 */
11767
Daniel Veillard50822cb2001-07-26 20:05:51 +000011768xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011769 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11770}
11771
11772/**
11773 * xmlSAXUserParseMemory:
11774 * @sax: a SAX handler
11775 * @user_data: The user data returned on SAX callbacks
11776 * @buffer: an in-memory XML document input
11777 * @size: the length of the XML document in bytes
11778 *
11779 * A better SAX parsing routine.
11780 * parse an XML in-memory buffer and call the given SAX handler routines.
11781 *
11782 * Returns 0 in case of success or a error number otherwise
11783 */
11784int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011785 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011786 int ret = 0;
11787 xmlParserCtxtPtr ctxt;
11788 xmlSAXHandlerPtr oldsax = NULL;
11789
Daniel Veillard9e923512002-08-14 08:48:52 +000011790 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011791 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11792 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011793 oldsax = ctxt->sax;
11794 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011795 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011796 if (user_data != NULL)
11797 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011798
11799 xmlParseDocument(ctxt);
11800
11801 if (ctxt->wellFormed)
11802 ret = 0;
11803 else {
11804 if (ctxt->errNo != 0)
11805 ret = ctxt->errNo;
11806 else
11807 ret = -1;
11808 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011809 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011810 xmlFreeParserCtxt(ctxt);
11811
11812 return ret;
11813}
Daniel Veillard81273902003-09-30 00:43:48 +000011814#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011815
11816/**
11817 * xmlCreateDocParserCtxt:
11818 * @cur: a pointer to an array of xmlChar
11819 *
11820 * Creates a parser context for an XML in-memory document.
11821 *
11822 * Returns the new parser context or NULL
11823 */
11824xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011825xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011826 int len;
11827
11828 if (cur == NULL)
11829 return(NULL);
11830 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011831 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011832}
11833
Daniel Veillard81273902003-09-30 00:43:48 +000011834#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011835/**
11836 * xmlSAXParseDoc:
11837 * @sax: the SAX handler block
11838 * @cur: a pointer to an array of xmlChar
11839 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11840 * documents
11841 *
11842 * parse an XML in-memory document and build a tree.
11843 * It use the given SAX function block to handle the parsing callback.
11844 * If sax is NULL, fallback to the default DOM tree building routines.
11845 *
11846 * Returns the resulting document tree
11847 */
11848
11849xmlDocPtr
11850xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11851 xmlDocPtr ret;
11852 xmlParserCtxtPtr ctxt;
11853
11854 if (cur == NULL) return(NULL);
11855
11856
11857 ctxt = xmlCreateDocParserCtxt(cur);
11858 if (ctxt == NULL) return(NULL);
11859 if (sax != NULL) {
11860 ctxt->sax = sax;
11861 ctxt->userData = NULL;
11862 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011863 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011864
11865 xmlParseDocument(ctxt);
11866 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11867 else {
11868 ret = NULL;
11869 xmlFreeDoc(ctxt->myDoc);
11870 ctxt->myDoc = NULL;
11871 }
11872 if (sax != NULL)
11873 ctxt->sax = NULL;
11874 xmlFreeParserCtxt(ctxt);
11875
11876 return(ret);
11877}
11878
11879/**
11880 * xmlParseDoc:
11881 * @cur: a pointer to an array of xmlChar
11882 *
11883 * parse an XML in-memory document and build a tree.
11884 *
11885 * Returns the resulting document tree
11886 */
11887
11888xmlDocPtr
11889xmlParseDoc(xmlChar *cur) {
11890 return(xmlSAXParseDoc(NULL, cur, 0));
11891}
Daniel Veillard81273902003-09-30 00:43:48 +000011892#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011893
Daniel Veillard81273902003-09-30 00:43:48 +000011894#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011895/************************************************************************
11896 * *
11897 * Specific function to keep track of entities references *
11898 * and used by the XSLT debugger *
11899 * *
11900 ************************************************************************/
11901
11902static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11903
11904/**
11905 * xmlAddEntityReference:
11906 * @ent : A valid entity
11907 * @firstNode : A valid first node for children of entity
11908 * @lastNode : A valid last node of children entity
11909 *
11910 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11911 */
11912static void
11913xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11914 xmlNodePtr lastNode)
11915{
11916 if (xmlEntityRefFunc != NULL) {
11917 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11918 }
11919}
11920
11921
11922/**
11923 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011924 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011925 *
11926 * Set the function to call call back when a xml reference has been made
11927 */
11928void
11929xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11930{
11931 xmlEntityRefFunc = func;
11932}
Daniel Veillard81273902003-09-30 00:43:48 +000011933#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011934
11935/************************************************************************
11936 * *
11937 * Miscellaneous *
11938 * *
11939 ************************************************************************/
11940
11941#ifdef LIBXML_XPATH_ENABLED
11942#include <libxml/xpath.h>
11943#endif
11944
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011945extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011946static int xmlParserInitialized = 0;
11947
11948/**
11949 * xmlInitParser:
11950 *
11951 * Initialization function for the XML parser.
11952 * This is not reentrant. Call once before processing in case of
11953 * use in multithreaded programs.
11954 */
11955
11956void
11957xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011958 if (xmlParserInitialized != 0)
11959 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011960
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011961 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11962 (xmlGenericError == NULL))
11963 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011964 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011965 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011966 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011967 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011968 xmlDefaultSAXHandlerInit();
11969 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011970#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011971 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011972#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011973#ifdef LIBXML_HTML_ENABLED
11974 htmlInitAutoClose();
11975 htmlDefaultSAXHandlerInit();
11976#endif
11977#ifdef LIBXML_XPATH_ENABLED
11978 xmlXPathInit();
11979#endif
11980 xmlParserInitialized = 1;
11981}
11982
11983/**
11984 * xmlCleanupParser:
11985 *
11986 * Cleanup function for the XML parser. It tries to reclaim all
11987 * parsing related global memory allocated for the parser processing.
11988 * It doesn't deallocate any document related memory. Calling this
11989 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011990 * One should call xmlCleanupParser() only when the process has
11991 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011992 */
11993
11994void
11995xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011996 if (!xmlParserInitialized)
11997 return;
11998
Owen Taylor3473f882001-02-23 17:55:21 +000011999 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012000#ifdef LIBXML_CATALOG_ENABLED
12001 xmlCatalogCleanup();
12002#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012003 xmlCleanupInputCallbacks();
12004#ifdef LIBXML_OUTPUT_ENABLED
12005 xmlCleanupOutputCallbacks();
12006#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000012007 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012008 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012009 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000012010 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012011}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012012
12013/************************************************************************
12014 * *
12015 * New set (2.6.0) of simpler and more flexible APIs *
12016 * *
12017 ************************************************************************/
12018
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored, and when "dict" is NULL the
 * string is always freed.
 * The expansion is wrapped in do { } while (0) so that the macro acts as
 * a single statement, e.g. as the body of an if/else, and it must be
 * followed by a semicolon. @str is evaluated more than once, so it must
 * not have side effects.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
12030
12031/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012032 * xmlCtxtReset:
12033 * @ctxt: an XML parser context
12034 *
12035 * Reset a parser context
12036 */
12037void
12038xmlCtxtReset(xmlParserCtxtPtr ctxt)
12039{
12040 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012041 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012042
12043 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12044 xmlFreeInputStream(input);
12045 }
12046 ctxt->inputNr = 0;
12047 ctxt->input = NULL;
12048
12049 ctxt->spaceNr = 0;
12050 ctxt->spaceTab[0] = -1;
12051 ctxt->space = &ctxt->spaceTab[0];
12052
12053
12054 ctxt->nodeNr = 0;
12055 ctxt->node = NULL;
12056
12057 ctxt->nameNr = 0;
12058 ctxt->name = NULL;
12059
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012060 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012061 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012062 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012063 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012064 DICT_FREE(ctxt->directory);
12065 ctxt->directory = NULL;
12066 DICT_FREE(ctxt->extSubURI);
12067 ctxt->extSubURI = NULL;
12068 DICT_FREE(ctxt->extSubSystem);
12069 ctxt->extSubSystem = NULL;
12070 if (ctxt->myDoc != NULL)
12071 xmlFreeDoc(ctxt->myDoc);
12072 ctxt->myDoc = NULL;
12073
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012074 ctxt->standalone = -1;
12075 ctxt->hasExternalSubset = 0;
12076 ctxt->hasPErefs = 0;
12077 ctxt->html = 0;
12078 ctxt->external = 0;
12079 ctxt->instate = XML_PARSER_START;
12080 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012081
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012082 ctxt->wellFormed = 1;
12083 ctxt->nsWellFormed = 1;
12084 ctxt->valid = 1;
12085 ctxt->vctxt.userData = ctxt;
12086 ctxt->vctxt.error = xmlParserValidityError;
12087 ctxt->vctxt.warning = xmlParserValidityWarning;
12088 ctxt->record_info = 0;
12089 ctxt->nbChars = 0;
12090 ctxt->checkIndex = 0;
12091 ctxt->inSubset = 0;
12092 ctxt->errNo = XML_ERR_OK;
12093 ctxt->depth = 0;
12094 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12095 ctxt->catalogs = NULL;
12096 xmlInitNodeInfoSeq(&ctxt->node_seq);
12097
12098 if (ctxt->attsDefault != NULL) {
12099 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12100 ctxt->attsDefault = NULL;
12101 }
12102 if (ctxt->attsSpecial != NULL) {
12103 xmlHashFree(ctxt->attsSpecial, NULL);
12104 ctxt->attsSpecial = NULL;
12105 }
12106
Daniel Veillard4432df22003-09-28 18:58:27 +000012107#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012108 if (ctxt->catalogs != NULL)
12109 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012110#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012111}
12112
12113/**
12114 * xmlCtxtUseOptions:
12115 * @ctxt: an XML parser context
12116 * @options: a combination of xmlParserOption(s)
12117 *
12118 * Applies the options to the parser context
12119 *
12120 * Returns 0 in case of success, the set of unknown or unimplemented options
12121 * in case of error.
12122 */
12123int
12124xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12125{
12126 if (options & XML_PARSE_RECOVER) {
12127 ctxt->recovery = 1;
12128 options -= XML_PARSE_RECOVER;
12129 } else
12130 ctxt->recovery = 0;
12131 if (options & XML_PARSE_DTDLOAD) {
12132 ctxt->loadsubset = XML_DETECT_IDS;
12133 options -= XML_PARSE_DTDLOAD;
12134 } else
12135 ctxt->loadsubset = 0;
12136 if (options & XML_PARSE_DTDATTR) {
12137 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12138 options -= XML_PARSE_DTDATTR;
12139 }
12140 if (options & XML_PARSE_NOENT) {
12141 ctxt->replaceEntities = 1;
12142 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12143 options -= XML_PARSE_NOENT;
12144 } else
12145 ctxt->replaceEntities = 0;
12146 if (options & XML_PARSE_NOWARNING) {
12147 ctxt->sax->warning = NULL;
12148 options -= XML_PARSE_NOWARNING;
12149 }
12150 if (options & XML_PARSE_NOERROR) {
12151 ctxt->sax->error = NULL;
12152 ctxt->sax->fatalError = NULL;
12153 options -= XML_PARSE_NOERROR;
12154 }
12155 if (options & XML_PARSE_PEDANTIC) {
12156 ctxt->pedantic = 1;
12157 options -= XML_PARSE_PEDANTIC;
12158 } else
12159 ctxt->pedantic = 0;
12160 if (options & XML_PARSE_NOBLANKS) {
12161 ctxt->keepBlanks = 0;
12162 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12163 options -= XML_PARSE_NOBLANKS;
12164 } else
12165 ctxt->keepBlanks = 1;
12166 if (options & XML_PARSE_DTDVALID) {
12167 ctxt->validate = 1;
12168 if (options & XML_PARSE_NOWARNING)
12169 ctxt->vctxt.warning = NULL;
12170 if (options & XML_PARSE_NOERROR)
12171 ctxt->vctxt.error = NULL;
12172 options -= XML_PARSE_DTDVALID;
12173 } else
12174 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012175#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012176 if (options & XML_PARSE_SAX1) {
12177 ctxt->sax->startElement = xmlSAX2StartElement;
12178 ctxt->sax->endElement = xmlSAX2EndElement;
12179 ctxt->sax->startElementNs = NULL;
12180 ctxt->sax->endElementNs = NULL;
12181 ctxt->sax->initialized = 1;
12182 options -= XML_PARSE_SAX1;
12183 }
Daniel Veillard81273902003-09-30 00:43:48 +000012184#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012185 if (options & XML_PARSE_NODICT) {
12186 ctxt->dictNames = 0;
12187 options -= XML_PARSE_NODICT;
12188 } else {
12189 ctxt->dictNames = 1;
12190 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012191 if (options & XML_PARSE_NOCDATA) {
12192 ctxt->sax->cdataBlock = NULL;
12193 options -= XML_PARSE_NOCDATA;
12194 }
12195 if (options & XML_PARSE_NSCLEAN) {
12196 ctxt->options |= XML_PARSE_NSCLEAN;
12197 options -= XML_PARSE_NSCLEAN;
12198 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012199 return (options);
12200}
12201
12202/**
12203 * xmlDoRead:
12204 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012205 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012206 * @encoding: the document encoding, or NULL
12207 * @options: a combination of xmlParserOption(s)
12208 * @reuse: keep the context for reuse
12209 *
12210 * Common front-end for the xmlRead functions
12211 *
12212 * Returns the resulting document tree or NULL
12213 */
12214static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012215xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12216 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012217{
12218 xmlDocPtr ret;
12219
12220 xmlCtxtUseOptions(ctxt, options);
12221 if (encoding != NULL) {
12222 xmlCharEncodingHandlerPtr hdlr;
12223
12224 hdlr = xmlFindCharEncodingHandler(encoding);
12225 if (hdlr != NULL)
12226 xmlSwitchToEncoding(ctxt, hdlr);
12227 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012228 if ((URL != NULL) && (ctxt->input != NULL) &&
12229 (ctxt->input->filename == NULL))
12230 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012231 xmlParseDocument(ctxt);
12232 if ((ctxt->wellFormed) || ctxt->recovery)
12233 ret = ctxt->myDoc;
12234 else {
12235 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012236 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012237 if ((ctxt->dictNames) &&
12238 (ctxt->myDoc->dict == ctxt->dict))
12239 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012240 xmlFreeDoc(ctxt->myDoc);
12241 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012242 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012243 ctxt->myDoc = NULL;
12244 if (!reuse) {
12245 if ((ctxt->dictNames) &&
12246 (ret != NULL) &&
12247 (ret->dict == ctxt->dict))
12248 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012249 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012250 } else {
12251 /* Must duplicate the reference to the dictionary */
12252 if ((ctxt->dictNames) &&
12253 (ret != NULL) &&
12254 (ret->dict == ctxt->dict))
12255 xmlDictReference(ctxt->dict);
12256 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012257
12258 return (ret);
12259}
12260
12261/**
12262 * xmlReadDoc:
12263 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012264 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012265 * @encoding: the document encoding, or NULL
12266 * @options: a combination of xmlParserOption(s)
12267 *
12268 * parse an XML in-memory document and build a tree.
12269 *
12270 * Returns the resulting document tree
12271 */
12272xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012273xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012274{
12275 xmlParserCtxtPtr ctxt;
12276
12277 if (cur == NULL)
12278 return (NULL);
12279
12280 ctxt = xmlCreateDocParserCtxt(cur);
12281 if (ctxt == NULL)
12282 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012283 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012284}
12285
12286/**
12287 * xmlReadFile:
12288 * @filename: a file or URL
12289 * @encoding: the document encoding, or NULL
12290 * @options: a combination of xmlParserOption(s)
12291 *
12292 * parse an XML file from the filesystem or the network.
12293 *
12294 * Returns the resulting document tree
12295 */
12296xmlDocPtr
12297xmlReadFile(const char *filename, const char *encoding, int options)
12298{
12299 xmlParserCtxtPtr ctxt;
12300
12301 ctxt = xmlCreateFileParserCtxt(filename);
12302 if (ctxt == NULL)
12303 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012304 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012305}
12306
12307/**
12308 * xmlReadMemory:
12309 * @buffer: a pointer to a char array
12310 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012311 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012312 * @encoding: the document encoding, or NULL
12313 * @options: a combination of xmlParserOption(s)
12314 *
12315 * parse an XML in-memory document and build a tree.
12316 *
12317 * Returns the resulting document tree
12318 */
12319xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012320xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012321{
12322 xmlParserCtxtPtr ctxt;
12323
12324 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12325 if (ctxt == NULL)
12326 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012327 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012328}
12329
12330/**
12331 * xmlReadFd:
12332 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012333 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012334 * @encoding: the document encoding, or NULL
12335 * @options: a combination of xmlParserOption(s)
12336 *
12337 * parse an XML from a file descriptor and build a tree.
12338 *
12339 * Returns the resulting document tree
12340 */
12341xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012342xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012343{
12344 xmlParserCtxtPtr ctxt;
12345 xmlParserInputBufferPtr input;
12346 xmlParserInputPtr stream;
12347
12348 if (fd < 0)
12349 return (NULL);
12350
12351 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12352 if (input == NULL)
12353 return (NULL);
12354 ctxt = xmlNewParserCtxt();
12355 if (ctxt == NULL) {
12356 xmlFreeParserInputBuffer(input);
12357 return (NULL);
12358 }
12359 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12360 if (stream == NULL) {
12361 xmlFreeParserInputBuffer(input);
12362 xmlFreeParserCtxt(ctxt);
12363 return (NULL);
12364 }
12365 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012366 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012367}
12368
12369/**
12370 * xmlReadIO:
12371 * @ioread: an I/O read function
12372 * @ioclose: an I/O close function
12373 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012374 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012375 * @encoding: the document encoding, or NULL
12376 * @options: a combination of xmlParserOption(s)
12377 *
12378 * parse an XML document from I/O functions and source and build a tree.
12379 *
12380 * Returns the resulting document tree
12381 */
12382xmlDocPtr
12383xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012384 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012385{
12386 xmlParserCtxtPtr ctxt;
12387 xmlParserInputBufferPtr input;
12388 xmlParserInputPtr stream;
12389
12390 if (ioread == NULL)
12391 return (NULL);
12392
12393 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12394 XML_CHAR_ENCODING_NONE);
12395 if (input == NULL)
12396 return (NULL);
12397 ctxt = xmlNewParserCtxt();
12398 if (ctxt == NULL) {
12399 xmlFreeParserInputBuffer(input);
12400 return (NULL);
12401 }
12402 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12403 if (stream == NULL) {
12404 xmlFreeParserInputBuffer(input);
12405 xmlFreeParserCtxt(ctxt);
12406 return (NULL);
12407 }
12408 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012409 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012410}
12411
12412/**
12413 * xmlCtxtReadDoc:
12414 * @ctxt: an XML parser context
12415 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012416 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012417 * @encoding: the document encoding, or NULL
12418 * @options: a combination of xmlParserOption(s)
12419 *
12420 * parse an XML in-memory document and build a tree.
12421 * This reuses the existing @ctxt parser context
12422 *
12423 * Returns the resulting document tree
12424 */
12425xmlDocPtr
12426xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012427 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012428{
12429 xmlParserInputPtr stream;
12430
12431 if (cur == NULL)
12432 return (NULL);
12433 if (ctxt == NULL)
12434 return (NULL);
12435
12436 xmlCtxtReset(ctxt);
12437
12438 stream = xmlNewStringInputStream(ctxt, cur);
12439 if (stream == NULL) {
12440 return (NULL);
12441 }
12442 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012443 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012444}
12445
12446/**
12447 * xmlCtxtReadFile:
12448 * @ctxt: an XML parser context
12449 * @filename: a file or URL
12450 * @encoding: the document encoding, or NULL
12451 * @options: a combination of xmlParserOption(s)
12452 *
12453 * parse an XML file from the filesystem or the network.
12454 * This reuses the existing @ctxt parser context
12455 *
12456 * Returns the resulting document tree
12457 */
12458xmlDocPtr
12459xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12460 const char *encoding, int options)
12461{
12462 xmlParserInputPtr stream;
12463
12464 if (filename == NULL)
12465 return (NULL);
12466 if (ctxt == NULL)
12467 return (NULL);
12468
12469 xmlCtxtReset(ctxt);
12470
12471 stream = xmlNewInputFromFile(ctxt, filename);
12472 if (stream == NULL) {
12473 return (NULL);
12474 }
12475 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012476 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012477}
12478
12479/**
12480 * xmlCtxtReadMemory:
12481 * @ctxt: an XML parser context
12482 * @buffer: a pointer to a char array
12483 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012484 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012485 * @encoding: the document encoding, or NULL
12486 * @options: a combination of xmlParserOption(s)
12487 *
12488 * parse an XML in-memory document and build a tree.
12489 * This reuses the existing @ctxt parser context
12490 *
12491 * Returns the resulting document tree
12492 */
12493xmlDocPtr
12494xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012495 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012496{
12497 xmlParserInputBufferPtr input;
12498 xmlParserInputPtr stream;
12499
12500 if (ctxt == NULL)
12501 return (NULL);
12502 if (buffer == NULL)
12503 return (NULL);
12504
12505 xmlCtxtReset(ctxt);
12506
12507 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12508 if (input == NULL) {
12509 return(NULL);
12510 }
12511
12512 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12513 if (stream == NULL) {
12514 xmlFreeParserInputBuffer(input);
12515 return(NULL);
12516 }
12517
12518 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012519 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012520}
12521
12522/**
12523 * xmlCtxtReadFd:
12524 * @ctxt: an XML parser context
12525 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012526 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012527 * @encoding: the document encoding, or NULL
12528 * @options: a combination of xmlParserOption(s)
12529 *
12530 * parse an XML from a file descriptor and build a tree.
12531 * This reuses the existing @ctxt parser context
12532 *
12533 * Returns the resulting document tree
12534 */
12535xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012536xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12537 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012538{
12539 xmlParserInputBufferPtr input;
12540 xmlParserInputPtr stream;
12541
12542 if (fd < 0)
12543 return (NULL);
12544 if (ctxt == NULL)
12545 return (NULL);
12546
12547 xmlCtxtReset(ctxt);
12548
12549
12550 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12551 if (input == NULL)
12552 return (NULL);
12553 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12554 if (stream == NULL) {
12555 xmlFreeParserInputBuffer(input);
12556 return (NULL);
12557 }
12558 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012559 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012560}
12561
12562/**
12563 * xmlCtxtReadIO:
12564 * @ctxt: an XML parser context
12565 * @ioread: an I/O read function
12566 * @ioclose: an I/O close function
12567 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012568 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012569 * @encoding: the document encoding, or NULL
12570 * @options: a combination of xmlParserOption(s)
12571 *
12572 * parse an XML document from I/O functions and source and build a tree.
12573 * This reuses the existing @ctxt parser context
12574 *
12575 * Returns the resulting document tree
12576 */
12577xmlDocPtr
12578xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12579 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012580 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012581 const char *encoding, int options)
12582{
12583 xmlParserInputBufferPtr input;
12584 xmlParserInputPtr stream;
12585
12586 if (ioread == NULL)
12587 return (NULL);
12588 if (ctxt == NULL)
12589 return (NULL);
12590
12591 xmlCtxtReset(ctxt);
12592
12593 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12594 XML_CHAR_ENCODING_NONE);
12595 if (input == NULL)
12596 return (NULL);
12597 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12598 if (stream == NULL) {
12599 xmlFreeParserInputBuffer(input);
12600 return (NULL);
12601 }
12602 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012603 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012604}