blob: 17b8b1c8d4333cdae75fb8fccb8d040e77412412 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents we accept to
 * process. The parser itself has no such limitation; this value only
 * acts as a safety boundary.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
#define SAX2 1

/* Sizes used for the parser working buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The list is NULL terminated. Both the strings and the array of
 * pointers are constant: the table is only ever read.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
105/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000106xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
107 const xmlChar **str);
108
Daniel Veillard7d515752003-09-26 19:12:37 +0000109static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
111 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000112 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000114
Daniel Veillard81273902003-09-30 00:43:48 +0000115#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000116static void
117xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
118 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000119#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000120
Daniel Veillard7d515752003-09-26 19:12:37 +0000121static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000122xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
123 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000124
125/************************************************************************
126 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000127 * Some factorized error routines *
128 * *
129 ************************************************************************/
130
131/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000132 * xmlErrAttributeDup:
133 * @ctxt: an XML parser context
134 * @prefix: the attribute prefix
135 * @localname: the attribute localname
136 *
137 * Handle a redefinition of attribute error
138 */
139static void
140xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
141 const xmlChar * localname)
142{
143 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000144 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000145 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000146 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
147 (const char *) localname, NULL, NULL, 0, 0,
148 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000149 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000150 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000151 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
152 (const char *) prefix, (const char *) localname,
153 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
154 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000155 ctxt->wellFormed = 0;
156 if (ctxt->recovery == 0)
157 ctxt->disableSAX = 1;
158}
159
160/**
161 * xmlFatalErr:
162 * @ctxt: an XML parser context
163 * @error: the error number
164 * @extra: extra information string
165 *
166 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
167 */
168static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000169xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000170{
171 const char *errmsg;
172
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000173 switch (error) {
174 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000175 errmsg = "CharRef: invalid hexadecimal value\n";
176 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000178 errmsg = "CharRef: invalid decimal value\n";
179 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000181 errmsg = "CharRef: invalid value\n";
182 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000184 errmsg = "internal error";
185 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000187 errmsg = "PEReference at end of document\n";
188 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000190 errmsg = "PEReference in prolog\n";
191 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000192 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000193 errmsg = "PEReference in epilog\n";
194 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000195 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000196 errmsg = "PEReference: no name\n";
197 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000198 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000199 errmsg = "PEReference: expecting ';'\n";
200 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000201 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000202 errmsg = "Detected an entity reference loop\n";
203 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000204 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000205 errmsg = "EntityValue: \" or ' expected\n";
206 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000207 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000208 errmsg = "PEReferences forbidden in internal subset\n";
209 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000210 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000211 errmsg = "EntityValue: \" or ' expected\n";
212 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000213 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000214 errmsg = "AttValue: \" or ' expected\n";
215 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000216 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000217 errmsg = "Unescaped '<' not allowed in attributes values\n";
218 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000219 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000220 errmsg = "SystemLiteral \" or ' expected\n";
221 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000222 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000223 errmsg = "Unfinished System or Public ID \" or ' expected\n";
224 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000225 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000226 errmsg = "Sequence ']]>' not allowed in content\n";
227 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000229 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
230 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000231 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000232 errmsg = "PUBLIC, the Public Identifier is missing\n";
233 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000234 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000235 errmsg = "Comment must not contain '--' (double-hyphen)\n";
236 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000238 errmsg = "xmlParsePI : no target name\n";
239 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000240 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000241 errmsg = "Invalid PI name\n";
242 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000243 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000244 errmsg = "NOTATION: Name expected here\n";
245 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000246 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000247 errmsg = "'>' required to close NOTATION declaration\n";
248 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000249 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000250 errmsg = "Entity value required\n";
251 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000252 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000253 errmsg = "Fragment not allowed";
254 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000255 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 errmsg = "'(' required to start ATTLIST enumeration\n";
257 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000258 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000259 errmsg = "NmToken expected in ATTLIST enumeration\n";
260 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000261 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 errmsg = "')' required to finish ATTLIST enumeration\n";
263 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000264 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000265 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
266 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000267 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000268 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
269 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000271 errmsg = "ContentDecl : Name or '(' expected\n";
272 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000273 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000274 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
275 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000276 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000277 errmsg =
278 "PEReference: forbidden within markup decl in internal subset\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "expected '>'\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "XML conditional section '[' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "Content error in the external subset\n";
288 break;
289 case XML_ERR_CONDSEC_INVALID_KEYWORD:
290 errmsg =
291 "conditional section INCLUDE or IGNORE keyword expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section not closed\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Text declaration '<?xml' required\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "parsing XML declaration: '?>' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "external parsed entities cannot be standalone\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "EntityRef: expecting ';'\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "DOCTYPE improperly terminated\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "EndTag: '</' not found\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "expected '='\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "String not closed expecting \" or '\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "String not started expecting ' or \"\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "Invalid XML encoding name\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "standalone accepts only 'yes' or 'no'\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "Document is empty\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Extra content at the end of the document\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "chunk is not well balanced\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "extra content at the end of well balanced chunk\n";
340 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000341 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "Malformed declaration expecting version\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 case:
346 errmsg = "\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 default:
350 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 }
352 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000353 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
355 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 ctxt->wellFormed = 0;
357 if (ctxt->recovery == 0)
358 ctxt->disableSAX = 1;
359}
360
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000361/**
362 * xmlFatalErrMsg:
363 * @ctxt: an XML parser context
364 * @error: the error number
365 * @msg: the error message
366 *
367 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
368 */
369static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
371 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000372{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000373 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000374 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000376 ctxt->wellFormed = 0;
377 if (ctxt->recovery == 0)
378 ctxt->disableSAX = 1;
379}
380
381/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000382 * xmlWarningMsg:
383 * @ctxt: an XML parser context
384 * @error: the error number
385 * @msg: the error message
386 * @str1: extra data
387 * @str2: extra data
388 *
389 * Handle a warning.
390 */
391static void
392xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
393 const char *msg, const xmlChar *str1, const xmlChar *str2)
394{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000395 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000396
Daniel Veillard24eb9782003-10-04 21:08:09 +0000397 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000398 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000399 schannel = ctxt->sax->serror;
400 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000401 (ctxt->sax) ? ctxt->sax->warning : NULL,
402 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000403 ctxt, NULL, XML_FROM_PARSER, error,
404 XML_ERR_WARNING, NULL, 0,
405 (const char *) str1, (const char *) str2, NULL, 0, 0,
406 msg, (const char *) str1, (const char *) str2);
407}
408
409/**
410 * xmlValidityError:
411 * @ctxt: an XML parser context
412 * @error: the error number
413 * @msg: the error message
414 * @str1: extra data
415 *
416 * Handle a warning.
417 */
418static void
419xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
420 const char *msg, const xmlChar *str1)
421{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000422 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000423 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000424 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000425 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000426 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000427 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_DTD, error,
429 XML_ERR_ERROR, NULL, 0, (const char *) str1,
430 NULL, NULL, 0, 0,
431 msg, (const char *) str1);
432 ctxt->valid = 0;
433}
434
435/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000436 * xmlFatalErrMsgInt:
437 * @ctxt: an XML parser context
438 * @error: the error number
439 * @msg: the error message
440 * @val: an integer value
441 *
442 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
443 */
444static void
445xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000447{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000448 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000449 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
451 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000452 ctxt->wellFormed = 0;
453 if (ctxt->recovery == 0)
454 ctxt->disableSAX = 1;
455}
456
457/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000458 * xmlFatalErrMsgStrIntStr:
459 * @ctxt: an XML parser context
460 * @error: the error number
461 * @msg: the error message
462 * @str1: an string info
463 * @val: an integer value
464 * @str2: an string info
465 *
466 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
467 */
468static void
469xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
470 const char *msg, const xmlChar *str1, int val,
471 const xmlChar *str2)
472{
473 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000474 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000475 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
476 NULL, 0, (const char *) str1, (const char *) str2,
477 NULL, val, 0, msg, str1, val, str2);
478 ctxt->wellFormed = 0;
479 if (ctxt->recovery == 0)
480 ctxt->disableSAX = 1;
481}
482
483/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000484 * xmlFatalErrMsgStr:
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
488 * @val: a string value
489 *
490 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
491 */
492static void
493xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000495{
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
500 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000507 * xmlErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a non fatal parser error
514 */
515static void
516xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar * val)
518{
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 XML_FROM_PARSER, error, XML_ERR_ERROR,
522 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
523 val);
524}
525
526/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000527 * xmlNsErr:
528 * @ctxt: an XML parser context
529 * @error: the error number
530 * @msg: the message
531 * @info1: extra information string
532 * @info2: extra information string
533 *
534 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
535 */
536static void
537xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
538 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const xmlChar * info1, const xmlChar * info2,
540 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000541{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000542 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000543 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000544 XML_ERR_ERROR, NULL, 0, (const char *) info1,
545 (const char *) info2, (const char *) info3, 0, 0, msg,
546 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000547 ctxt->nsWellFormed = 0;
548}
549
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000550/************************************************************************
551 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000552 * SAX2 defaulted attributes handling *
553 * *
554 ************************************************************************/
555
556/**
557 * xmlDetectSAX2:
558 * @ctxt: an XML parser context
559 *
560 * Do the SAX2 detection and specific intialization
561 */
562static void
563xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
564 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000565#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000566 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
567 ((ctxt->sax->startElementNs != NULL) ||
568 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000569#else
570 ctxt->sax2 = 1;
571#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000572
573 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
574 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
575 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
576}
577
Daniel Veillarde57ec792003-09-10 10:50:59 +0000578typedef struct _xmlDefAttrs xmlDefAttrs;
579typedef xmlDefAttrs *xmlDefAttrsPtr;
580struct _xmlDefAttrs {
581 int nbAttrs; /* number of defaulted attributes on that element */
582 int maxAttrs; /* the size of the array */
583 const xmlChar *values[4]; /* array of localname/prefix/values */
584};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000585
586/**
587 * xmlAddDefAttrs:
588 * @ctxt: an XML parser context
589 * @fullname: the element fullname
590 * @fullattr: the attribute fullname
591 * @value: the attribute value
592 *
593 * Add a defaulted attribute for an element
594 */
595static void
596xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
597 const xmlChar *fullname,
598 const xmlChar *fullattr,
599 const xmlChar *value) {
600 xmlDefAttrsPtr defaults;
601 int len;
602 const xmlChar *name;
603 const xmlChar *prefix;
604
605 if (ctxt->attsDefault == NULL) {
606 ctxt->attsDefault = xmlHashCreate(10);
607 if (ctxt->attsDefault == NULL)
608 goto mem_error;
609 }
610
611 /*
612 * plit the element name into prefix:localname , the string found
613 * are within the DTD and hen not associated to namespace names.
614 */
615 name = xmlSplitQName3(fullname, &len);
616 if (name == NULL) {
617 name = xmlDictLookup(ctxt->dict, fullname, -1);
618 prefix = NULL;
619 } else {
620 name = xmlDictLookup(ctxt->dict, name, -1);
621 prefix = xmlDictLookup(ctxt->dict, fullname, len);
622 }
623
624 /*
625 * make sure there is some storage
626 */
627 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
628 if (defaults == NULL) {
629 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
630 12 * sizeof(const xmlChar *));
631 if (defaults == NULL)
632 goto mem_error;
633 defaults->maxAttrs = 4;
634 defaults->nbAttrs = 0;
635 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
636 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
637 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
638 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
639 if (defaults == NULL)
640 goto mem_error;
641 defaults->maxAttrs *= 2;
642 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
643 }
644
645 /*
646 * plit the element name into prefix:localname , the string found
647 * are within the DTD and hen not associated to namespace names.
648 */
649 name = xmlSplitQName3(fullattr, &len);
650 if (name == NULL) {
651 name = xmlDictLookup(ctxt->dict, fullattr, -1);
652 prefix = NULL;
653 } else {
654 name = xmlDictLookup(ctxt->dict, name, -1);
655 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
656 }
657
658 defaults->values[4 * defaults->nbAttrs] = name;
659 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
660 /* intern the string and precompute the end */
661 len = xmlStrlen(value);
662 value = xmlDictLookup(ctxt->dict, value, len);
663 defaults->values[4 * defaults->nbAttrs + 2] = value;
664 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
665 defaults->nbAttrs++;
666
667 return;
668
669mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000670 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 return;
672}
673
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000674/**
675 * xmlAddSpecialAttr:
676 * @ctxt: an XML parser context
677 * @fullname: the element fullname
678 * @fullattr: the attribute fullname
679 * @type: the attribute type
680 *
681 * Register that this attribute is not CDATA
682 */
683static void
684xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
685 const xmlChar *fullname,
686 const xmlChar *fullattr,
687 int type)
688{
689 if (ctxt->attsSpecial == NULL) {
690 ctxt->attsSpecial = xmlHashCreate(10);
691 if (ctxt->attsSpecial == NULL)
692 goto mem_error;
693 }
694
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000695 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
696 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000697 return;
698
699mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000700 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000701 return;
702}
703
Daniel Veillard4432df22003-09-28 18:58:27 +0000704/**
705 * xmlCheckLanguageID:
706 * @lang: pointer to the string value
707 *
708 * Checks that the value conforms to the LanguageID production:
709 *
710 * NOTE: this is somewhat deprecated, those productions were removed from
711 * the XML Second edition.
712 *
713 * [33] LanguageID ::= Langcode ('-' Subcode)*
714 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
715 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
716 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
717 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
718 * [38] Subcode ::= ([a-z] | [A-Z])+
719 *
720 * Returns 1 if correct 0 otherwise
721 **/
722int
723xmlCheckLanguageID(const xmlChar * lang)
724{
725 const xmlChar *cur = lang;
726
727 if (cur == NULL)
728 return (0);
729 if (((cur[0] == 'i') && (cur[1] == '-')) ||
730 ((cur[0] == 'I') && (cur[1] == '-'))) {
731 /*
732 * IANA code
733 */
734 cur += 2;
735 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
736 ((cur[0] >= 'a') && (cur[0] <= 'z')))
737 cur++;
738 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
739 ((cur[0] == 'X') && (cur[1] == '-'))) {
740 /*
741 * User code
742 */
743 cur += 2;
744 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
745 ((cur[0] >= 'a') && (cur[0] <= 'z')))
746 cur++;
747 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
748 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
749 /*
750 * ISO639
751 */
752 cur++;
753 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
754 ((cur[0] >= 'a') && (cur[0] <= 'z')))
755 cur++;
756 else
757 return (0);
758 } else
759 return (0);
760 while (cur[0] != 0) { /* non input consuming */
761 if (cur[0] != '-')
762 return (0);
763 cur++;
764 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
765 ((cur[0] >= 'a') && (cur[0] <= 'z')))
766 cur++;
767 else
768 return (0);
769 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
770 ((cur[0] >= 'a') && (cur[0] <= 'z')))
771 cur++;
772 }
773 return (1);
774}
775
Owen Taylor3473f882001-02-23 17:55:21 +0000776/************************************************************************
777 * *
778 * Parser stacks related functions and macros *
779 * *
780 ************************************************************************/
781
782xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
783 const xmlChar ** str);
784
Daniel Veillard0fb18932003-09-07 09:14:37 +0000785#ifdef SAX2
786/**
787 * nsPush:
788 * @ctxt: an XML parser context
789 * @prefix: the namespace prefix or NULL
790 * @URL: the namespace name
791 *
792 * Pushes a new parser namespace on top of the ns stack
793 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000794 * Returns -1 in case of error, -2 if the namespace should be discarded
795 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000796 */
797static int
798nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
799{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000800 if (ctxt->options & XML_PARSE_NSCLEAN) {
801 int i;
802 for (i = 0;i < ctxt->nsNr;i += 2) {
803 if (ctxt->nsTab[i] == prefix) {
804 /* in scope */
805 if (ctxt->nsTab[i + 1] == URL)
806 return(-2);
807 /* out of scope keep it */
808 break;
809 }
810 }
811 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000812 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
813 ctxt->nsMax = 10;
814 ctxt->nsNr = 0;
815 ctxt->nsTab = (const xmlChar **)
816 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
817 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000818 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000819 ctxt->nsMax = 0;
820 return (-1);
821 }
822 } else if (ctxt->nsNr >= ctxt->nsMax) {
823 ctxt->nsMax *= 2;
824 ctxt->nsTab = (const xmlChar **)
825 xmlRealloc(ctxt->nsTab,
826 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
827 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000828 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000829 ctxt->nsMax /= 2;
830 return (-1);
831 }
832 }
833 ctxt->nsTab[ctxt->nsNr++] = prefix;
834 ctxt->nsTab[ctxt->nsNr++] = URL;
835 return (ctxt->nsNr);
836}
837/**
838 * nsPop:
839 * @ctxt: an XML parser context
840 * @nr: the number to pop
841 *
842 * Pops the top @nr parser prefix/namespace from the ns stack
843 *
844 * Returns the number of namespaces removed
845 */
846static int
847nsPop(xmlParserCtxtPtr ctxt, int nr)
848{
849 int i;
850
851 if (ctxt->nsTab == NULL) return(0);
852 if (ctxt->nsNr < nr) {
853 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
854 nr = ctxt->nsNr;
855 }
856 if (ctxt->nsNr <= 0)
857 return (0);
858
859 for (i = 0;i < nr;i++) {
860 ctxt->nsNr--;
861 ctxt->nsTab[ctxt->nsNr] = NULL;
862 }
863 return(nr);
864}
865#endif
866
867static int
868xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
869 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000870 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000871 int maxatts;
872
873 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000874 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000875 atts = (const xmlChar **)
876 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000878 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
880 if (attallocs == NULL) goto mem_error;
881 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000882 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 } else if (nr + 5 > ctxt->maxatts) {
884 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000885 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
886 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000887 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000888 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000889 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
890 (maxatts / 5) * sizeof(int));
891 if (attallocs == NULL) goto mem_error;
892 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000893 ctxt->maxatts = maxatts;
894 }
895 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000896mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000897 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000898 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000899}
900
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000901/**
902 * inputPush:
903 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000904 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000905 *
906 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000907 *
908 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000909 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000910extern int
911inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
912{
913 if (ctxt->inputNr >= ctxt->inputMax) {
914 ctxt->inputMax *= 2;
915 ctxt->inputTab =
916 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
917 ctxt->inputMax *
918 sizeof(ctxt->inputTab[0]));
919 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000920 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000921 return (0);
922 }
923 }
924 ctxt->inputTab[ctxt->inputNr] = value;
925 ctxt->input = value;
926 return (ctxt->inputNr++);
927}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000928/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000929 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000930 * @ctxt: an XML parser context
931 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000932 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000934 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000935 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000936extern xmlParserInputPtr
937inputPop(xmlParserCtxtPtr ctxt)
938{
939 xmlParserInputPtr ret;
940
941 if (ctxt->inputNr <= 0)
942 return (0);
943 ctxt->inputNr--;
944 if (ctxt->inputNr > 0)
945 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
946 else
947 ctxt->input = NULL;
948 ret = ctxt->inputTab[ctxt->inputNr];
949 ctxt->inputTab[ctxt->inputNr] = 0;
950 return (ret);
951}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000952/**
953 * nodePush:
954 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000955 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000956 *
957 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000958 *
959 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961extern int
962nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
963{
964 if (ctxt->nodeNr >= ctxt->nodeMax) {
965 ctxt->nodeMax *= 2;
966 ctxt->nodeTab =
967 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
968 ctxt->nodeMax *
969 sizeof(ctxt->nodeTab[0]));
970 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000971 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 return (0);
973 }
974 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000975 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000976 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000977 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
978 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000979 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000980 return(0);
981 }
Daniel Veillard1c732d22002-11-30 11:22:59 +0000982 ctxt->nodeTab[ctxt->nodeNr] = value;
983 ctxt->node = value;
984 return (ctxt->nodeNr++);
985}
986/**
987 * nodePop:
988 * @ctxt: an XML parser context
989 *
990 * Pops the top element node from the node stack
991 *
992 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000993 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000994extern xmlNodePtr
995nodePop(xmlParserCtxtPtr ctxt)
996{
997 xmlNodePtr ret;
998
999 if (ctxt->nodeNr <= 0)
1000 return (0);
1001 ctxt->nodeNr--;
1002 if (ctxt->nodeNr > 0)
1003 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1004 else
1005 ctxt->node = NULL;
1006 ret = ctxt->nodeTab[ctxt->nodeNr];
1007 ctxt->nodeTab[ctxt->nodeNr] = 0;
1008 return (ret);
1009}
1010/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001011 * nameNsPush:
1012 * @ctxt: an XML parser context
1013 * @value: the element name
1014 * @prefix: the element prefix
1015 * @URI: the element namespace name
1016 *
1017 * Pushes a new element name/prefix/URL on top of the name stack
1018 *
1019 * Returns -1 in case of error, the index in the stack otherwise
1020 */
1021static int
1022nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1023 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1024{
1025 if (ctxt->nameNr >= ctxt->nameMax) {
1026 const xmlChar * *tmp;
1027 void **tmp2;
1028 ctxt->nameMax *= 2;
1029 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1030 ctxt->nameMax *
1031 sizeof(ctxt->nameTab[0]));
1032 if (tmp == NULL) {
1033 ctxt->nameMax /= 2;
1034 goto mem_error;
1035 }
1036 ctxt->nameTab = tmp;
1037 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1038 ctxt->nameMax * 3 *
1039 sizeof(ctxt->pushTab[0]));
1040 if (tmp2 == NULL) {
1041 ctxt->nameMax /= 2;
1042 goto mem_error;
1043 }
1044 ctxt->pushTab = tmp2;
1045 }
1046 ctxt->nameTab[ctxt->nameNr] = value;
1047 ctxt->name = value;
1048 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1049 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001050 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001051 return (ctxt->nameNr++);
1052mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001053 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001054 return (-1);
1055}
1056/**
1057 * nameNsPop:
1058 * @ctxt: an XML parser context
1059 *
1060 * Pops the top element/prefix/URI name from the name stack
1061 *
1062 * Returns the name just removed
1063 */
1064static const xmlChar *
1065nameNsPop(xmlParserCtxtPtr ctxt)
1066{
1067 const xmlChar *ret;
1068
1069 if (ctxt->nameNr <= 0)
1070 return (0);
1071 ctxt->nameNr--;
1072 if (ctxt->nameNr > 0)
1073 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1074 else
1075 ctxt->name = NULL;
1076 ret = ctxt->nameTab[ctxt->nameNr];
1077 ctxt->nameTab[ctxt->nameNr] = NULL;
1078 return (ret);
1079}
1080
1081/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001082 * namePush:
1083 * @ctxt: an XML parser context
1084 * @value: the element name
1085 *
1086 * Pushes a new element name on top of the name stack
1087 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001088 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001089 */
1090extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001091namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001092{
1093 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001094 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001095 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001097 ctxt->nameMax *
1098 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001099 if (tmp == NULL) {
1100 ctxt->nameMax /= 2;
1101 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001102 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001104 }
1105 ctxt->nameTab[ctxt->nameNr] = value;
1106 ctxt->name = value;
1107 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001108mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001109 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001110 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001111}
1112/**
1113 * namePop:
1114 * @ctxt: an XML parser context
1115 *
1116 * Pops the top element name from the name stack
1117 *
1118 * Returns the name just removed
1119 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001120extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121namePop(xmlParserCtxtPtr ctxt)
1122{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124
1125 if (ctxt->nameNr <= 0)
1126 return (0);
1127 ctxt->nameNr--;
1128 if (ctxt->nameNr > 0)
1129 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1130 else
1131 ctxt->name = NULL;
1132 ret = ctxt->nameTab[ctxt->nameNr];
1133 ctxt->nameTab[ctxt->nameNr] = 0;
1134 return (ret);
1135}
Owen Taylor3473f882001-02-23 17:55:21 +00001136
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001137static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001138 if (ctxt->spaceNr >= ctxt->spaceMax) {
1139 ctxt->spaceMax *= 2;
1140 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1141 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1142 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001143 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001144 return(0);
1145 }
1146 }
1147 ctxt->spaceTab[ctxt->spaceNr] = val;
1148 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1149 return(ctxt->spaceNr++);
1150}
1151
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001152static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001153 int ret;
1154 if (ctxt->spaceNr <= 0) return(0);
1155 ctxt->spaceNr--;
1156 if (ctxt->spaceNr > 0)
1157 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1158 else
1159 ctxt->space = NULL;
1160 ret = ctxt->spaceTab[ctxt->spaceNr];
1161 ctxt->spaceTab[ctxt->spaceNr] = -1;
1162 return(ret);
1163}
1164
1165/*
1166 * Macros for accessing the content. Those should be used only by the parser,
1167 * and not exported.
1168 *
1169 * Dirty macros, i.e. one often need to make assumption on the context to
1170 * use them
1171 *
1172 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1173 * To be used with extreme caution since operations consuming
1174 * characters may move the input buffer to a different location !
1175 * CUR returns the current xmlChar value, i.e. an 8 bit value.
1176 * This should be used internally by the parser
1177 * only to compare to ASCII values, otherwise it would break when
1178 * running with UTF-8 encoding.
1179 * RAW same as CUR but in the input buffer, bypass any token
1180 * extraction that may have been done
1181 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1182 * to compare on ASCII based substring.
1183 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001184 * strings without newlines within the parser.
1185 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1186 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001187 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1188 *
1189 * NEXT Skip to the next character, this does the proper decoding
1190 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001191 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001192 * CUR_CHAR(l) returns the current unicode character (int), set l
1193 * to the number of xmlChars used for the encoding [0-5].
1194 * CUR_SCHAR same but operate on a string instead of the context
1195 * COPY_BUF copy the current unicode char to the target buffer, increment
1196 * the index
1197 * GROW, SHRINK handling of input buffers
1198 */
1199
/*
 * Raw accessors on the current input. They all expect a variable named
 * ctxt to be in scope. RAW and CUR are currently the same expression.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the character count, the input cursor and the
 * column by val, then hand over to the PE reference handler if the
 * cursor sits on '%', and pop the input if it is exhausted and cannot
 * be grown. Note that val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1212
Daniel Veillarda880b122003-04-21 21:36:41 +00001213#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001214 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1215 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001216 xmlSHRINK (ctxt);
1217
1218static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1219 xmlParserInputShrink(ctxt->input);
1220 if ((*ctxt->input->cur == 0) &&
1221 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1222 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001223 }
Owen Taylor3473f882001-02-23 17:55:21 +00001224
Daniel Veillarda880b122003-04-21 21:36:41 +00001225#define GROW if ((ctxt->progressive == 0) && \
1226 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001227 xmlGROW (ctxt);
1228
1229static void xmlGROW (xmlParserCtxtPtr ctxt) {
1230 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1231 if ((*ctxt->input->cur == 0) &&
1232 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1233 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001234}
Owen Taylor3473f882001-02-23 17:55:21 +00001235
/* Skip blanks / move to the next character through the full routines */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, updating the character count
 * and the column, and try to grow the input when the cursor reaches a
 * 0 byte. The line counter is not touched.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1247
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * Line and column are updated depending on whether the character being
 * left is a newline, then the PE reference handler is called if the
 * cursor lands on '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): fetch the current character from the
 * context or from the string s; l must be an lvalue, its address is
 * passed to the decoding routine.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a character of length 1 is stored directly, anything else
 * goes through xmlCopyCharMultiByte(). The arguments are parenthesized
 * so that expressions can be passed safely; i is still evaluated more
 * than once and must be a plain lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001262
1263/**
1264 * xmlSkipBlankChars:
1265 * @ctxt: the XML parser context
1266 *
1267 * skip all blanks character found at that point in the input streams.
1268 * It pops up finished entities in the process if allowable at that point.
1269 *
1270 * Returns the number of space chars skipped
1271 */
1272
1273int
1274xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001275 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001276
1277 /*
1278 * It's Okay to use CUR/NEXT here since all the blanks are on
1279 * the ASCII range.
1280 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001281 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1282 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001283 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001284 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001285 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001286 cur = ctxt->input->cur;
1287 while (IS_BLANK(*cur)) {
1288 if (*cur == '\n') {
1289 ctxt->input->line++; ctxt->input->col = 1;
1290 }
1291 cur++;
1292 res++;
1293 if (*cur == 0) {
1294 ctxt->input->cur = cur;
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 cur = ctxt->input->cur;
1297 }
1298 }
1299 ctxt->input->cur = cur;
1300 } else {
1301 int cur;
1302 do {
1303 cur = CUR;
1304 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1305 NEXT;
1306 cur = CUR;
1307 res++;
1308 }
1309 while ((cur == 0) && (ctxt->inputNr > 1) &&
1310 (ctxt->instate != XML_PARSER_COMMENT)) {
1311 xmlPopInput(ctxt);
1312 cur = CUR;
1313 }
1314 /*
1315 * Need to handle support of entities branching here
1316 */
1317 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1318 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1319 }
Owen Taylor3473f882001-02-23 17:55:21 +00001320 return(res);
1321}
1322
1323/************************************************************************
1324 * *
1325 * Commodity functions to handle entities *
1326 * *
1327 ************************************************************************/
1328
1329/**
1330 * xmlPopInput:
1331 * @ctxt: an XML parser context
1332 *
1333 * xmlPopInput: the current input pointed by ctxt->input came to an end
1334 * pop it and return the next char.
1335 *
1336 * Returns the current xmlChar in the parser context
1337 */
1338xmlChar
1339xmlPopInput(xmlParserCtxtPtr ctxt) {
1340 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1341 if (xmlParserDebugEntities)
1342 xmlGenericError(xmlGenericErrorContext,
1343 "Popping input %d\n", ctxt->inputNr);
1344 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001345 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001346 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1347 return(xmlPopInput(ctxt));
1348 return(CUR);
1349}
1350
1351/**
1352 * xmlPushInput:
1353 * @ctxt: an XML parser context
1354 * @input: an XML parser input fragment (entity, XML fragment ...).
1355 *
1356 * xmlPushInput: switch to a new input stream which is stacked on top
1357 * of the previous one(s).
1358 */
1359void
1360xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1361 if (input == NULL) return;
1362
1363 if (xmlParserDebugEntities) {
1364 if ((ctxt->input != NULL) && (ctxt->input->filename))
1365 xmlGenericError(xmlGenericErrorContext,
1366 "%s(%d): ", ctxt->input->filename,
1367 ctxt->input->line);
1368 xmlGenericError(xmlGenericErrorContext,
1369 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1370 }
1371 inputPush(ctxt, input);
1372 GROW;
1373}
1374
1375/**
1376 * xmlParseCharRef:
1377 * @ctxt: an XML parser context
1378 *
1379 * parse Reference declarations
1380 *
1381 * [66] CharRef ::= '&#' [0-9]+ ';' |
1382 * '&#x' [0-9a-fA-F]+ ';'
1383 *
1384 * [ WFC: Legal Character ]
1385 * Characters referred to using character references must match the
1386 * production for Char.
1387 *
1388 * Returns the value parsed (as an int), 0 in case of error
1389 */
1390int
1391xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001392 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001393 int count = 0;
1394
Owen Taylor3473f882001-02-23 17:55:21 +00001395 /*
1396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1397 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001398 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001399 (NXT(2) == 'x')) {
1400 SKIP(3);
1401 GROW;
1402 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001403 if (count++ > 20) {
1404 count = 0;
1405 GROW;
1406 }
1407 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001408 val = val * 16 + (CUR - '0');
1409 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1410 val = val * 16 + (CUR - 'a') + 10;
1411 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1412 val = val * 16 + (CUR - 'A') + 10;
1413 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001414 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001415 val = 0;
1416 break;
1417 }
1418 NEXT;
1419 count++;
1420 }
1421 if (RAW == ';') {
1422 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001423 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001424 ctxt->nbChars ++;
1425 ctxt->input->cur++;
1426 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001427 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001428 SKIP(2);
1429 GROW;
1430 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001431 if (count++ > 20) {
1432 count = 0;
1433 GROW;
1434 }
1435 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001436 val = val * 10 + (CUR - '0');
1437 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001438 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001439 val = 0;
1440 break;
1441 }
1442 NEXT;
1443 count++;
1444 }
1445 if (RAW == ';') {
1446 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001447 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001448 ctxt->nbChars ++;
1449 ctxt->input->cur++;
1450 }
1451 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001452 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001453 }
1454
1455 /*
1456 * [ WFC: Legal Character ]
1457 * Characters referred to using character references must match the
1458 * production for Char.
1459 */
William M. Brack871611b2003-10-18 04:53:14 +00001460 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001461 return(val);
1462 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001463 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1464 "xmlParseCharRef: invalid xmlChar value %d\n",
1465 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001466 }
1467 return(0);
1468}
1469
1470/**
1471 * xmlParseStringCharRef:
1472 * @ctxt: an XML parser context
1473 * @str: a pointer to an index in the string
1474 *
1475 * parse Reference declarations, variant parsing from a string rather
1476 * than an an input flow.
1477 *
1478 * [66] CharRef ::= '&#' [0-9]+ ';' |
1479 * '&#x' [0-9a-fA-F]+ ';'
1480 *
1481 * [ WFC: Legal Character ]
1482 * Characters referred to using character references must match the
1483 * production for Char.
1484 *
1485 * Returns the value parsed (as an int), 0 in case of error, str will be
1486 * updated to the current value of the index
1487 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001488static int
Owen Taylor3473f882001-02-23 17:55:21 +00001489xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1490 const xmlChar *ptr;
1491 xmlChar cur;
1492 int val = 0;
1493
1494 if ((str == NULL) || (*str == NULL)) return(0);
1495 ptr = *str;
1496 cur = *ptr;
1497 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1498 ptr += 3;
1499 cur = *ptr;
1500 while (cur != ';') { /* Non input consuming loop */
1501 if ((cur >= '0') && (cur <= '9'))
1502 val = val * 16 + (cur - '0');
1503 else if ((cur >= 'a') && (cur <= 'f'))
1504 val = val * 16 + (cur - 'a') + 10;
1505 else if ((cur >= 'A') && (cur <= 'F'))
1506 val = val * 16 + (cur - 'A') + 10;
1507 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001508 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001509 val = 0;
1510 break;
1511 }
1512 ptr++;
1513 cur = *ptr;
1514 }
1515 if (cur == ';')
1516 ptr++;
1517 } else if ((cur == '&') && (ptr[1] == '#')){
1518 ptr += 2;
1519 cur = *ptr;
1520 while (cur != ';') { /* Non input consuming loops */
1521 if ((cur >= '0') && (cur <= '9'))
1522 val = val * 10 + (cur - '0');
1523 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001524 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001525 val = 0;
1526 break;
1527 }
1528 ptr++;
1529 cur = *ptr;
1530 }
1531 if (cur == ';')
1532 ptr++;
1533 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001534 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001535 return(0);
1536 }
1537 *str = ptr;
1538
1539 /*
1540 * [ WFC: Legal Character ]
1541 * Characters referred to using character references must match the
1542 * production for Char.
1543 */
William M. Brack871611b2003-10-18 04:53:14 +00001544 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001545 return(val);
1546 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001547 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1548 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1549 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001550 }
1551 return(0);
1552}
1553
1554/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001555 * xmlNewBlanksWrapperInputStream:
1556 * @ctxt: an XML parser context
1557 * @entity: an Entity pointer
1558 *
1559 * Create a new input stream for wrapping
1560 * blanks around a PEReference
1561 *
1562 * Returns the new input stream or NULL
1563 */
1564
1565static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1566
Daniel Veillardf4862f02002-09-10 11:13:43 +00001567static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001568xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1569 xmlParserInputPtr input;
1570 xmlChar *buffer;
1571 size_t length;
1572 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001573 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1574 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001575 return(NULL);
1576 }
1577 if (xmlParserDebugEntities)
1578 xmlGenericError(xmlGenericErrorContext,
1579 "new blanks wrapper for entity: %s\n", entity->name);
1580 input = xmlNewInputStream(ctxt);
1581 if (input == NULL) {
1582 return(NULL);
1583 }
1584 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001585 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001586 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001587 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001588 return(NULL);
1589 }
1590 buffer [0] = ' ';
1591 buffer [1] = '%';
1592 buffer [length-3] = ';';
1593 buffer [length-2] = ' ';
1594 buffer [length-1] = 0;
1595 memcpy(buffer + 2, entity->name, length - 5);
1596 input->free = deallocblankswrapper;
1597 input->base = buffer;
1598 input->cur = buffer;
1599 input->length = length;
1600 input->end = &buffer[length];
1601 return(input);
1602}
1603
1604/**
Owen Taylor3473f882001-02-23 17:55:21 +00001605 * xmlParserHandlePEReference:
1606 * @ctxt: the parser context
1607 *
1608 * [69] PEReference ::= '%' Name ';'
1609 *
1610 * [ WFC: No Recursion ]
1611 * A parsed entity must not contain a recursive
1612 * reference to itself, either directly or indirectly.
1613 *
1614 * [ WFC: Entity Declared ]
1615 * In a document without any DTD, a document with only an internal DTD
1616 * subset which contains no parameter entity references, or a document
1617 * with "standalone='yes'", ... ... The declaration of a parameter
1618 * entity must precede any reference to it...
1619 *
1620 * [ VC: Entity Declared ]
1621 * In a document with an external subset or external parameter entities
1622 * with "standalone='no'", ... ... The declaration of a parameter entity
1623 * must precede any reference to it...
1624 *
1625 * [ WFC: In DTD ]
1626 * Parameter-entity references may only appear in the DTD.
1627 * NOTE: misleading but this is handled.
1628 *
1629 * A PEReference may have been detected in the current input stream
1630 * the handling is done accordingly to
1631 * http://www.w3.org/TR/REC-xml#entproc
1632 * i.e.
1633 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001634 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001635 */
1636void
1637xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001638 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001639 xmlEntityPtr entity = NULL;
1640 xmlParserInputPtr input;
1641
Owen Taylor3473f882001-02-23 17:55:21 +00001642 if (RAW != '%') return;
1643 switch(ctxt->instate) {
1644 case XML_PARSER_CDATA_SECTION:
1645 return;
1646 case XML_PARSER_COMMENT:
1647 return;
1648 case XML_PARSER_START_TAG:
1649 return;
1650 case XML_PARSER_END_TAG:
1651 return;
1652 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001653 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001654 return;
1655 case XML_PARSER_PROLOG:
1656 case XML_PARSER_START:
1657 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001658 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001659 return;
1660 case XML_PARSER_ENTITY_DECL:
1661 case XML_PARSER_CONTENT:
1662 case XML_PARSER_ATTRIBUTE_VALUE:
1663 case XML_PARSER_PI:
1664 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001665 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001666 /* we just ignore it there */
1667 return;
1668 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001669 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001670 return;
1671 case XML_PARSER_ENTITY_VALUE:
1672 /*
1673 * NOTE: in the case of entity values, we don't do the
1674 * substitution here since we need the literal
1675 * entity value to be able to save the internal
1676 * subset of the document.
1677 * This will be handled by xmlStringDecodeEntities
1678 */
1679 return;
1680 case XML_PARSER_DTD:
1681 /*
1682 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1683 * In the internal DTD subset, parameter-entity references
1684 * can occur only where markup declarations can occur, not
1685 * within markup declarations.
1686 * In that case this is handled in xmlParseMarkupDecl
1687 */
1688 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1689 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +00001690 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
1691 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001692 break;
1693 case XML_PARSER_IGNORE:
1694 return;
1695 }
1696
1697 NEXT;
1698 name = xmlParseName(ctxt);
1699 if (xmlParserDebugEntities)
1700 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001701 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001702 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 } else {
1705 if (RAW == ';') {
1706 NEXT;
1707 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1708 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1709 if (entity == NULL) {
1710
1711 /*
1712 * [ WFC: Entity Declared ]
1713 * In a document without any DTD, a document with only an
1714 * internal DTD subset which contains no parameter entity
1715 * references, or a document with "standalone='yes'", ...
1716 * ... The declaration of a parameter entity must precede
1717 * any reference to it...
1718 */
1719 if ((ctxt->standalone == 1) ||
1720 ((ctxt->hasExternalSubset == 0) &&
1721 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001722 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001723 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001724 } else {
1725 /*
1726 * [ VC: Entity Declared ]
1727 * In a document with an external subset or external
1728 * parameter entities with "standalone='no'", ...
1729 * ... The declaration of a parameter entity must precede
1730 * any reference to it...
1731 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001732 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1733 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1734 "PEReference: %%%s; not found\n",
1735 name);
1736 } else
1737 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1738 "PEReference: %%%s; not found\n",
1739 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001740 ctxt->valid = 0;
1741 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001742 } else if (ctxt->input->free != deallocblankswrapper) {
1743 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1744 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001745 } else {
1746 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1747 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001748 xmlChar start[4];
1749 xmlCharEncoding enc;
1750
Owen Taylor3473f882001-02-23 17:55:21 +00001751 /*
1752 * handle the extra spaces added before and after
1753 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001754 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001755 */
1756 input = xmlNewEntityInputStream(ctxt, entity);
1757 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001758
1759 /*
1760 * Get the 4 first bytes and decode the charset
1761 * if enc != XML_CHAR_ENCODING_NONE
1762 * plug some encoding conversion routines.
1763 */
1764 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001765 if (entity->length >= 4) {
1766 start[0] = RAW;
1767 start[1] = NXT(1);
1768 start[2] = NXT(2);
1769 start[3] = NXT(3);
1770 enc = xmlDetectCharEncoding(start, 4);
1771 if (enc != XML_CHAR_ENCODING_NONE) {
1772 xmlSwitchEncoding(ctxt, enc);
1773 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001774 }
1775
Owen Taylor3473f882001-02-23 17:55:21 +00001776 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillard8f597c32003-10-06 08:19:27 +00001777 (memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001778 xmlParseTextDecl(ctxt);
1779 }
Owen Taylor3473f882001-02-23 17:55:21 +00001780 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001781 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1782 "PEReference: %s is not a parameter entity\n",
1783 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001784 }
1785 }
1786 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001787 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001788 }
Owen Taylor3473f882001-02-23 17:55:21 +00001789 }
1790}
1791
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. The result of
 * xmlRealloc() goes through a temporary: when it fails, control jumps to
 * the enclosing function's mem_error label with buffer and buffer##_size
 * both unchanged, so the old block is still reachable and can be released.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    growBuffer_tmp = (xmlChar *)                                        \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (growBuffer_tmp == NULL) goto mem_error;                         \
    buffer = growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
}
1801
1802/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001803 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001804 * @ctxt: the parser context
1805 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001806 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001807 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1808 * @end: an end marker xmlChar, 0 if none
1809 * @end2: an end marker xmlChar, 0 if none
1810 * @end3: an end marker xmlChar, 0 if none
1811 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001812 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001813 *
1814 * [67] Reference ::= EntityRef | CharRef
1815 *
1816 * [69] PEReference ::= '%' Name ';'
1817 *
1818 * Returns A newly allocated string with the substitution done. The caller
1819 * must deallocate it !
1820 */
1821xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001822xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1823 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001824 xmlChar *buffer = NULL;
1825 int buffer_size = 0;
1826
1827 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001828 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlEntityPtr ent;
1830 int c,l;
1831 int nbchars = 0;
1832
Daniel Veillarde57ec792003-09-10 10:50:59 +00001833 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001834 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001835 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001836
1837 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001838 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 return(NULL);
1840 }
1841
1842 /*
1843 * allocate a translation buffer.
1844 */
1845 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001846 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001847 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001848
1849 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001850 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001851 * we are operating on already parsed values.
1852 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001853 if (str < last)
1854 c = CUR_SCHAR(str, l);
1855 else
1856 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001857 while ((c != 0) && (c != end) && /* non input consuming loop */
1858 (c != end2) && (c != end3)) {
1859
1860 if (c == 0) break;
1861 if ((c == '&') && (str[1] == '#')) {
1862 int val = xmlParseStringCharRef(ctxt, &str);
1863 if (val != 0) {
1864 COPY_BUF(0,buffer,nbchars,val);
1865 }
1866 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1867 if (xmlParserDebugEntities)
1868 xmlGenericError(xmlGenericErrorContext,
1869 "String decoding Entity Reference: %.30s\n",
1870 str);
1871 ent = xmlParseStringEntityRef(ctxt, &str);
1872 if ((ent != NULL) &&
1873 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1874 if (ent->content != NULL) {
1875 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1876 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001877 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1878 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001879 }
1880 } else if ((ent != NULL) && (ent->content != NULL)) {
1881 xmlChar *rep;
1882
1883 ctxt->depth++;
1884 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1885 0, 0, 0);
1886 ctxt->depth--;
1887 if (rep != NULL) {
1888 current = rep;
1889 while (*current != 0) { /* non input consuming loop */
1890 buffer[nbchars++] = *current++;
1891 if (nbchars >
1892 buffer_size - XML_PARSER_BUFFER_SIZE) {
1893 growBuffer(buffer);
1894 }
1895 }
1896 xmlFree(rep);
1897 }
1898 } else if (ent != NULL) {
1899 int i = xmlStrlen(ent->name);
1900 const xmlChar *cur = ent->name;
1901
1902 buffer[nbchars++] = '&';
1903 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1904 growBuffer(buffer);
1905 }
1906 for (;i > 0;i--)
1907 buffer[nbchars++] = *cur++;
1908 buffer[nbchars++] = ';';
1909 }
1910 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1911 if (xmlParserDebugEntities)
1912 xmlGenericError(xmlGenericErrorContext,
1913 "String decoding PE Reference: %.30s\n", str);
1914 ent = xmlParseStringPEReference(ctxt, &str);
1915 if (ent != NULL) {
1916 xmlChar *rep;
1917
1918 ctxt->depth++;
1919 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1920 0, 0, 0);
1921 ctxt->depth--;
1922 if (rep != NULL) {
1923 current = rep;
1924 while (*current != 0) { /* non input consuming loop */
1925 buffer[nbchars++] = *current++;
1926 if (nbchars >
1927 buffer_size - XML_PARSER_BUFFER_SIZE) {
1928 growBuffer(buffer);
1929 }
1930 }
1931 xmlFree(rep);
1932 }
1933 }
1934 } else {
1935 COPY_BUF(l,buffer,nbchars,c);
1936 str += l;
1937 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1938 growBuffer(buffer);
1939 }
1940 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001941 if (str < last)
1942 c = CUR_SCHAR(str, l);
1943 else
1944 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001945 }
1946 buffer[nbchars++] = 0;
1947 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001948
1949mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001950 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001951 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001952}
1953
Daniel Veillarde57ec792003-09-10 10:50:59 +00001954/**
1955 * xmlStringDecodeEntities:
1956 * @ctxt: the parser context
1957 * @str: the input string
1958 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1959 * @end: an end marker xmlChar, 0 if none
1960 * @end2: an end marker xmlChar, 0 if none
1961 * @end3: an end marker xmlChar, 0 if none
1962 *
1963 * Takes a entity string content and process to do the adequate substitutions.
1964 *
1965 * [67] Reference ::= EntityRef | CharRef
1966 *
1967 * [69] PEReference ::= '%' Name ';'
1968 *
1969 * Returns A newly allocated string with the substitution done. The caller
1970 * must deallocate it !
1971 */
1972xmlChar *
1973xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1974 xmlChar end, xmlChar end2, xmlChar end3) {
1975 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1976 end, end2, end3));
1977}
Owen Taylor3473f882001-02-23 17:55:21 +00001978
1979/************************************************************************
1980 * *
1981 * Commodity functions to handle xmlChars *
1982 * *
1983 ************************************************************************/
1984
1985/**
1986 * xmlStrndup:
1987 * @cur: the input xmlChar *
1988 * @len: the len of @cur
1989 *
1990 * a strndup for array of xmlChar's
1991 *
1992 * Returns a new xmlChar * or NULL
1993 */
1994xmlChar *
1995xmlStrndup(const xmlChar *cur, int len) {
1996 xmlChar *ret;
1997
1998 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001999 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002000 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002001 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002002 return(NULL);
2003 }
2004 memcpy(ret, cur, len * sizeof(xmlChar));
2005 ret[len] = 0;
2006 return(ret);
2007}
2008
2009/**
2010 * xmlStrdup:
2011 * @cur: the input xmlChar *
2012 *
2013 * a strdup for array of xmlChar's. Since they are supposed to be
2014 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2015 * a termination mark of '0'.
2016 *
2017 * Returns a new xmlChar * or NULL
2018 */
2019xmlChar *
2020xmlStrdup(const xmlChar *cur) {
2021 const xmlChar *p = cur;
2022
2023 if (cur == NULL) return(NULL);
2024 while (*p != 0) p++; /* non input consuming */
2025 return(xmlStrndup(cur, p - cur));
2026}
2027
2028/**
2029 * xmlCharStrndup:
2030 * @cur: the input char *
2031 * @len: the len of @cur
2032 *
2033 * a strndup for char's to xmlChar's
2034 *
2035 * Returns a new xmlChar * or NULL
2036 */
2037
2038xmlChar *
2039xmlCharStrndup(const char *cur, int len) {
2040 int i;
2041 xmlChar *ret;
2042
2043 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002044 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002045 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002046 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 return(NULL);
2048 }
2049 for (i = 0;i < len;i++)
2050 ret[i] = (xmlChar) cur[i];
2051 ret[len] = 0;
2052 return(ret);
2053}
2054
2055/**
2056 * xmlCharStrdup:
2057 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002058 *
2059 * a strdup for char's to xmlChar's
2060 *
2061 * Returns a new xmlChar * or NULL
2062 */
2063
2064xmlChar *
2065xmlCharStrdup(const char *cur) {
2066 const char *p = cur;
2067
2068 if (cur == NULL) return(NULL);
2069 while (*p != '\0') p++; /* non input consuming */
2070 return(xmlCharStrndup(cur, p - cur));
2071}
2072
2073/**
2074 * xmlStrcmp:
2075 * @str1: the first xmlChar *
2076 * @str2: the second xmlChar *
2077 *
2078 * a strcmp for xmlChar's
2079 *
2080 * Returns the integer result of the comparison
2081 */
2082
2083int
2084xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2085 register int tmp;
2086
2087 if (str1 == str2) return(0);
2088 if (str1 == NULL) return(-1);
2089 if (str2 == NULL) return(1);
2090 do {
2091 tmp = *str1++ - *str2;
2092 if (tmp != 0) return(tmp);
2093 } while (*str2++ != 0);
2094 return 0;
2095}
2096
2097/**
2098 * xmlStrEqual:
2099 * @str1: the first xmlChar *
2100 * @str2: the second xmlChar *
2101 *
2102 * Check if both string are equal of have same content
2103 * Should be a bit more readable and faster than xmlStrEqual()
2104 *
2105 * Returns 1 if they are equal, 0 if they are different
2106 */
2107
2108int
2109xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2110 if (str1 == str2) return(1);
2111 if (str1 == NULL) return(0);
2112 if (str2 == NULL) return(0);
2113 do {
2114 if (*str1++ != *str2) return(0);
2115 } while (*str2++);
2116 return(1);
2117}
2118
2119/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * xmlStrQEqual:
2121 * @pref: the prefix of the QName
2122 * @name: the localname of the QName
2123 * @str: the second xmlChar *
2124 *
2125 * Check if a QName is Equal to a given string
2126 *
2127 * Returns 1 if they are equal, 0 if they are different
2128 */
2129
2130int
2131xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2132 if (pref == NULL) return(xmlStrEqual(name, str));
2133 if (name == NULL) return(0);
2134 if (str == NULL) return(0);
2135
2136 do {
2137 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002138 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002139 if (*str++ != ':') return(0);
2140 do {
2141 if (*name++ != *str) return(0);
2142 } while (*str++);
2143 return(1);
2144}
2145
2146/**
Owen Taylor3473f882001-02-23 17:55:21 +00002147 * xmlStrncmp:
2148 * @str1: the first xmlChar *
2149 * @str2: the second xmlChar *
2150 * @len: the max comparison length
2151 *
2152 * a strncmp for xmlChar's
2153 *
2154 * Returns the integer result of the comparison
2155 */
2156
2157int
2158xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2159 register int tmp;
2160
2161 if (len <= 0) return(0);
2162 if (str1 == str2) return(0);
2163 if (str1 == NULL) return(-1);
2164 if (str2 == NULL) return(1);
2165 do {
2166 tmp = *str1++ - *str2;
2167 if (tmp != 0 || --len == 0) return(tmp);
2168 } while (*str2++ != 0);
2169 return 0;
2170}
2171
Daniel Veillardb44025c2001-10-11 22:55:55 +00002172static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002173 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2174 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2175 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2176 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2177 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2178 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2179 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2180 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2181 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2182 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2183 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2184 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2185 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2186 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2187 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2188 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2189 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2190 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2191 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2192 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2193 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2194 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2195 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2196 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2197 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2198 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2199 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2200 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2201 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2202 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2203 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2204 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2205};
2206
2207/**
2208 * xmlStrcasecmp:
2209 * @str1: the first xmlChar *
2210 * @str2: the second xmlChar *
2211 *
2212 * a strcasecmp for xmlChar's
2213 *
2214 * Returns the integer result of the comparison
2215 */
2216
2217int
2218xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2219 register int tmp;
2220
2221 if (str1 == str2) return(0);
2222 if (str1 == NULL) return(-1);
2223 if (str2 == NULL) return(1);
2224 do {
2225 tmp = casemap[*str1++] - casemap[*str2];
2226 if (tmp != 0) return(tmp);
2227 } while (*str2++ != 0);
2228 return 0;
2229}
2230
2231/**
2232 * xmlStrncasecmp:
2233 * @str1: the first xmlChar *
2234 * @str2: the second xmlChar *
2235 * @len: the max comparison length
2236 *
2237 * a strncasecmp for xmlChar's
2238 *
2239 * Returns the integer result of the comparison
2240 */
2241
2242int
2243xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2244 register int tmp;
2245
2246 if (len <= 0) return(0);
2247 if (str1 == str2) return(0);
2248 if (str1 == NULL) return(-1);
2249 if (str2 == NULL) return(1);
2250 do {
2251 tmp = casemap[*str1++] - casemap[*str2];
2252 if (tmp != 0 || --len == 0) return(tmp);
2253 } while (*str2++ != 0);
2254 return 0;
2255}
2256
2257/**
2258 * xmlStrchr:
2259 * @str: the xmlChar * array
2260 * @val: the xmlChar to search
2261 *
2262 * a strchr for xmlChar's
2263 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002264 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002265 */
2266
2267const xmlChar *
2268xmlStrchr(const xmlChar *str, xmlChar val) {
2269 if (str == NULL) return(NULL);
2270 while (*str != 0) { /* non input consuming */
2271 if (*str == val) return((xmlChar *) str);
2272 str++;
2273 }
2274 return(NULL);
2275}
2276
2277/**
2278 * xmlStrstr:
2279 * @str: the xmlChar * array (haystack)
2280 * @val: the xmlChar to search (needle)
2281 *
2282 * a strstr for xmlChar's
2283 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002284 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002285 */
2286
2287const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002288xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002289 int n;
2290
2291 if (str == NULL) return(NULL);
2292 if (val == NULL) return(NULL);
2293 n = xmlStrlen(val);
2294
2295 if (n == 0) return(str);
2296 while (*str != 0) { /* non input consuming */
2297 if (*str == *val) {
2298 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2299 }
2300 str++;
2301 }
2302 return(NULL);
2303}
2304
2305/**
2306 * xmlStrcasestr:
2307 * @str: the xmlChar * array (haystack)
2308 * @val: the xmlChar to search (needle)
2309 *
2310 * a case-ignoring strstr for xmlChar's
2311 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002312 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002313 */
2314
2315const xmlChar *
2316xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2317 int n;
2318
2319 if (str == NULL) return(NULL);
2320 if (val == NULL) return(NULL);
2321 n = xmlStrlen(val);
2322
2323 if (n == 0) return(str);
2324 while (*str != 0) { /* non input consuming */
2325 if (casemap[*str] == casemap[*val])
2326 if (!xmlStrncasecmp(str, val, n)) return(str);
2327 str++;
2328 }
2329 return(NULL);
2330}
2331
2332/**
2333 * xmlStrsub:
2334 * @str: the xmlChar * array (haystack)
2335 * @start: the index of the first char (zero based)
2336 * @len: the length of the substring
2337 *
2338 * Extract a substring of a given string
2339 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002340 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002341 */
2342
2343xmlChar *
2344xmlStrsub(const xmlChar *str, int start, int len) {
2345 int i;
2346
2347 if (str == NULL) return(NULL);
2348 if (start < 0) return(NULL);
2349 if (len < 0) return(NULL);
2350
2351 for (i = 0;i < start;i++) {
2352 if (*str == 0) return(NULL);
2353 str++;
2354 }
2355 if (*str == 0) return(NULL);
2356 return(xmlStrndup(str, len));
2357}
2358
2359/**
2360 * xmlStrlen:
2361 * @str: the xmlChar * array
2362 *
2363 * length of a xmlChar's string
2364 *
2365 * Returns the number of xmlChar contained in the ARRAY.
2366 */
2367
2368int
2369xmlStrlen(const xmlChar *str) {
2370 int len = 0;
2371
2372 if (str == NULL) return(0);
2373 while (*str != 0) { /* non input consuming */
2374 str++;
2375 len++;
2376 }
2377 return(len);
2378}
2379
2380/**
2381 * xmlStrncat:
2382 * @cur: the original xmlChar * array
2383 * @add: the xmlChar * array added
2384 * @len: the length of @add
2385 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002386 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002387 * first bytes of @add.
2388 *
2389 * Returns a new xmlChar *, the original @cur is reallocated if needed
2390 * and should not be freed
2391 */
2392
2393xmlChar *
2394xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2395 int size;
2396 xmlChar *ret;
2397
2398 if ((add == NULL) || (len == 0))
2399 return(cur);
2400 if (cur == NULL)
2401 return(xmlStrndup(add, len));
2402
2403 size = xmlStrlen(cur);
2404 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2405 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002406 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002407 return(cur);
2408 }
2409 memcpy(&ret[size], add, len * sizeof(xmlChar));
2410 ret[size + len] = 0;
2411 return(ret);
2412}
2413
2414/**
2415 * xmlStrcat:
2416 * @cur: the original xmlChar * array
2417 * @add: the xmlChar * array added
2418 *
2419 * a strcat for array of xmlChar's. Since they are supposed to be
2420 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2421 * a termination mark of '0'.
2422 *
2423 * Returns a new xmlChar * containing the concatenated string.
2424 */
2425xmlChar *
2426xmlStrcat(xmlChar *cur, const xmlChar *add) {
2427 const xmlChar *p = add;
2428
2429 if (add == NULL) return(cur);
2430 if (cur == NULL)
2431 return(xmlStrdup(add));
2432
2433 while (*p != 0) p++; /* non input consuming */
2434 return(xmlStrncat(cur, add, p - add));
2435}
2436
Aleksey Sanine7acf432003-10-02 20:05:27 +00002437/**
2438 * xmlStrPrintf:
2439 * @buf: the result buffer.
2440 * @len: the result buffer length.
2441 * @msg: the message with printf formatting.
2442 * @...: extra parameters for the message.
2443 *
2444 * Formats @msg and places result into @buf.
2445 *
2446 * Returns the number of characters written to @buf or -1 if an error occurs.
2447 */
2448int
2449xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2450 va_list args;
2451 int ret;
2452
2453 if((buf == NULL) || (msg == NULL)) {
2454 return(-1);
2455 }
2456
2457 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002458 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002459 va_end(args);
Daniel Veillardd96f6d32003-10-07 21:25:12 +00002460 buf[len - 1] = 0; /* be safe ! */
Aleksey Sanine7acf432003-10-02 20:05:27 +00002461
2462 return(ret);
2463}
2464
Owen Taylor3473f882001-02-23 17:55:21 +00002465/************************************************************************
2466 * *
2467 * Commodity functions, cleanup needed ? *
2468 * *
2469 ************************************************************************/
2470
2471/**
2472 * areBlanks:
2473 * @ctxt: an XML parser context
2474 * @str: a xmlChar *
2475 * @len: the size of @str
2476 *
2477 * Is this a sequence of blank chars that one can ignore ?
2478 *
2479 * Returns 1 if ignorable 0 otherwise.
2480 */
2481
2482static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2483 int i, ret;
2484 xmlNodePtr lastChild;
2485
Daniel Veillard05c13a22001-09-09 08:38:09 +00002486 /*
2487 * Don't spend time trying to differentiate them, the same callback is
2488 * used !
2489 */
2490 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002491 return(0);
2492
Owen Taylor3473f882001-02-23 17:55:21 +00002493 /*
2494 * Check for xml:space value.
2495 */
2496 if (*(ctxt->space) == 1)
2497 return(0);
2498
2499 /*
2500 * Check that the string is made of blanks
2501 */
2502 for (i = 0;i < len;i++)
2503 if (!(IS_BLANK(str[i]))) return(0);
2504
2505 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002506 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002507 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002508 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002509 if (ctxt->myDoc != NULL) {
2510 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2511 if (ret == 0) return(1);
2512 if (ret == 1) return(0);
2513 }
2514
2515 /*
2516 * Otherwise, heuristic :-\
2517 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002518 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002519 if ((ctxt->node->children == NULL) &&
2520 (RAW == '<') && (NXT(1) == '/')) return(0);
2521
2522 lastChild = xmlGetLastChild(ctxt->node);
2523 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002524 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2525 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002526 } else if (xmlNodeIsText(lastChild))
2527 return(0);
2528 else if ((ctxt->node->children != NULL) &&
2529 (xmlNodeIsText(ctxt->node->children)))
2530 return(0);
2531 return(1);
2532}
2533
Owen Taylor3473f882001-02-23 17:55:21 +00002534/************************************************************************
2535 * *
2536 * Extra stuff for namespace support *
2537 * Relates to http://www.w3.org/TR/WD-xml-names *
2538 * *
2539 ************************************************************************/
2540
2541/**
2542 * xmlSplitQName:
2543 * @ctxt: an XML parser context
2544 * @name: an XML parser context
2545 * @prefix: a xmlChar **
2546 *
2547 * parse an UTF8 encoded XML qualified name string
2548 *
2549 * [NS 5] QName ::= (Prefix ':')? LocalPart
2550 *
2551 * [NS 6] Prefix ::= NCName
2552 *
2553 * [NS 7] LocalPart ::= NCName
2554 *
2555 * Returns the local part, and prefix is updated
2556 * to get the Prefix if any.
2557 */
2558
2559xmlChar *
2560xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2561 xmlChar buf[XML_MAX_NAMELEN + 5];
2562 xmlChar *buffer = NULL;
2563 int len = 0;
2564 int max = XML_MAX_NAMELEN;
2565 xmlChar *ret = NULL;
2566 const xmlChar *cur = name;
2567 int c;
2568
2569 *prefix = NULL;
2570
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002571 if (cur == NULL) return(NULL);
2572
Owen Taylor3473f882001-02-23 17:55:21 +00002573#ifndef XML_XML_NAMESPACE
2574 /* xml: prefix is not really a namespace */
2575 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2576 (cur[2] == 'l') && (cur[3] == ':'))
2577 return(xmlStrdup(name));
2578#endif
2579
Daniel Veillard597bc482003-07-24 16:08:28 +00002580 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002581 if (cur[0] == ':')
2582 return(xmlStrdup(name));
2583
2584 c = *cur++;
2585 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2586 buf[len++] = c;
2587 c = *cur++;
2588 }
2589 if (len >= max) {
2590 /*
2591 * Okay someone managed to make a huge name, so he's ready to pay
2592 * for the processing speed.
2593 */
2594 max = len * 2;
2595
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002596 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002597 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002598 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002599 return(NULL);
2600 }
2601 memcpy(buffer, buf, len);
2602 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2603 if (len + 10 > max) {
2604 max *= 2;
2605 buffer = (xmlChar *) xmlRealloc(buffer,
2606 max * sizeof(xmlChar));
2607 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002608 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002609 return(NULL);
2610 }
2611 }
2612 buffer[len++] = c;
2613 c = *cur++;
2614 }
2615 buffer[len] = 0;
2616 }
2617
Daniel Veillard597bc482003-07-24 16:08:28 +00002618 /* nasty but well=formed
2619 if ((c == ':') && (*cur == 0)) {
2620 return(xmlStrdup(name));
2621 } */
2622
Owen Taylor3473f882001-02-23 17:55:21 +00002623 if (buffer == NULL)
2624 ret = xmlStrndup(buf, len);
2625 else {
2626 ret = buffer;
2627 buffer = NULL;
2628 max = XML_MAX_NAMELEN;
2629 }
2630
2631
2632 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002633 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002634 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002635 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002636 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002637 }
Owen Taylor3473f882001-02-23 17:55:21 +00002638 len = 0;
2639
Daniel Veillardbb284f42002-10-16 18:02:47 +00002640 /*
2641 * Check that the first character is proper to start
2642 * a new name
2643 */
2644 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2645 ((c >= 0x41) && (c <= 0x5A)) ||
2646 (c == '_') || (c == ':'))) {
2647 int l;
2648 int first = CUR_SCHAR(cur, l);
2649
2650 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002651 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002652 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002653 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002654 }
2655 }
2656 cur++;
2657
Owen Taylor3473f882001-02-23 17:55:21 +00002658 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2659 buf[len++] = c;
2660 c = *cur++;
2661 }
2662 if (len >= max) {
2663 /*
2664 * Okay someone managed to make a huge name, so he's ready to pay
2665 * for the processing speed.
2666 */
2667 max = len * 2;
2668
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002669 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002670 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002671 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002672 return(NULL);
2673 }
2674 memcpy(buffer, buf, len);
2675 while (c != 0) { /* tested bigname2.xml */
2676 if (len + 10 > max) {
2677 max *= 2;
2678 buffer = (xmlChar *) xmlRealloc(buffer,
2679 max * sizeof(xmlChar));
2680 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002681 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002682 return(NULL);
2683 }
2684 }
2685 buffer[len++] = c;
2686 c = *cur++;
2687 }
2688 buffer[len] = 0;
2689 }
2690
2691 if (buffer == NULL)
2692 ret = xmlStrndup(buf, len);
2693 else {
2694 ret = buffer;
2695 }
2696 }
2697
2698 return(ret);
2699}
2700
2701/************************************************************************
2702 * *
2703 * The parser itself *
2704 * Relates to http://www.w3.org/TR/REC-xml *
2705 * *
2706 ************************************************************************/
2707
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002708static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002709static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002710 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002711
Owen Taylor3473f882001-02-23 17:55:21 +00002712/**
2713 * xmlParseName:
2714 * @ctxt: an XML parser context
2715 *
2716 * parse an XML name.
2717 *
2718 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2719 * CombiningChar | Extender
2720 *
2721 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2722 *
2723 * [6] Names ::= Name (S Name)*
2724 *
2725 * Returns the Name parsed or NULL
2726 */
2727
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002728const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002729xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002730 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002731 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002732 int count = 0;
2733
2734 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002735
2736 /*
2737 * Accelerator for simple ASCII names
2738 */
2739 in = ctxt->input->cur;
2740 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2741 ((*in >= 0x41) && (*in <= 0x5A)) ||
2742 (*in == '_') || (*in == ':')) {
2743 in++;
2744 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2745 ((*in >= 0x41) && (*in <= 0x5A)) ||
2746 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002747 (*in == '_') || (*in == '-') ||
2748 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002749 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002750 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002751 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002752 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002753 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002754 ctxt->nbChars += count;
2755 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002756 if (ret == NULL)
2757 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002758 return(ret);
2759 }
2760 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002761 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002762}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002763
Daniel Veillard46de64e2002-05-29 08:21:33 +00002764/**
2765 * xmlParseNameAndCompare:
2766 * @ctxt: an XML parser context
2767 *
2768 * parse an XML name and compares for match
2769 * (specialized for endtag parsing)
2770 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002771 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2772 * and the name for mismatch
2773 */
2774
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002775static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002776xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2777 const xmlChar *cmp = other;
2778 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002779 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002780
2781 GROW;
2782
2783 in = ctxt->input->cur;
2784 while (*in != 0 && *in == *cmp) {
2785 ++in;
2786 ++cmp;
2787 }
2788 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2789 /* success */
2790 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002791 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002792 }
2793 /* failure (or end of input buffer), check with full function */
2794 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002795 /* strings coming from the dictionnary direct compare possible */
2796 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002797 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002798 }
2799 return ret;
2800}
2801
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002802static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002803xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002804 int len = 0, l;
2805 int c;
2806 int count = 0;
2807
2808 /*
2809 * Handler for more complex cases
2810 */
2811 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002812 c = CUR_CHAR(l);
2813 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2814 (!IS_LETTER(c) && (c != '_') &&
2815 (c != ':'))) {
2816 return(NULL);
2817 }
2818
2819 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002820 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002829 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002830 NEXTL(l);
2831 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002832 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002833 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002834}
2835
2836/**
2837 * xmlParseStringName:
2838 * @ctxt: an XML parser context
2839 * @str: a pointer to the string pointer (IN/OUT)
2840 *
2841 * parse an XML name.
2842 *
2843 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2844 * CombiningChar | Extender
2845 *
2846 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2847 *
2848 * [6] Names ::= Name (S Name)*
2849 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002850 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002851 * is updated to the current location in the string.
2852 */
2853
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002854static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002855xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2856 xmlChar buf[XML_MAX_NAMELEN + 5];
2857 const xmlChar *cur = *str;
2858 int len = 0, l;
2859 int c;
2860
2861 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002862 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002863 (c != ':')) {
2864 return(NULL);
2865 }
2866
William M. Brack871611b2003-10-18 04:53:14 +00002867 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002868 (c == '.') || (c == '-') ||
2869 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002870 (IS_COMBINING(c)) ||
2871 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002872 COPY_BUF(l,buf,len,c);
2873 cur += l;
2874 c = CUR_SCHAR(cur, l);
2875 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2876 /*
2877 * Okay someone managed to make a huge name, so he's ready to pay
2878 * for the processing speed.
2879 */
2880 xmlChar *buffer;
2881 int max = len * 2;
2882
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002883 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002884 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002885 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 return(NULL);
2887 }
2888 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002889 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002890 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002891 (c == '.') || (c == '-') ||
2892 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002893 (IS_COMBINING(c)) ||
2894 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002895 if (len + 10 > max) {
2896 max *= 2;
2897 buffer = (xmlChar *) xmlRealloc(buffer,
2898 max * sizeof(xmlChar));
2899 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002900 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002901 return(NULL);
2902 }
2903 }
2904 COPY_BUF(l,buffer,len,c);
2905 cur += l;
2906 c = CUR_SCHAR(cur, l);
2907 }
2908 buffer[len] = 0;
2909 *str = cur;
2910 return(buffer);
2911 }
2912 }
2913 *str = cur;
2914 return(xmlStrndup(buf, len));
2915}
2916
2917/**
2918 * xmlParseNmtoken:
2919 * @ctxt: an XML parser context
2920 *
2921 * parse an XML Nmtoken.
2922 *
2923 * [7] Nmtoken ::= (NameChar)+
2924 *
2925 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2926 *
2927 * Returns the Nmtoken parsed or NULL
2928 */
2929
2930xmlChar *
2931xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2932 xmlChar buf[XML_MAX_NAMELEN + 5];
2933 int len = 0, l;
2934 int c;
2935 int count = 0;
2936
2937 GROW;
2938 c = CUR_CHAR(l);
2939
William M. Brack871611b2003-10-18 04:53:14 +00002940 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002941 (c == '.') || (c == '-') ||
2942 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002943 (IS_COMBINING(c)) ||
2944 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002945 if (count++ > 100) {
2946 count = 0;
2947 GROW;
2948 }
2949 COPY_BUF(l,buf,len,c);
2950 NEXTL(l);
2951 c = CUR_CHAR(l);
2952 if (len >= XML_MAX_NAMELEN) {
2953 /*
2954 * Okay someone managed to make a huge token, so he's ready to pay
2955 * for the processing speed.
2956 */
2957 xmlChar *buffer;
2958 int max = len * 2;
2959
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002960 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002961 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002962 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002963 return(NULL);
2964 }
2965 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002966 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002967 (c == '.') || (c == '-') ||
2968 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002969 (IS_COMBINING(c)) ||
2970 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002971 if (count++ > 100) {
2972 count = 0;
2973 GROW;
2974 }
2975 if (len + 10 > max) {
2976 max *= 2;
2977 buffer = (xmlChar *) xmlRealloc(buffer,
2978 max * sizeof(xmlChar));
2979 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002980 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002981 return(NULL);
2982 }
2983 }
2984 COPY_BUF(l,buffer,len,c);
2985 NEXTL(l);
2986 c = CUR_CHAR(l);
2987 }
2988 buffer[len] = 0;
2989 return(buffer);
2990 }
2991 }
2992 if (len == 0)
2993 return(NULL);
2994 return(xmlStrndup(buf, len));
2995}
2996
2997/**
2998 * xmlParseEntityValue:
2999 * @ctxt: an XML parser context
3000 * @orig: if non-NULL store a copy of the original entity value
3001 *
3002 * parse a value for ENTITY declarations
3003 *
3004 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3005 * "'" ([^%&'] | PEReference | Reference)* "'"
3006 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003007 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003008 */
3009
3010xmlChar *
3011xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3012 xmlChar *buf = NULL;
3013 int len = 0;
3014 int size = XML_PARSER_BUFFER_SIZE;
3015 int c, l;
3016 xmlChar stop;
3017 xmlChar *ret = NULL;
3018 const xmlChar *cur = NULL;
3019 xmlParserInputPtr input;
3020
3021 if (RAW == '"') stop = '"';
3022 else if (RAW == '\'') stop = '\'';
3023 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003024 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003025 return(NULL);
3026 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003027 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003028 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003029 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003030 return(NULL);
3031 }
3032
3033 /*
3034 * The content of the entity definition is copied in a buffer.
3035 */
3036
3037 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3038 input = ctxt->input;
3039 GROW;
3040 NEXT;
3041 c = CUR_CHAR(l);
3042 /*
3043 * NOTE: 4.4.5 Included in Literal
3044 * When a parameter entity reference appears in a literal entity
3045 * value, ... a single or double quote character in the replacement
3046 * text is always treated as a normal data character and will not
3047 * terminate the literal.
3048 * In practice it means we stop the loop only when back at parsing
3049 * the initial entity and the quote is found
3050 */
William M. Brack871611b2003-10-18 04:53:14 +00003051 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003052 (ctxt->input != input))) {
3053 if (len + 5 >= size) {
3054 size *= 2;
3055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3056 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003057 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003058 return(NULL);
3059 }
3060 }
3061 COPY_BUF(l,buf,len,c);
3062 NEXTL(l);
3063 /*
3064 * Pop-up of finished entities.
3065 */
3066 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3067 xmlPopInput(ctxt);
3068
3069 GROW;
3070 c = CUR_CHAR(l);
3071 if (c == 0) {
3072 GROW;
3073 c = CUR_CHAR(l);
3074 }
3075 }
3076 buf[len] = 0;
3077
3078 /*
3079 * Raise problem w.r.t. '&' and '%' being used in non-entities
3080 * reference constructs. Note Charref will be handled in
3081 * xmlStringDecodeEntities()
3082 */
3083 cur = buf;
3084 while (*cur != 0) { /* non input consuming */
3085 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3086 xmlChar *name;
3087 xmlChar tmp = *cur;
3088
3089 cur++;
3090 name = xmlParseStringName(ctxt, &cur);
3091 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003092 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003093 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003094 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003095 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003096 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3097 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003098 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003099 }
3100 if (name != NULL)
3101 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003102 if (*cur == 0)
3103 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003104 }
3105 cur++;
3106 }
3107
3108 /*
3109 * Then PEReference entities are substituted.
3110 */
3111 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003112 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003113 xmlFree(buf);
3114 } else {
3115 NEXT;
3116 /*
3117 * NOTE: 4.4.7 Bypassed
3118 * When a general entity reference appears in the EntityValue in
3119 * an entity declaration, it is bypassed and left as is.
3120 * so XML_SUBSTITUTE_REF is not set here.
3121 */
3122 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3123 0, 0, 0);
3124 if (orig != NULL)
3125 *orig = buf;
3126 else
3127 xmlFree(buf);
3128 }
3129
3130 return(ret);
3131}
3132
3133/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003134 * xmlParseAttValueComplex:
3135 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003136 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003137 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003138 *
3139 * parse a value for an attribute, this is the fallback function
3140 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003141 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003142 *
3143 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3144 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003145static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003146xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003147 xmlChar limit = 0;
3148 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003149 int len = 0;
3150 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003151 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003152 xmlChar *current = NULL;
3153 xmlEntityPtr ent;
3154
Owen Taylor3473f882001-02-23 17:55:21 +00003155 if (NXT(0) == '"') {
3156 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3157 limit = '"';
3158 NEXT;
3159 } else if (NXT(0) == '\'') {
3160 limit = '\'';
3161 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3162 NEXT;
3163 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003164 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003165 return(NULL);
3166 }
3167
3168 /*
3169 * allocate a translation buffer.
3170 */
3171 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003172 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003173 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003174
3175 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003176 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003177 */
3178 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003179 while ((NXT(0) != limit) && /* checked */
3180 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003181 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003182 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003183 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 if (NXT(1) == '#') {
3185 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003186
Owen Taylor3473f882001-02-23 17:55:21 +00003187 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003188 if (ctxt->replaceEntities) {
3189 if (len > buf_size - 10) {
3190 growBuffer(buf);
3191 }
3192 buf[len++] = '&';
3193 } else {
3194 /*
3195 * The reparsing will be done in xmlStringGetNodeList()
3196 * called by the attribute() function in SAX.c
3197 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003198 if (len > buf_size - 10) {
3199 growBuffer(buf);
3200 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003201 buf[len++] = '&';
3202 buf[len++] = '#';
3203 buf[len++] = '3';
3204 buf[len++] = '8';
3205 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003206 }
3207 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003208 if (len > buf_size - 10) {
3209 growBuffer(buf);
3210 }
Owen Taylor3473f882001-02-23 17:55:21 +00003211 len += xmlCopyChar(0, &buf[len], val);
3212 }
3213 } else {
3214 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003215 if ((ent != NULL) &&
3216 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3217 if (len > buf_size - 10) {
3218 growBuffer(buf);
3219 }
3220 if ((ctxt->replaceEntities == 0) &&
3221 (ent->content[0] == '&')) {
3222 buf[len++] = '&';
3223 buf[len++] = '#';
3224 buf[len++] = '3';
3225 buf[len++] = '8';
3226 buf[len++] = ';';
3227 } else {
3228 buf[len++] = ent->content[0];
3229 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003230 } else if ((ent != NULL) &&
3231 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003232 xmlChar *rep;
3233
3234 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3235 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003236 XML_SUBSTITUTE_REF,
3237 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003238 if (rep != NULL) {
3239 current = rep;
3240 while (*current != 0) { /* non input consuming */
3241 buf[len++] = *current++;
3242 if (len > buf_size - 10) {
3243 growBuffer(buf);
3244 }
3245 }
3246 xmlFree(rep);
3247 }
3248 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003249 if (len > buf_size - 10) {
3250 growBuffer(buf);
3251 }
Owen Taylor3473f882001-02-23 17:55:21 +00003252 if (ent->content != NULL)
3253 buf[len++] = ent->content[0];
3254 }
3255 } else if (ent != NULL) {
3256 int i = xmlStrlen(ent->name);
3257 const xmlChar *cur = ent->name;
3258
3259 /*
3260 * This may look absurd but is needed to detect
3261 * entities problems
3262 */
3263 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3264 (ent->content != NULL)) {
3265 xmlChar *rep;
3266 rep = xmlStringDecodeEntities(ctxt, ent->content,
3267 XML_SUBSTITUTE_REF, 0, 0, 0);
3268 if (rep != NULL)
3269 xmlFree(rep);
3270 }
3271
3272 /*
3273 * Just output the reference
3274 */
3275 buf[len++] = '&';
3276 if (len > buf_size - i - 10) {
3277 growBuffer(buf);
3278 }
3279 for (;i > 0;i--)
3280 buf[len++] = *cur++;
3281 buf[len++] = ';';
3282 }
3283 }
3284 } else {
3285 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003286 if ((len != 0) || (!normalize)) {
3287 if ((!normalize) || (!in_space)) {
3288 COPY_BUF(l,buf,len,0x20);
3289 if (len > buf_size - 10) {
3290 growBuffer(buf);
3291 }
3292 }
3293 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003294 }
3295 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003296 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003297 COPY_BUF(l,buf,len,c);
3298 if (len > buf_size - 10) {
3299 growBuffer(buf);
3300 }
3301 }
3302 NEXTL(l);
3303 }
3304 GROW;
3305 c = CUR_CHAR(l);
3306 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003307 if ((in_space) && (normalize)) {
3308 while (buf[len - 1] == 0x20) len--;
3309 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003310 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003311 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003312 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003313 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003314 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3315 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003316 } else
3317 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003318 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003319 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003320
3321mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003322 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003323 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003324}
3325
3326/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003327 * xmlParseAttValue:
3328 * @ctxt: an XML parser context
3329 *
3330 * parse a value for an attribute
3331 * Note: the parser won't do substitution of entities here, this
3332 * will be handled later in xmlStringGetNodeList
3333 *
3334 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3335 * "'" ([^<&'] | Reference)* "'"
3336 *
3337 * 3.3.3 Attribute-Value Normalization:
3338 * Before the value of an attribute is passed to the application or
3339 * checked for validity, the XML processor must normalize it as follows:
3340 * - a character reference is processed by appending the referenced
3341 * character to the attribute value
3342 * - an entity reference is processed by recursively processing the
3343 * replacement text of the entity
3344 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3345 * appending #x20 to the normalized value, except that only a single
3346 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3347 * parsed entity or the literal entity value of an internal parsed entity
3348 * - other characters are processed by appending them to the normalized value
3349 * If the declared value is not CDATA, then the XML processor must further
3350 * process the normalized attribute value by discarding any leading and
3351 * trailing space (#x20) characters, and by replacing sequences of space
3352 * (#x20) characters by a single space (#x20) character.
3353 * All attributes for which no declaration has been read should be treated
3354 * by a non-validating parser as if declared CDATA.
3355 *
3356 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3357 */
3358
3359
3360xmlChar *
3361xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003362 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003363}
3364
3365/**
Owen Taylor3473f882001-02-23 17:55:21 +00003366 * xmlParseSystemLiteral:
3367 * @ctxt: an XML parser context
3368 *
3369 * parse an XML Literal
3370 *
3371 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3372 *
3373 * Returns the SystemLiteral parsed or NULL
3374 */
3375
3376xmlChar *
3377xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3378 xmlChar *buf = NULL;
3379 int len = 0;
3380 int size = XML_PARSER_BUFFER_SIZE;
3381 int cur, l;
3382 xmlChar stop;
3383 int state = ctxt->instate;
3384 int count = 0;
3385
3386 SHRINK;
3387 if (RAW == '"') {
3388 NEXT;
3389 stop = '"';
3390 } else if (RAW == '\'') {
3391 NEXT;
3392 stop = '\'';
3393 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003394 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003395 return(NULL);
3396 }
3397
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003398 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003399 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003400 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003401 return(NULL);
3402 }
3403 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3404 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003405 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (len + 5 >= size) {
3407 size *= 2;
3408 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3409 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003410 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003411 ctxt->instate = (xmlParserInputState) state;
3412 return(NULL);
3413 }
3414 }
3415 count++;
3416 if (count > 50) {
3417 GROW;
3418 count = 0;
3419 }
3420 COPY_BUF(l,buf,len,cur);
3421 NEXTL(l);
3422 cur = CUR_CHAR(l);
3423 if (cur == 0) {
3424 GROW;
3425 SHRINK;
3426 cur = CUR_CHAR(l);
3427 }
3428 }
3429 buf[len] = 0;
3430 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003431 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003432 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003433 } else {
3434 NEXT;
3435 }
3436 return(buf);
3437}
3438
3439/**
3440 * xmlParsePubidLiteral:
3441 * @ctxt: an XML parser context
3442 *
3443 * parse an XML public literal
3444 *
3445 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3446 *
3447 * Returns the PubidLiteral parsed or NULL.
3448 */
3449
3450xmlChar *
3451xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3452 xmlChar *buf = NULL;
3453 int len = 0;
3454 int size = XML_PARSER_BUFFER_SIZE;
3455 xmlChar cur;
3456 xmlChar stop;
3457 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003458 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003459
3460 SHRINK;
3461 if (RAW == '"') {
3462 NEXT;
3463 stop = '"';
3464 } else if (RAW == '\'') {
3465 NEXT;
3466 stop = '\'';
3467 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003468 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003469 return(NULL);
3470 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003471 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003472 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003473 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003474 return(NULL);
3475 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003476 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003477 cur = CUR;
3478 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3479 if (len + 1 >= size) {
3480 size *= 2;
3481 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3482 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003483 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return(NULL);
3485 }
3486 }
3487 buf[len++] = cur;
3488 count++;
3489 if (count > 50) {
3490 GROW;
3491 count = 0;
3492 }
3493 NEXT;
3494 cur = CUR;
3495 if (cur == 0) {
3496 GROW;
3497 SHRINK;
3498 cur = CUR;
3499 }
3500 }
3501 buf[len] = 0;
3502 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 } else {
3505 NEXT;
3506 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003507 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 return(buf);
3509}
3510
Daniel Veillard48b2f892001-02-25 16:11:03 +00003511void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003512/**
3513 * xmlParseCharData:
3514 * @ctxt: an XML parser context
3515 * @cdata: int indicating whether we are within a CDATA section
3516 *
3517 * parse a CharData section.
3518 * if we are within a CDATA section ']]>' marks an end of section.
3519 *
3520 * The right angle bracket (>) may be represented using the string "&gt;",
3521 * and must, for compatibility, be escaped using "&gt;" or a character
3522 * reference when it appears in the string "]]>" in content, when that
3523 * string is not marking the end of a CDATA section.
3524 *
3525 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3526 */
3527
3528void
3529xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003530 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003531 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003532 int line = ctxt->input->line;
3533 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003534
3535 SHRINK;
3536 GROW;
3537 /*
3538 * Accelerated common case where input don't need to be
3539 * modified before passing it to the handler.
3540 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003541 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003542 in = ctxt->input->cur;
3543 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003544get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003545 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3546 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003547 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003548 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003549 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003550 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003551 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003552 ctxt->input->line++;
3553 in++;
3554 }
3555 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003556 }
3557 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003558 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003559 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003560 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003561 return;
3562 }
3563 in++;
3564 goto get_more;
3565 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003566 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003567 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003568 if ((ctxt->sax->ignorableWhitespace !=
3569 ctxt->sax->characters) &&
3570 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003571 const xmlChar *tmp = ctxt->input->cur;
3572 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003573
Daniel Veillarda7374592001-05-10 14:17:55 +00003574 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003575 ctxt->sax->ignorableWhitespace(ctxt->userData,
3576 tmp, nbchar);
3577 } else if (ctxt->sax->characters != NULL)
3578 ctxt->sax->characters(ctxt->userData,
3579 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003580 line = ctxt->input->line;
3581 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003582 } else {
3583 if (ctxt->sax->characters != NULL)
3584 ctxt->sax->characters(ctxt->userData,
3585 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003586 line = ctxt->input->line;
3587 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003588 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003589 }
3590 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003591 if (*in == 0xD) {
3592 in++;
3593 if (*in == 0xA) {
3594 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003595 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003596 ctxt->input->line++;
3597 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003598 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003599 in--;
3600 }
3601 if (*in == '<') {
3602 return;
3603 }
3604 if (*in == '&') {
3605 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003606 }
3607 SHRINK;
3608 GROW;
3609 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003610 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003611 nbchar = 0;
3612 }
Daniel Veillard50582112001-03-26 22:52:16 +00003613 ctxt->input->line = line;
3614 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003615 xmlParseCharDataComplex(ctxt, cdata);
3616}
3617
Daniel Veillard01c13b52002-12-10 15:19:08 +00003618/**
3619 * xmlParseCharDataComplex:
3620 * @ctxt: an XML parser context
3621 * @cdata: int indicating whether we are within a CDATA section
3622 *
3623 * parse a CharData section.this is the fallback function
3624 * of xmlParseCharData() when the parsing requires handling
3625 * of non-ASCII characters.
3626 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003627void
3628xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003629 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3630 int nbchar = 0;
3631 int cur, l;
3632 int count = 0;
3633
3634 SHRINK;
3635 GROW;
3636 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003637 while ((cur != '<') && /* checked */
3638 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003639 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003640 if ((cur == ']') && (NXT(1) == ']') &&
3641 (NXT(2) == '>')) {
3642 if (cdata) break;
3643 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003644 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003645 }
3646 }
3647 COPY_BUF(l,buf,nbchar,cur);
3648 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003649 buf[nbchar] = 0;
3650
Owen Taylor3473f882001-02-23 17:55:21 +00003651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003652 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003653 */
3654 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3655 if (areBlanks(ctxt, buf, nbchar)) {
3656 if (ctxt->sax->ignorableWhitespace != NULL)
3657 ctxt->sax->ignorableWhitespace(ctxt->userData,
3658 buf, nbchar);
3659 } else {
3660 if (ctxt->sax->characters != NULL)
3661 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3662 }
3663 }
3664 nbchar = 0;
3665 }
3666 count++;
3667 if (count > 50) {
3668 GROW;
3669 count = 0;
3670 }
3671 NEXTL(l);
3672 cur = CUR_CHAR(l);
3673 }
3674 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003675 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003677 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003678 */
3679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3680 if (areBlanks(ctxt, buf, nbchar)) {
3681 if (ctxt->sax->ignorableWhitespace != NULL)
3682 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3683 } else {
3684 if (ctxt->sax->characters != NULL)
3685 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3686 }
3687 }
3688 }
3689}
3690
3691/**
3692 * xmlParseExternalID:
3693 * @ctxt: an XML parser context
3694 * @publicID: a xmlChar** receiving PubidLiteral
3695 * @strict: indicate whether we should restrict parsing to only
3696 * production [75], see NOTE below
3697 *
3698 * Parse an External ID or a Public ID
3699 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003700 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003701 * 'PUBLIC' S PubidLiteral S SystemLiteral
3702 *
3703 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3704 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3705 *
3706 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3707 *
3708 * Returns the function returns SystemLiteral and in the second
3709 * case publicID receives PubidLiteral, is strict is off
3710 * it is possible to return NULL and have publicID set.
3711 */
3712
3713xmlChar *
3714xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3715 xmlChar *URI = NULL;
3716
3717 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003718
3719 *publicID = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00003720 if (memcmp(CUR_PTR, "SYSTEM", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003721 SKIP(6);
3722 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003723 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3724 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003725 }
3726 SKIP_BLANKS;
3727 URI = xmlParseSystemLiteral(ctxt);
3728 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003729 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00003731 } else if (memcmp(CUR_PTR, "PUBLIC", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003732 SKIP(6);
3733 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003735 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
3737 SKIP_BLANKS;
3738 *publicID = xmlParsePubidLiteral(ctxt);
3739 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003740 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003741 }
3742 if (strict) {
3743 /*
3744 * We don't handle [83] so "S SystemLiteral" is required.
3745 */
3746 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003747 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003748 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003749 }
3750 } else {
3751 /*
3752 * We handle [83] so we return immediately, if
3753 * "S SystemLiteral" is not detected. From a purely parsing
3754 * point of view that's a nice mess.
3755 */
3756 const xmlChar *ptr;
3757 GROW;
3758
3759 ptr = CUR_PTR;
3760 if (!IS_BLANK(*ptr)) return(NULL);
3761
3762 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3763 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3764 }
3765 SKIP_BLANKS;
3766 URI = xmlParseSystemLiteral(ctxt);
3767 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003768 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 }
3771 return(URI);
3772}
3773
3774/**
3775 * xmlParseComment:
3776 * @ctxt: an XML parser context
3777 *
3778 * Skip an XML (SGML) comment <!-- .... -->
3779 * The spec says that "For compatibility, the string "--" (double-hyphen)
3780 * must not occur within comments. "
3781 *
3782 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3783 */
3784void
3785xmlParseComment(xmlParserCtxtPtr ctxt) {
3786 xmlChar *buf = NULL;
3787 int len;
3788 int size = XML_PARSER_BUFFER_SIZE;
3789 int q, ql;
3790 int r, rl;
3791 int cur, l;
3792 xmlParserInputState state;
3793 xmlParserInputPtr input = ctxt->input;
3794 int count = 0;
3795
3796 /*
3797 * Check that there is a comment right here.
3798 */
3799 if ((RAW != '<') || (NXT(1) != '!') ||
3800 (NXT(2) != '-') || (NXT(3) != '-')) return;
3801
3802 state = ctxt->instate;
3803 ctxt->instate = XML_PARSER_COMMENT;
3804 SHRINK;
3805 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003806 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003807 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003809 ctxt->instate = state;
3810 return;
3811 }
3812 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003813 if (q == 0)
3814 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003815 NEXTL(ql);
3816 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003817 if (r == 0)
3818 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003819 NEXTL(rl);
3820 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003821 if (cur == 0)
3822 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003823 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003824 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003825 ((cur != '>') ||
3826 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003827 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003828 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003829 }
3830 if (len + 5 >= size) {
3831 size *= 2;
3832 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3833 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003834 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003835 ctxt->instate = state;
3836 return;
3837 }
3838 }
3839 COPY_BUF(ql,buf,len,q);
3840 q = r;
3841 ql = rl;
3842 r = cur;
3843 rl = l;
3844
3845 count++;
3846 if (count > 50) {
3847 GROW;
3848 count = 0;
3849 }
3850 NEXTL(l);
3851 cur = CUR_CHAR(l);
3852 if (cur == 0) {
3853 SHRINK;
3854 GROW;
3855 cur = CUR_CHAR(l);
3856 }
3857 }
3858 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003859 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003860 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003861 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003862 xmlFree(buf);
3863 } else {
3864 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003865 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3866 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003867 }
3868 NEXT;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3870 (!ctxt->disableSAX))
3871 ctxt->sax->comment(ctxt->userData, buf);
3872 xmlFree(buf);
3873 }
3874 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003875 return;
3876not_terminated:
3877 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3878 "Comment not terminated\n", NULL);
3879 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003880}
3881
3882/**
3883 * xmlParsePITarget:
3884 * @ctxt: an XML parser context
3885 *
3886 * parse the name of a PI
3887 *
3888 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3889 *
3890 * Returns the PITarget name or NULL
3891 */
3892
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003893const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003894xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003895 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003896
3897 name = xmlParseName(ctxt);
3898 if ((name != NULL) &&
3899 ((name[0] == 'x') || (name[0] == 'X')) &&
3900 ((name[1] == 'm') || (name[1] == 'M')) &&
3901 ((name[2] == 'l') || (name[2] == 'L'))) {
3902 int i;
3903 if ((name[0] == 'x') && (name[1] == 'm') &&
3904 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003905 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003906 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003907 return(name);
3908 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003909 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003910 return(name);
3911 }
3912 for (i = 0;;i++) {
3913 if (xmlW3CPIs[i] == NULL) break;
3914 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3915 return(name);
3916 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003917 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3918 "xmlParsePITarget: invalid name prefix 'xml'\n",
3919 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003920 }
3921 return(name);
3922}
3923
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is the word catalog, an '=' sign and a quoted
 * URL, with optional blanks around each of them and nothing else after
 * the closing quote.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;

    /*
     * NOTE(review): a missing '=' returns without emitting the warning
     * that the other syntax errors produce - confirm this is intended.
     */
    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=') {
        return;
    }

    /* opening quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* quoted value, up to the matching quote */
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri == NULL)
        return;
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
    xmlFree(uri);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3985
Owen Taylor3473f882001-02-23 17:55:21 +00003986/**
3987 * xmlParsePI:
3988 * @ctxt: an XML parser context
3989 *
3990 * parse an XML Processing Instruction.
3991 *
3992 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3993 *
3994 * The processing is transfered to SAX once parsed.
3995 */
3996
3997void
3998xmlParsePI(xmlParserCtxtPtr ctxt) {
3999 xmlChar *buf = NULL;
4000 int len = 0;
4001 int size = XML_PARSER_BUFFER_SIZE;
4002 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004003 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004004 xmlParserInputState state;
4005 int count = 0;
4006
4007 if ((RAW == '<') && (NXT(1) == '?')) {
4008 xmlParserInputPtr input = ctxt->input;
4009 state = ctxt->instate;
4010 ctxt->instate = XML_PARSER_PI;
4011 /*
4012 * this is a Processing Instruction.
4013 */
4014 SKIP(2);
4015 SHRINK;
4016
4017 /*
4018 * Parse the target name and check for special support like
4019 * namespace.
4020 */
4021 target = xmlParsePITarget(ctxt);
4022 if (target != NULL) {
4023 if ((RAW == '?') && (NXT(1) == '>')) {
4024 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004025 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4026 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004027 }
4028 SKIP(2);
4029
4030 /*
4031 * SAX: PI detected.
4032 */
4033 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4034 (ctxt->sax->processingInstruction != NULL))
4035 ctxt->sax->processingInstruction(ctxt->userData,
4036 target, NULL);
4037 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004038 return;
4039 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004040 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004041 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004042 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 ctxt->instate = state;
4044 return;
4045 }
4046 cur = CUR;
4047 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004048 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4049 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004050 }
4051 SKIP_BLANKS;
4052 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004053 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004054 ((cur != '?') || (NXT(1) != '>'))) {
4055 if (len + 5 >= size) {
4056 size *= 2;
4057 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4058 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004059 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004060 ctxt->instate = state;
4061 return;
4062 }
4063 }
4064 count++;
4065 if (count > 50) {
4066 GROW;
4067 count = 0;
4068 }
4069 COPY_BUF(l,buf,len,cur);
4070 NEXTL(l);
4071 cur = CUR_CHAR(l);
4072 if (cur == 0) {
4073 SHRINK;
4074 GROW;
4075 cur = CUR_CHAR(l);
4076 }
4077 }
4078 buf[len] = 0;
4079 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004080 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4081 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004082 } else {
4083 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4085 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004086 }
4087 SKIP(2);
4088
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004089#ifdef LIBXML_CATALOG_ENABLED
4090 if (((state == XML_PARSER_MISC) ||
4091 (state == XML_PARSER_START)) &&
4092 (xmlStrEqual(target, XML_CATALOG_PI))) {
4093 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4094 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4095 (allow == XML_CATA_ALLOW_ALL))
4096 xmlParseCatalogPI(ctxt, buf);
4097 }
4098#endif
4099
4100
Owen Taylor3473f882001-02-23 17:55:21 +00004101 /*
4102 * SAX: PI detected.
4103 */
4104 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4105 (ctxt->sax->processingInstruction != NULL))
4106 ctxt->sax->processingInstruction(ctxt->userData,
4107 target, buf);
4108 }
4109 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004110 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004111 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004112 }
4113 ctxt->instate = state;
4114 }
4115}
4116
4117/**
4118 * xmlParseNotationDecl:
4119 * @ctxt: an XML parser context
4120 *
4121 * parse a notation declaration
4122 *
4123 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4124 *
4125 * Hence there is actually 3 choices:
4126 * 'PUBLIC' S PubidLiteral
4127 * 'PUBLIC' S PubidLiteral S SystemLiteral
4128 * and 'SYSTEM' S SystemLiteral
4129 *
4130 * See the NOTE on xmlParseExternalID().
4131 */
4132
4133void
4134xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004135 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004136 xmlChar *Pubid;
4137 xmlChar *Systemid;
4138
Daniel Veillard8f597c32003-10-06 08:19:27 +00004139 if (memcmp(CUR_PTR, "<!NOTATION", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004140 xmlParserInputPtr input = ctxt->input;
4141 SHRINK;
4142 SKIP(10);
4143 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004144 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4145 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004146 return;
4147 }
4148 SKIP_BLANKS;
4149
Daniel Veillard76d66f42001-05-16 21:05:17 +00004150 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004151 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004152 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004153 return;
4154 }
4155 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004156 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004157 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004158 return;
4159 }
4160 SKIP_BLANKS;
4161
4162 /*
4163 * Parse the IDs.
4164 */
4165 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4166 SKIP_BLANKS;
4167
4168 if (RAW == '>') {
4169 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004170 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4171 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004172 }
4173 NEXT;
4174 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4175 (ctxt->sax->notationDecl != NULL))
4176 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4177 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004178 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004179 }
Owen Taylor3473f882001-02-23 17:55:21 +00004180 if (Systemid != NULL) xmlFree(Systemid);
4181 if (Pubid != NULL) xmlFree(Pubid);
4182 }
4183}
4184
4185/**
4186 * xmlParseEntityDecl:
4187 * @ctxt: an XML parser context
4188 *
4189 * parse <!ENTITY declarations
4190 *
4191 * [70] EntityDecl ::= GEDecl | PEDecl
4192 *
4193 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4194 *
4195 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4196 *
4197 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4198 *
4199 * [74] PEDef ::= EntityValue | ExternalID
4200 *
4201 * [76] NDataDecl ::= S 'NDATA' S Name
4202 *
4203 * [ VC: Notation Declared ]
4204 * The Name must match the declared name of a notation.
4205 */
4206
4207void
4208xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004209 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004210 xmlChar *value = NULL;
4211 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004212 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004213 int isParameter = 0;
4214 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004215 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004216
4217 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004218 if (memcmp(CUR_PTR, "<!ENTITY", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004219 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 SHRINK;
4221 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004222 skipped = SKIP_BLANKS;
4223 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004224 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4225 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004226 }
Owen Taylor3473f882001-02-23 17:55:21 +00004227
4228 if (RAW == '%') {
4229 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004230 skipped = SKIP_BLANKS;
4231 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004232 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4233 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004234 }
Owen Taylor3473f882001-02-23 17:55:21 +00004235 isParameter = 1;
4236 }
4237
Daniel Veillard76d66f42001-05-16 21:05:17 +00004238 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004239 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004240 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4241 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004242 return;
4243 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004244 skipped = SKIP_BLANKS;
4245 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004246 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4247 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004248 }
Owen Taylor3473f882001-02-23 17:55:21 +00004249
Daniel Veillardf5582f12002-06-11 10:08:16 +00004250 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004251 /*
4252 * handle the various case of definitions...
4253 */
4254 if (isParameter) {
4255 if ((RAW == '"') || (RAW == '\'')) {
4256 value = xmlParseEntityValue(ctxt, &orig);
4257 if (value) {
4258 if ((ctxt->sax != NULL) &&
4259 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4260 ctxt->sax->entityDecl(ctxt->userData, name,
4261 XML_INTERNAL_PARAMETER_ENTITY,
4262 NULL, NULL, value);
4263 }
4264 } else {
4265 URI = xmlParseExternalID(ctxt, &literal, 1);
4266 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004267 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 if (URI) {
4270 xmlURIPtr uri;
4271
4272 uri = xmlParseURI((const char *) URI);
4273 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004274 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4275 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004276 /*
4277 * This really ought to be a well formedness error
4278 * but the XML Core WG decided otherwise c.f. issue
4279 * E26 of the XML erratas.
4280 */
Owen Taylor3473f882001-02-23 17:55:21 +00004281 } else {
4282 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004283 /*
4284 * Okay this is foolish to block those but not
4285 * invalid URIs.
4286 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004287 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004288 } else {
4289 if ((ctxt->sax != NULL) &&
4290 (!ctxt->disableSAX) &&
4291 (ctxt->sax->entityDecl != NULL))
4292 ctxt->sax->entityDecl(ctxt->userData, name,
4293 XML_EXTERNAL_PARAMETER_ENTITY,
4294 literal, URI, NULL);
4295 }
4296 xmlFreeURI(uri);
4297 }
4298 }
4299 }
4300 } else {
4301 if ((RAW == '"') || (RAW == '\'')) {
4302 value = xmlParseEntityValue(ctxt, &orig);
4303 if ((ctxt->sax != NULL) &&
4304 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4305 ctxt->sax->entityDecl(ctxt->userData, name,
4306 XML_INTERNAL_GENERAL_ENTITY,
4307 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004308 /*
4309 * For expat compatibility in SAX mode.
4310 */
4311 if ((ctxt->myDoc == NULL) ||
4312 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4313 if (ctxt->myDoc == NULL) {
4314 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4315 }
4316 if (ctxt->myDoc->intSubset == NULL)
4317 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4318 BAD_CAST "fake", NULL, NULL);
4319
Daniel Veillard1af9a412003-08-20 22:54:39 +00004320 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4321 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004322 }
Owen Taylor3473f882001-02-23 17:55:21 +00004323 } else {
4324 URI = xmlParseExternalID(ctxt, &literal, 1);
4325 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004326 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004327 }
4328 if (URI) {
4329 xmlURIPtr uri;
4330
4331 uri = xmlParseURI((const char *)URI);
4332 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004333 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4334 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004335 /*
4336 * This really ought to be a well formedness error
4337 * but the XML Core WG decided otherwise c.f. issue
4338 * E26 of the XML erratas.
4339 */
Owen Taylor3473f882001-02-23 17:55:21 +00004340 } else {
4341 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004342 /*
4343 * Okay this is foolish to block those but not
4344 * invalid URIs.
4345 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004346 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004347 }
4348 xmlFreeURI(uri);
4349 }
4350 }
4351 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004352 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4353 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004354 }
4355 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004356 if (memcmp(CUR_PTR, "NDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004357 SKIP(5);
4358 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004359 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4360 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004361 }
4362 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004363 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004364 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4365 (ctxt->sax->unparsedEntityDecl != NULL))
4366 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4367 literal, URI, ndata);
4368 } else {
4369 if ((ctxt->sax != NULL) &&
4370 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4371 ctxt->sax->entityDecl(ctxt->userData, name,
4372 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4373 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004374 /*
4375 * For expat compatibility in SAX mode.
4376 * assuming the entity repalcement was asked for
4377 */
4378 if ((ctxt->replaceEntities != 0) &&
4379 ((ctxt->myDoc == NULL) ||
4380 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4381 if (ctxt->myDoc == NULL) {
4382 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4383 }
4384
4385 if (ctxt->myDoc->intSubset == NULL)
4386 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4387 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004388 xmlSAX2EntityDecl(ctxt, name,
4389 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4390 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004391 }
Owen Taylor3473f882001-02-23 17:55:21 +00004392 }
4393 }
4394 }
4395 SKIP_BLANKS;
4396 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004397 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004398 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 } else {
4400 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004401 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4402 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004403 }
4404 NEXT;
4405 }
4406 if (orig != NULL) {
4407 /*
4408 * Ugly mechanism to save the raw entity value.
4409 */
4410 xmlEntityPtr cur = NULL;
4411
4412 if (isParameter) {
4413 if ((ctxt->sax != NULL) &&
4414 (ctxt->sax->getParameterEntity != NULL))
4415 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4416 } else {
4417 if ((ctxt->sax != NULL) &&
4418 (ctxt->sax->getEntity != NULL))
4419 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004420 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004421 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004422 }
Owen Taylor3473f882001-02-23 17:55:21 +00004423 }
4424 if (cur != NULL) {
4425 if (cur->orig != NULL)
4426 xmlFree(orig);
4427 else
4428 cur->orig = orig;
4429 } else
4430 xmlFree(orig);
4431 }
Owen Taylor3473f882001-02-23 17:55:21 +00004432 if (value != NULL) xmlFree(value);
4433 if (URI != NULL) xmlFree(URI);
4434 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004435 }
4436}
4437
4438/**
4439 * xmlParseDefaultDecl:
4440 * @ctxt: an XML parser context
4441 * @value: Receive a possible fixed default value for the attribute
4442 *
4443 * Parse an attribute default declaration
4444 *
4445 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4446 *
4447 * [ VC: Required Attribute ]
4448 * if the default declaration is the keyword #REQUIRED, then the
4449 * attribute must be specified for all elements of the type in the
4450 * attribute-list declaration.
4451 *
4452 * [ VC: Attribute Default Legal ]
4453 * The declared default value must meet the lexical constraints of
4454 * the declared attribute type c.f. xmlValidateAttributeDecl()
4455 *
4456 * [ VC: Fixed Attribute Default ]
4457 * if an attribute has a default value declared with the #FIXED
4458 * keyword, instances of that attribute must match the default value.
4459 *
4460 * [ WFC: No < in Attribute Values ]
4461 * handled in xmlParseAttValue()
4462 *
4463 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4464 * or XML_ATTRIBUTE_FIXED.
4465 */
4466
4467int
4468xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4469 int val;
4470 xmlChar *ret;
4471
4472 *value = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004473 if (memcmp(CUR_PTR, "#REQUIRED", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004474 SKIP(9);
4475 return(XML_ATTRIBUTE_REQUIRED);
4476 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00004477 if (memcmp(CUR_PTR, "#IMPLIED", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004478 SKIP(8);
4479 return(XML_ATTRIBUTE_IMPLIED);
4480 }
4481 val = XML_ATTRIBUTE_NONE;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004482 if (memcmp(CUR_PTR, "#FIXED", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004483 SKIP(6);
4484 val = XML_ATTRIBUTE_FIXED;
4485 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004486 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4487 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004488 }
4489 SKIP_BLANKS;
4490 }
4491 ret = xmlParseAttValue(ctxt);
4492 ctxt->instate = XML_PARSER_DTD;
4493 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004494 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004495 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004496 } else
4497 *value = ret;
4498 return(val);
4499}
4500
4501/**
4502 * xmlParseNotationType:
4503 * @ctxt: an XML parser context
4504 *
4505 * parse an Notation attribute type.
4506 *
4507 * Note: the leading 'NOTATION' S part has already being parsed...
4508 *
4509 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4510 *
4511 * [ VC: Notation Attributes ]
4512 * Values of this type must match one of the notation names included
4513 * in the declaration; all notation names in the declaration must be declared.
4514 *
4515 * Returns: the notation attribute tree built while parsing
4516 */
4517
4518xmlEnumerationPtr
4519xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004520 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004521 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4522
4523 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004524 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004525 return(NULL);
4526 }
4527 SHRINK;
4528 do {
4529 NEXT;
4530 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004531 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004532 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004533 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4534 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004535 return(ret);
4536 }
4537 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004538 if (cur == NULL) return(ret);
4539 if (last == NULL) ret = last = cur;
4540 else {
4541 last->next = cur;
4542 last = cur;
4543 }
4544 SKIP_BLANKS;
4545 } while (RAW == '|');
4546 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004547 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004548 if ((last != NULL) && (last != ret))
4549 xmlFreeEnumeration(last);
4550 return(ret);
4551 }
4552 NEXT;
4553 return(ret);
4554}
4555
4556/**
4557 * xmlParseEnumerationType:
4558 * @ctxt: an XML parser context
4559 *
4560 * parse an Enumeration attribute type.
4561 *
4562 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4563 *
4564 * [ VC: Enumeration ]
4565 * Values of this type must match one of the Nmtoken tokens in
4566 * the declaration
4567 *
4568 * Returns: the enumeration attribute tree built while parsing
4569 */
4570
4571xmlEnumerationPtr
4572xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4573 xmlChar *name;
4574 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4575
4576 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004577 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004578 return(NULL);
4579 }
4580 SHRINK;
4581 do {
4582 NEXT;
4583 SKIP_BLANKS;
4584 name = xmlParseNmtoken(ctxt);
4585 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004586 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004587 return(ret);
4588 }
4589 cur = xmlCreateEnumeration(name);
4590 xmlFree(name);
4591 if (cur == NULL) return(ret);
4592 if (last == NULL) ret = last = cur;
4593 else {
4594 last->next = cur;
4595 last = cur;
4596 }
4597 SKIP_BLANKS;
4598 } while (RAW == '|');
4599 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004600 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004601 return(ret);
4602 }
4603 NEXT;
4604 return(ret);
4605}
4606
4607/**
4608 * xmlParseEnumeratedType:
4609 * @ctxt: an XML parser context
4610 * @tree: the enumeration tree built while parsing
4611 *
4612 * parse an Enumerated attribute type.
4613 *
4614 * [57] EnumeratedType ::= NotationType | Enumeration
4615 *
4616 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4617 *
4618 *
4619 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4620 */
4621
4622int
4623xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00004624 if (memcmp(CUR_PTR, "NOTATION", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004625 SKIP(8);
4626 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004627 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4628 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004629 return(0);
4630 }
4631 SKIP_BLANKS;
4632 *tree = xmlParseNotationType(ctxt);
4633 if (*tree == NULL) return(0);
4634 return(XML_ATTRIBUTE_NOTATION);
4635 }
4636 *tree = xmlParseEnumerationType(ctxt);
4637 if (*tree == NULL) return(0);
4638 return(XML_ATTRIBUTE_ENUMERATION);
4639}
4640
4641/**
4642 * xmlParseAttributeType:
4643 * @ctxt: an XML parser context
4644 * @tree: the enumeration tree built while parsing
4645 *
4646 * parse the Attribute list def for an element
4647 *
4648 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4649 *
4650 * [55] StringType ::= 'CDATA'
4651 *
4652 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4653 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4654 *
4655 * Validity constraints for attribute values syntax are checked in
4656 * xmlValidateAttributeValue()
4657 *
4658 * [ VC: ID ]
4659 * Values of type ID must match the Name production. A name must not
4660 * appear more than once in an XML document as a value of this type;
4661 * i.e., ID values must uniquely identify the elements which bear them.
4662 *
4663 * [ VC: One ID per Element Type ]
4664 * No element type may have more than one ID attribute specified.
4665 *
4666 * [ VC: ID Attribute Default ]
4667 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4668 *
4669 * [ VC: IDREF ]
4670 * Values of type IDREF must match the Name production, and values
4671 * of type IDREFS must match Names; each IDREF Name must match the value
4672 * of an ID attribute on some element in the XML document; i.e. IDREF
4673 * values must match the value of some ID attribute.
4674 *
4675 * [ VC: Entity Name ]
4676 * Values of type ENTITY must match the Name production, values
4677 * of type ENTITIES must match Names; each Entity Name must match the
4678 * name of an unparsed entity declared in the DTD.
4679 *
4680 * [ VC: Name Token ]
4681 * Values of type NMTOKEN must match the Nmtoken production; values
4682 * of type NMTOKENS must match Nmtokens.
4683 *
4684 * Returns the attribute type
4685 */
4686int
4687xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4688 SHRINK;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004689 if (memcmp(CUR_PTR, "CDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004690 SKIP(5);
4691 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004692 } else if (memcmp(CUR_PTR, "IDREFS", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004693 SKIP(6);
4694 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004695 } else if (memcmp(CUR_PTR, "IDREF", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004696 SKIP(5);
4697 return(XML_ATTRIBUTE_IDREF);
4698 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4699 SKIP(2);
4700 return(XML_ATTRIBUTE_ID);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004701 } else if (memcmp(CUR_PTR, "ENTITY", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004702 SKIP(6);
4703 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004704 } else if (memcmp(CUR_PTR, "ENTITIES", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004705 SKIP(8);
4706 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004707 } else if (memcmp(CUR_PTR, "NMTOKENS", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004708 SKIP(8);
4709 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004710 } else if (memcmp(CUR_PTR, "NMTOKEN", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004711 SKIP(7);
4712 return(XML_ATTRIBUTE_NMTOKEN);
4713 }
4714 return(xmlParseEnumeratedType(ctxt, tree));
4715}
4716
4717/**
4718 * xmlParseAttributeListDecl:
4719 * @ctxt: an XML parser context
4720 *
4721 * : parse the Attribute list def for an element
4722 *
4723 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4724 *
4725 * [53] AttDef ::= S Name S AttType S DefaultDecl
4726 *
4727 */
4728void
4729xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004730 const xmlChar *elemName;
4731 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004732 xmlEnumerationPtr tree;
4733
Daniel Veillard8f597c32003-10-06 08:19:27 +00004734 if (memcmp(CUR_PTR, "<!ATTLIST", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004735 xmlParserInputPtr input = ctxt->input;
4736
4737 SKIP(9);
4738 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004739 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004740 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004741 }
4742 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004743 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004744 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4746 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004747 return;
4748 }
4749 SKIP_BLANKS;
4750 GROW;
4751 while (RAW != '>') {
4752 const xmlChar *check = CUR_PTR;
4753 int type;
4754 int def;
4755 xmlChar *defaultValue = NULL;
4756
4757 GROW;
4758 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004759 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004760 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004761 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4762 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004763 break;
4764 }
4765 GROW;
4766 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004768 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004769 if (defaultValue != NULL)
4770 xmlFree(defaultValue);
4771 break;
4772 }
4773 SKIP_BLANKS;
4774
4775 type = xmlParseAttributeType(ctxt, &tree);
4776 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004777 if (defaultValue != NULL)
4778 xmlFree(defaultValue);
4779 break;
4780 }
4781
4782 GROW;
4783 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004786 if (defaultValue != NULL)
4787 xmlFree(defaultValue);
4788 if (tree != NULL)
4789 xmlFreeEnumeration(tree);
4790 break;
4791 }
4792 SKIP_BLANKS;
4793
4794 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4795 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004796 if (defaultValue != NULL)
4797 xmlFree(defaultValue);
4798 if (tree != NULL)
4799 xmlFreeEnumeration(tree);
4800 break;
4801 }
4802
4803 GROW;
4804 if (RAW != '>') {
4805 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004806 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004807 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004808 if (defaultValue != NULL)
4809 xmlFree(defaultValue);
4810 if (tree != NULL)
4811 xmlFreeEnumeration(tree);
4812 break;
4813 }
4814 SKIP_BLANKS;
4815 }
4816 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004817 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4818 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004819 if (defaultValue != NULL)
4820 xmlFree(defaultValue);
4821 if (tree != NULL)
4822 xmlFreeEnumeration(tree);
4823 break;
4824 }
4825 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4826 (ctxt->sax->attributeDecl != NULL))
4827 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4828 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004829 else if (tree != NULL)
4830 xmlFreeEnumeration(tree);
4831
4832 if ((ctxt->sax2) && (defaultValue != NULL) &&
4833 (def != XML_ATTRIBUTE_IMPLIED) &&
4834 (def != XML_ATTRIBUTE_REQUIRED)) {
4835 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4836 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004837 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4838 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4839 }
Owen Taylor3473f882001-02-23 17:55:21 +00004840 if (defaultValue != NULL)
4841 xmlFree(defaultValue);
4842 GROW;
4843 }
4844 if (RAW == '>') {
4845 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004846 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4847 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004848 }
4849 NEXT;
4850 }
Owen Taylor3473f882001-02-23 17:55:21 +00004851 }
4852}
4853
4854/**
4855 * xmlParseElementMixedContentDecl:
4856 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004857 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004858 *
4859 * parse the declaration for a Mixed Element content
4860 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4861 *
4862 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4863 * '(' S? '#PCDATA' S? ')'
4864 *
4865 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4866 *
4867 * [ VC: No Duplicate Types ]
4868 * The same name must not appear more than once in a single
4869 * mixed-content declaration.
4870 *
4871 * returns: the list of the xmlElementContentPtr describing the element choices
4872 */
4873xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004874xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004875 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004876 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004877
4878 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004879 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004880 SKIP(7);
4881 SKIP_BLANKS;
4882 SHRINK;
4883 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004884 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004885 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4886"Element content declaration doesn't start and stop in the same entity\n",
4887 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004888 }
Owen Taylor3473f882001-02-23 17:55:21 +00004889 NEXT;
4890 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4891 if (RAW == '*') {
4892 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4893 NEXT;
4894 }
4895 return(ret);
4896 }
4897 if ((RAW == '(') || (RAW == '|')) {
4898 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4899 if (ret == NULL) return(NULL);
4900 }
4901 while (RAW == '|') {
4902 NEXT;
4903 if (elem == NULL) {
4904 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4905 if (ret == NULL) return(NULL);
4906 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004907 if (cur != NULL)
4908 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004909 cur = ret;
4910 } else {
4911 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4912 if (n == NULL) return(NULL);
4913 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004914 if (n->c1 != NULL)
4915 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004916 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004917 if (n != NULL)
4918 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004919 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004920 }
4921 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004922 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004923 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004924 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004925 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004926 xmlFreeElementContent(cur);
4927 return(NULL);
4928 }
4929 SKIP_BLANKS;
4930 GROW;
4931 }
4932 if ((RAW == ')') && (NXT(1) == '*')) {
4933 if (elem != NULL) {
4934 cur->c2 = xmlNewElementContent(elem,
4935 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004936 if (cur->c2 != NULL)
4937 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004938 }
4939 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004940 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004941 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4942"Element content declaration doesn't start and stop in the same entity\n",
4943 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004944 }
Owen Taylor3473f882001-02-23 17:55:21 +00004945 SKIP(2);
4946 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004947 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004948 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004949 return(NULL);
4950 }
4951
4952 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004953 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004954 }
4955 return(ret);
4956}
4957
4958/**
4959 * xmlParseElementChildrenContentDecl:
4960 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004961 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004962 *
4963 * parse the declaration for an element children content model
4964 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4965 *
4966 *
4967 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4968 *
4969 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4970 *
4971 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4972 *
4973 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4974 *
4975 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4976 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004977 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004978 * opening or closing parentheses in a choice, seq, or Mixed
4979 * construct is contained in the replacement text for a parameter
4980 * entity, both must be contained in the same replacement text. For
4981 * interoperability, if a parameter-entity reference appears in a
4982 * choice, seq, or Mixed construct, its replacement text should not
4983 * be empty, and neither the first nor last non-blank character of
4984 * the replacement text should be a connector (| or ,).
4985 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004986 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004987 * hierarchy.
4988 */
4989xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004990xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004991 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004992 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004993 xmlChar type = 0;
4994
4995 SKIP_BLANKS;
4996 GROW;
4997 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004998 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004999
Owen Taylor3473f882001-02-23 17:55:21 +00005000 /* Recurse on first child */
5001 NEXT;
5002 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005003 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005004 SKIP_BLANKS;
5005 GROW;
5006 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005007 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005008 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005009 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005010 return(NULL);
5011 }
5012 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005013 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005014 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005015 return(NULL);
5016 }
Owen Taylor3473f882001-02-23 17:55:21 +00005017 GROW;
5018 if (RAW == '?') {
5019 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5020 NEXT;
5021 } else if (RAW == '*') {
5022 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5023 NEXT;
5024 } else if (RAW == '+') {
5025 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5026 NEXT;
5027 } else {
5028 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5029 }
Owen Taylor3473f882001-02-23 17:55:21 +00005030 GROW;
5031 }
5032 SKIP_BLANKS;
5033 SHRINK;
5034 while (RAW != ')') {
5035 /*
5036 * Each loop we parse one separator and one element.
5037 */
5038 if (RAW == ',') {
5039 if (type == 0) type = CUR;
5040
5041 /*
5042 * Detect "Name | Name , Name" error
5043 */
5044 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005045 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005046 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005047 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005048 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005049 xmlFreeElementContent(last);
5050 if (ret != NULL)
5051 xmlFreeElementContent(ret);
5052 return(NULL);
5053 }
5054 NEXT;
5055
5056 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5057 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005058 if ((last != NULL) && (last != ret))
5059 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005060 xmlFreeElementContent(ret);
5061 return(NULL);
5062 }
5063 if (last == NULL) {
5064 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005065 if (ret != NULL)
5066 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 ret = cur = op;
5068 } else {
5069 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005070 if (op != NULL)
5071 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005073 if (last != NULL)
5074 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005075 cur =op;
5076 last = NULL;
5077 }
5078 } else if (RAW == '|') {
5079 if (type == 0) type = CUR;
5080
5081 /*
5082 * Detect "Name , Name | Name" error
5083 */
5084 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005085 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005086 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005088 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005089 xmlFreeElementContent(last);
5090 if (ret != NULL)
5091 xmlFreeElementContent(ret);
5092 return(NULL);
5093 }
5094 NEXT;
5095
5096 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5097 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005098 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005099 xmlFreeElementContent(last);
5100 if (ret != NULL)
5101 xmlFreeElementContent(ret);
5102 return(NULL);
5103 }
5104 if (last == NULL) {
5105 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005106 if (ret != NULL)
5107 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005108 ret = cur = op;
5109 } else {
5110 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005111 if (op != NULL)
5112 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005113 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005114 if (last != NULL)
5115 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005116 cur =op;
5117 last = NULL;
5118 }
5119 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005120 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005121 if (ret != NULL)
5122 xmlFreeElementContent(ret);
5123 return(NULL);
5124 }
5125 GROW;
5126 SKIP_BLANKS;
5127 GROW;
5128 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005129 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005130 /* Recurse on second child */
5131 NEXT;
5132 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005133 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005134 SKIP_BLANKS;
5135 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005136 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005137 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005138 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005139 if (ret != NULL)
5140 xmlFreeElementContent(ret);
5141 return(NULL);
5142 }
5143 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005144 if (RAW == '?') {
5145 last->ocur = XML_ELEMENT_CONTENT_OPT;
5146 NEXT;
5147 } else if (RAW == '*') {
5148 last->ocur = XML_ELEMENT_CONTENT_MULT;
5149 NEXT;
5150 } else if (RAW == '+') {
5151 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5152 NEXT;
5153 } else {
5154 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5155 }
5156 }
5157 SKIP_BLANKS;
5158 GROW;
5159 }
5160 if ((cur != NULL) && (last != NULL)) {
5161 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005162 if (last != NULL)
5163 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005165 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005166 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5167"Element content declaration doesn't start and stop in the same entity\n",
5168 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005169 }
Owen Taylor3473f882001-02-23 17:55:21 +00005170 NEXT;
5171 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005172 if (ret != NULL)
5173 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005174 NEXT;
5175 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005176 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005177 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005178 cur = ret;
5179 /*
5180 * Some normalization:
5181 * (a | b* | c?)* == (a | b | c)*
5182 */
5183 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5184 if ((cur->c1 != NULL) &&
5185 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5186 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5187 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5188 if ((cur->c2 != NULL) &&
5189 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5190 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5191 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5192 cur = cur->c2;
5193 }
5194 }
Owen Taylor3473f882001-02-23 17:55:21 +00005195 NEXT;
5196 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005197 if (ret != NULL) {
5198 int found = 0;
5199
Daniel Veillarde470df72001-04-18 21:41:07 +00005200 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005201 /*
5202 * Some normalization:
5203 * (a | b*)+ == (a | b)*
5204 * (a | b?)+ == (a | b)*
5205 */
5206 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5207 if ((cur->c1 != NULL) &&
5208 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5209 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5210 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5211 found = 1;
5212 }
5213 if ((cur->c2 != NULL) &&
5214 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5215 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5216 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5217 found = 1;
5218 }
5219 cur = cur->c2;
5220 }
5221 if (found)
5222 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5223 }
Owen Taylor3473f882001-02-23 17:55:21 +00005224 NEXT;
5225 }
5226 return(ret);
5227}
5228
5229/**
5230 * xmlParseElementContentDecl:
5231 * @ctxt: an XML parser context
5232 * @name: the name of the element being defined.
5233 * @result: the Element Content pointer will be stored here if any
5234 *
5235 * parse the declaration for an Element content either Mixed or Children,
5236 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5237 *
5238 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5239 *
5240 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5241 */
5242
5243int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005244xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005245 xmlElementContentPtr *result) {
5246
5247 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005248 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005249 int res;
5250
5251 *result = NULL;
5252
5253 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005254 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005255 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005256 return(-1);
5257 }
5258 NEXT;
5259 GROW;
5260 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005261 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005262 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005263 res = XML_ELEMENT_TYPE_MIXED;
5264 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005265 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 res = XML_ELEMENT_TYPE_ELEMENT;
5267 }
Owen Taylor3473f882001-02-23 17:55:21 +00005268 SKIP_BLANKS;
5269 *result = tree;
5270 return(res);
5271}
5272
5273/**
5274 * xmlParseElementDecl:
5275 * @ctxt: an XML parser context
5276 *
5277 * parse an Element declaration.
5278 *
5279 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5280 *
5281 * [ VC: Unique Element Type Declaration ]
5282 * No element type may be declared more than once
5283 *
5284 * Returns the type of the element, or -1 in case of error
5285 */
5286int
5287xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005288 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005289 int ret = -1;
5290 xmlElementContentPtr content = NULL;
5291
5292 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005293 if (memcmp(CUR_PTR, "<!ELEMENT", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005294 xmlParserInputPtr input = ctxt->input;
5295
5296 SKIP(9);
5297 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005298 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5299 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005300 }
5301 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005302 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005303 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005304 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5305 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005306 return(-1);
5307 }
5308 while ((RAW == 0) && (ctxt->inputNr > 1))
5309 xmlPopInput(ctxt);
5310 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005311 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5312 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005313 }
5314 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005315 if (memcmp(CUR_PTR, "EMPTY", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005316 SKIP(5);
5317 /*
5318 * Element must always be empty.
5319 */
5320 ret = XML_ELEMENT_TYPE_EMPTY;
5321 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5322 (NXT(2) == 'Y')) {
5323 SKIP(3);
5324 /*
5325 * Element is a generic container.
5326 */
5327 ret = XML_ELEMENT_TYPE_ANY;
5328 } else if (RAW == '(') {
5329 ret = xmlParseElementContentDecl(ctxt, name, &content);
5330 } else {
5331 /*
5332 * [ WFC: PEs in Internal Subset ] error handling.
5333 */
5334 if ((RAW == '%') && (ctxt->external == 0) &&
5335 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005336 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005337 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005338 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005339 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005340 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5341 }
Owen Taylor3473f882001-02-23 17:55:21 +00005342 return(-1);
5343 }
5344
5345 SKIP_BLANKS;
5346 /*
5347 * Pop-up of finished entities.
5348 */
5349 while ((RAW == 0) && (ctxt->inputNr > 1))
5350 xmlPopInput(ctxt);
5351 SKIP_BLANKS;
5352
5353 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005354 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005355 } else {
5356 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5358 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360
5361 NEXT;
5362 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5363 (ctxt->sax->elementDecl != NULL))
5364 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5365 content);
5366 }
5367 if (content != NULL) {
5368 xmlFreeElementContent(content);
5369 }
Owen Taylor3473f882001-02-23 17:55:21 +00005370 }
5371 return(ret);
5372}
5373
5374/**
Owen Taylor3473f882001-02-23 17:55:21 +00005375 * xmlParseConditionalSections
5376 * @ctxt: an XML parser context
5377 *
5378 * [61] conditionalSect ::= includeSect | ignoreSect
5379 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5380 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5381 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5382 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5383 */
5384
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005385static void
Owen Taylor3473f882001-02-23 17:55:21 +00005386xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5387 SKIP(3);
5388 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005389 if (memcmp(CUR_PTR, "INCLUDE", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005390 SKIP(7);
5391 SKIP_BLANKS;
5392 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005393 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005394 } else {
5395 NEXT;
5396 }
5397 if (xmlParserDebugEntities) {
5398 if ((ctxt->input != NULL) && (ctxt->input->filename))
5399 xmlGenericError(xmlGenericErrorContext,
5400 "%s(%d): ", ctxt->input->filename,
5401 ctxt->input->line);
5402 xmlGenericError(xmlGenericErrorContext,
5403 "Entering INCLUDE Conditional Section\n");
5404 }
5405
5406 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5407 (NXT(2) != '>'))) {
5408 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005409 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005410
5411 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5412 xmlParseConditionalSections(ctxt);
5413 } else if (IS_BLANK(CUR)) {
5414 NEXT;
5415 } else if (RAW == '%') {
5416 xmlParsePEReference(ctxt);
5417 } else
5418 xmlParseMarkupDecl(ctxt);
5419
5420 /*
5421 * Pop-up of finished entities.
5422 */
5423 while ((RAW == 0) && (ctxt->inputNr > 1))
5424 xmlPopInput(ctxt);
5425
Daniel Veillardfdc91562002-07-01 21:52:03 +00005426 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005427 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005428 break;
5429 }
5430 }
5431 if (xmlParserDebugEntities) {
5432 if ((ctxt->input != NULL) && (ctxt->input->filename))
5433 xmlGenericError(xmlGenericErrorContext,
5434 "%s(%d): ", ctxt->input->filename,
5435 ctxt->input->line);
5436 xmlGenericError(xmlGenericErrorContext,
5437 "Leaving INCLUDE Conditional Section\n");
5438 }
5439
Daniel Veillard8f597c32003-10-06 08:19:27 +00005440 } else if (memcmp(CUR_PTR, "IGNORE", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005441 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005442 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005443 int depth = 0;
5444
5445 SKIP(6);
5446 SKIP_BLANKS;
5447 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005448 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005449 } else {
5450 NEXT;
5451 }
5452 if (xmlParserDebugEntities) {
5453 if ((ctxt->input != NULL) && (ctxt->input->filename))
5454 xmlGenericError(xmlGenericErrorContext,
5455 "%s(%d): ", ctxt->input->filename,
5456 ctxt->input->line);
5457 xmlGenericError(xmlGenericErrorContext,
5458 "Entering IGNORE Conditional Section\n");
5459 }
5460
5461 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005462 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005463 * But disable SAX event generating DTD building in the meantime
5464 */
5465 state = ctxt->disableSAX;
5466 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005467 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005468 ctxt->instate = XML_PARSER_IGNORE;
5469
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005470 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005471 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5472 depth++;
5473 SKIP(3);
5474 continue;
5475 }
5476 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5477 if (--depth >= 0) SKIP(3);
5478 continue;
5479 }
5480 NEXT;
5481 continue;
5482 }
5483
5484 ctxt->disableSAX = state;
5485 ctxt->instate = instate;
5486
5487 if (xmlParserDebugEntities) {
5488 if ((ctxt->input != NULL) && (ctxt->input->filename))
5489 xmlGenericError(xmlGenericErrorContext,
5490 "%s(%d): ", ctxt->input->filename,
5491 ctxt->input->line);
5492 xmlGenericError(xmlGenericErrorContext,
5493 "Leaving IGNORE Conditional Section\n");
5494 }
5495
5496 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005497 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005498 }
5499
5500 if (RAW == 0)
5501 SHRINK;
5502
5503 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005504 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005505 } else {
5506 SKIP(3);
5507 }
5508}
5509
5510/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005511 * xmlParseMarkupDecl:
5512 * @ctxt: an XML parser context
5513 *
5514 * parse Markup declarations
5515 *
5516 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5517 * NotationDecl | PI | Comment
5518 *
5519 * [ VC: Proper Declaration/PE Nesting ]
5520 * Parameter-entity replacement text must be properly nested with
5521 * markup declarations. That is to say, if either the first character
5522 * or the last character of a markup declaration (markupdecl above) is
5523 * contained in the replacement text for a parameter-entity reference,
5524 * both must be contained in the same replacement text.
5525 *
5526 * [ WFC: PEs in Internal Subset ]
5527 * In the internal DTD subset, parameter-entity references can occur
5528 * only where markup declarations can occur, not within markup declarations.
5529 * (This does not apply to references that occur in external parameter
5530 * entities or to the external subset.)
5531 */
5532void
5533xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5534 GROW;
5535 xmlParseElementDecl(ctxt);
5536 xmlParseAttributeListDecl(ctxt);
5537 xmlParseEntityDecl(ctxt);
5538 xmlParseNotationDecl(ctxt);
5539 xmlParsePI(ctxt);
5540 xmlParseComment(ctxt);
5541 /*
5542 * This is only for internal subset. On external entities,
5543 * the replacement is done before parsing stage
5544 */
5545 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5546 xmlParsePEReference(ctxt);
5547
5548 /*
5549 * Conditional sections are allowed from entities included
5550 * by PE References in the internal subset.
5551 */
5552 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5553 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5554 xmlParseConditionalSections(ctxt);
5555 }
5556 }
5557
5558 ctxt->instate = XML_PARSER_DTD;
5559}
5560
5561/**
5562 * xmlParseTextDecl:
5563 * @ctxt: an XML parser context
5564 *
5565 * parse an XML declaration header for external entities
5566 *
5567 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5568 *
5569 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5570 */
5571
5572void
5573xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5574 xmlChar *version;
5575
5576 /*
5577 * We know that '<?xml' is here.
5578 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00005579 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005580 SKIP(5);
5581 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005582 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005583 return;
5584 }
5585
5586 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005587 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5588 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005589 }
5590 SKIP_BLANKS;
5591
5592 /*
5593 * We may have the VersionInfo here.
5594 */
5595 version = xmlParseVersionInfo(ctxt);
5596 if (version == NULL)
5597 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005598 else {
5599 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005600 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5601 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005602 }
5603 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005604 ctxt->input->version = version;
5605
5606 /*
5607 * We must have the encoding declaration
5608 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005609 xmlParseEncodingDecl(ctxt);
5610 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5611 /*
5612 * The XML REC instructs us to stop parsing right here
5613 */
5614 return;
5615 }
5616
5617 SKIP_BLANKS;
5618 if ((RAW == '?') && (NXT(1) == '>')) {
5619 SKIP(2);
5620 } else if (RAW == '>') {
5621 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005622 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005623 NEXT;
5624 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005625 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005626 MOVETO_ENDTAG(CUR_PTR);
5627 NEXT;
5628 }
5629}
5630
5631/**
Owen Taylor3473f882001-02-23 17:55:21 +00005632 * xmlParseExternalSubset:
5633 * @ctxt: an XML parser context
5634 * @ExternalID: the external identifier
5635 * @SystemID: the system identifier (or URL)
5636 *
5637 * parse Markup declarations from an external subset
5638 *
5639 * [30] extSubset ::= textDecl? extSubsetDecl
5640 *
5641 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5642 */
5643void
5644xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5645 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005646 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005647 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005648 if (memcmp(CUR_PTR, "<?xml", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005649 xmlParseTextDecl(ctxt);
5650 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5651 /*
5652 * The XML REC instructs us to stop parsing right here
5653 */
5654 ctxt->instate = XML_PARSER_EOF;
5655 return;
5656 }
5657 }
5658 if (ctxt->myDoc == NULL) {
5659 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5660 }
5661 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5662 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5663
5664 ctxt->instate = XML_PARSER_DTD;
5665 ctxt->external = 1;
5666 while (((RAW == '<') && (NXT(1) == '?')) ||
5667 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005668 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005669 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005670 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005671
5672 GROW;
5673 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5674 xmlParseConditionalSections(ctxt);
5675 } else if (IS_BLANK(CUR)) {
5676 NEXT;
5677 } else if (RAW == '%') {
5678 xmlParsePEReference(ctxt);
5679 } else
5680 xmlParseMarkupDecl(ctxt);
5681
5682 /*
5683 * Pop-up of finished entities.
5684 */
5685 while ((RAW == 0) && (ctxt->inputNr > 1))
5686 xmlPopInput(ctxt);
5687
Daniel Veillardfdc91562002-07-01 21:52:03 +00005688 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005689 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005690 break;
5691 }
5692 }
5693
5694 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005695 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005696 }
5697
5698}
5699
5700/**
5701 * xmlParseReference:
5702 * @ctxt: an XML parser context
5703 *
5704 * parse and handle entity references in content, depending on the SAX
5705 * interface, this may end-up in a call to character() if this is a
5706 * CharRef, a predefined entity, if there is no reference() callback.
5707 * or if the parser was asked to switch to that mode.
5708 *
5709 * [67] Reference ::= EntityRef | CharRef
5710 */
5711void
5712xmlParseReference(xmlParserCtxtPtr ctxt) {
5713 xmlEntityPtr ent;
5714 xmlChar *val;
5715 if (RAW != '&') return;
5716
5717 if (NXT(1) == '#') {
5718 int i = 0;
5719 xmlChar out[10];
5720 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005721 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005722
5723 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5724 /*
5725 * So we are using non-UTF-8 buffers
5726 * Check that the char fit on 8bits, if not
5727 * generate a CharRef.
5728 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005729 if (value <= 0xFF) {
5730 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005731 out[1] = 0;
5732 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5733 (!ctxt->disableSAX))
5734 ctxt->sax->characters(ctxt->userData, out, 1);
5735 } else {
5736 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005737 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005738 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005739 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005740 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5741 (!ctxt->disableSAX))
5742 ctxt->sax->reference(ctxt->userData, out);
5743 }
5744 } else {
5745 /*
5746 * Just encode the value in UTF-8
5747 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005748 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005749 out[i] = 0;
5750 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5751 (!ctxt->disableSAX))
5752 ctxt->sax->characters(ctxt->userData, out, i);
5753 }
5754 } else {
5755 ent = xmlParseEntityRef(ctxt);
5756 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005757 if (!ctxt->wellFormed)
5758 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005759 if ((ent->name != NULL) &&
5760 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5761 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005762 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005763
5764
5765 /*
5766 * The first reference to the entity trigger a parsing phase
5767 * where the ent->children is filled with the result from
5768 * the parsing.
5769 */
5770 if (ent->children == NULL) {
5771 xmlChar *value;
5772 value = ent->content;
5773
5774 /*
5775 * Check that this entity is well formed
5776 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005777 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005778 (value[1] == 0) && (value[0] == '<') &&
5779 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5780 /*
5781 * DONE: get definite answer on this !!!
5782 * Lots of entity decls are used to declare a single
5783 * char
5784 * <!ENTITY lt "<">
5785 * Which seems to be valid since
5786 * 2.4: The ampersand character (&) and the left angle
5787 * bracket (<) may appear in their literal form only
5788 * when used ... They are also legal within the literal
5789 * entity value of an internal entity declaration;i
5790 * see "4.3.2 Well-Formed Parsed Entities".
5791 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5792 * Looking at the OASIS test suite and James Clark
5793 * tests, this is broken. However the XML REC uses
5794 * it. Is the XML REC not well-formed ????
5795 * This is a hack to avoid this problem
5796 *
5797 * ANSWER: since lt gt amp .. are already defined,
5798 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005799 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005800 * is lousy but acceptable.
5801 */
5802 list = xmlNewDocText(ctxt->myDoc, value);
5803 if (list != NULL) {
5804 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5805 (ent->children == NULL)) {
5806 ent->children = list;
5807 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005808 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005809 list->parent = (xmlNodePtr) ent;
5810 } else {
5811 xmlFreeNodeList(list);
5812 }
5813 } else if (list != NULL) {
5814 xmlFreeNodeList(list);
5815 }
5816 } else {
5817 /*
5818 * 4.3.2: An internal general parsed entity is well-formed
5819 * if its replacement text matches the production labeled
5820 * content.
5821 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005822
5823 void *user_data;
5824 /*
5825 * This is a bit hackish but this seems the best
5826 * way to make sure both SAX and DOM entity support
5827 * behaves okay.
5828 */
5829 if (ctxt->userData == ctxt)
5830 user_data = NULL;
5831 else
5832 user_data = ctxt->userData;
5833
Owen Taylor3473f882001-02-23 17:55:21 +00005834 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5835 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005836 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5837 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005838 ctxt->depth--;
5839 } else if (ent->etype ==
5840 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5841 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005842 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005843 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005844 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005845 ctxt->depth--;
5846 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005847 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005848 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5849 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005850 }
5851 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005852 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005853 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005854 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005855 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5856 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005857 (ent->children == NULL)) {
5858 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005859 if (ctxt->replaceEntities) {
5860 /*
5861 * Prune it directly in the generated document
5862 * except for single text nodes.
5863 */
5864 if ((list->type == XML_TEXT_NODE) &&
5865 (list->next == NULL)) {
5866 list->parent = (xmlNodePtr) ent;
5867 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005868 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005869 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005870 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005871 while (list != NULL) {
5872 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005873 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005874 if (list->next == NULL)
5875 ent->last = list;
5876 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005877 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005878 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005879#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005880 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5881 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005882#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005883 }
5884 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005885 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005886 while (list != NULL) {
5887 list->parent = (xmlNodePtr) ent;
5888 if (list->next == NULL)
5889 ent->last = list;
5890 list = list->next;
5891 }
Owen Taylor3473f882001-02-23 17:55:21 +00005892 }
5893 } else {
5894 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005895 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005896 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005897 } else if ((ret != XML_ERR_OK) &&
5898 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005899 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005900 } else if (list != NULL) {
5901 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005902 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005903 }
5904 }
5905 }
5906 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5907 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5908 /*
5909 * Create a node.
5910 */
5911 ctxt->sax->reference(ctxt->userData, ent->name);
5912 return;
5913 } else if (ctxt->replaceEntities) {
5914 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5915 /*
5916 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005917 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005918 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005919 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005920 if ((list == NULL) && (ent->owner == 0)) {
5921 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005922 cur = ent->children;
5923 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005924 nw = xmlCopyNode(cur, 1);
5925 if (nw != NULL) {
5926 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005927 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005928 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005929 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005930 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005931 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005932 if (cur == ent->last)
5933 break;
5934 cur = cur->next;
5935 }
Daniel Veillard81273902003-09-30 00:43:48 +00005936#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005937 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005938 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005939#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005940 } else if (list == NULL) {
5941 xmlNodePtr nw = NULL, cur, next, last,
5942 firstChild = NULL;
5943 /*
5944 * Copy the entity child list and make it the new
5945 * entity child list. The goal is to make sure any
5946 * ID or REF referenced will be the one from the
5947 * document content and not the entity copy.
5948 */
5949 cur = ent->children;
5950 ent->children = NULL;
5951 last = ent->last;
5952 ent->last = NULL;
5953 while (cur != NULL) {
5954 next = cur->next;
5955 cur->next = NULL;
5956 cur->parent = NULL;
5957 nw = xmlCopyNode(cur, 1);
5958 if (nw != NULL) {
5959 nw->_private = cur->_private;
5960 if (firstChild == NULL){
5961 firstChild = cur;
5962 }
5963 xmlAddChild((xmlNodePtr) ent, nw);
5964 xmlAddChild(ctxt->node, cur);
5965 }
5966 if (cur == last)
5967 break;
5968 cur = next;
5969 }
5970 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005971#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005972 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5973 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005974#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005975 } else {
5976 /*
5977 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005978 * node with a possible previous text one which
5979 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005980 */
5981 if (ent->children->type == XML_TEXT_NODE)
5982 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5983 if ((ent->last != ent->children) &&
5984 (ent->last->type == XML_TEXT_NODE))
5985 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5986 xmlAddChildList(ctxt->node, ent->children);
5987 }
5988
Owen Taylor3473f882001-02-23 17:55:21 +00005989 /*
5990 * This is to avoid a nasty side effect, see
5991 * characters() in SAX.c
5992 */
5993 ctxt->nodemem = 0;
5994 ctxt->nodelen = 0;
5995 return;
5996 } else {
5997 /*
5998 * Probably running in SAX mode
5999 */
6000 xmlParserInputPtr input;
6001
6002 input = xmlNewEntityInputStream(ctxt, ent);
6003 xmlPushInput(ctxt, input);
6004 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillard8f597c32003-10-06 08:19:27 +00006005 (memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006006 xmlParseTextDecl(ctxt);
6007 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6008 /*
6009 * The XML REC instructs us to stop parsing right here
6010 */
6011 ctxt->instate = XML_PARSER_EOF;
6012 return;
6013 }
6014 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006015 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6016 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006017 }
6018 }
6019 return;
6020 }
6021 }
6022 } else {
6023 val = ent->content;
6024 if (val == NULL) return;
6025 /*
6026 * inline the entity.
6027 */
6028 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6029 (!ctxt->disableSAX))
6030 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6031 }
6032 }
6033}
6034
6035/**
6036 * xmlParseEntityRef:
6037 * @ctxt: an XML parser context
6038 *
6039 * parse ENTITY references declarations
6040 *
6041 * [68] EntityRef ::= '&' Name ';'
6042 *
6043 * [ WFC: Entity Declared ]
6044 * In a document without any DTD, a document with only an internal DTD
6045 * subset which contains no parameter entity references, or a document
6046 * with "standalone='yes'", the Name given in the entity reference
6047 * must match that in an entity declaration, except that well-formed
6048 * documents need not declare any of the following entities: amp, lt,
6049 * gt, apos, quot. The declaration of a parameter entity must precede
6050 * any reference to it. Similarly, the declaration of a general entity
6051 * must precede any reference to it which appears in a default value in an
6052 * attribute-list declaration. Note that if entities are declared in the
6053 * external subset or in external parameter entities, a non-validating
6054 * processor is not obligated to read and process their declarations;
6055 * for such documents, the rule that an entity must be declared is a
6056 * well-formedness constraint only if standalone='yes'.
6057 *
6058 * [ WFC: Parsed Entity ]
6059 * An entity reference must not contain the name of an unparsed entity
6060 *
6061 * Returns the xmlEntityPtr if found, or NULL otherwise.
6062 */
6063xmlEntityPtr
6064xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006065 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006066 xmlEntityPtr ent = NULL;
6067
6068 GROW;
6069
6070 if (RAW == '&') {
6071 NEXT;
6072 name = xmlParseName(ctxt);
6073 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006074 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6075 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006076 } else {
6077 if (RAW == ';') {
6078 NEXT;
6079 /*
6080 * Ask first SAX for entity resolution, otherwise try the
6081 * predefined set.
6082 */
6083 if (ctxt->sax != NULL) {
6084 if (ctxt->sax->getEntity != NULL)
6085 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006086 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006087 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006088 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6089 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006090 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006091 }
Owen Taylor3473f882001-02-23 17:55:21 +00006092 }
6093 /*
6094 * [ WFC: Entity Declared ]
6095 * In a document without any DTD, a document with only an
6096 * internal DTD subset which contains no parameter entity
6097 * references, or a document with "standalone='yes'", the
6098 * Name given in the entity reference must match that in an
6099 * entity declaration, except that well-formed documents
6100 * need not declare any of the following entities: amp, lt,
6101 * gt, apos, quot.
6102 * The declaration of a parameter entity must precede any
6103 * reference to it.
6104 * Similarly, the declaration of a general entity must
6105 * precede any reference to it which appears in a default
6106 * value in an attribute-list declaration. Note that if
6107 * entities are declared in the external subset or in
6108 * external parameter entities, a non-validating processor
6109 * is not obligated to read and process their declarations;
6110 * for such documents, the rule that an entity must be
6111 * declared is a well-formedness constraint only if
6112 * standalone='yes'.
6113 */
6114 if (ent == NULL) {
6115 if ((ctxt->standalone == 1) ||
6116 ((ctxt->hasExternalSubset == 0) &&
6117 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006118 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006119 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006120 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006121 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006122 "Entity '%s' not defined\n", name);
6123 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006124 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006125 }
6126
6127 /*
6128 * [ WFC: Parsed Entity ]
6129 * An entity reference must not contain the name of an
6130 * unparsed entity
6131 */
6132 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006133 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006134 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006135 }
6136
6137 /*
6138 * [ WFC: No External Entity References ]
6139 * Attribute values cannot contain direct or indirect
6140 * entity references to external entities.
6141 */
6142 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6143 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006144 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6145 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006146 }
6147 /*
6148 * [ WFC: No < in Attribute Values ]
6149 * The replacement text of any entity referred to directly or
6150 * indirectly in an attribute value (other than "&lt;") must
6151 * not contain a <.
6152 */
6153 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6154 (ent != NULL) &&
6155 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6156 (ent->content != NULL) &&
6157 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006158 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006159 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006160 }
6161
6162 /*
6163 * Internal check, no parameter entities here ...
6164 */
6165 else {
6166 switch (ent->etype) {
6167 case XML_INTERNAL_PARAMETER_ENTITY:
6168 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006169 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6170 "Attempt to reference the parameter entity '%s'\n",
6171 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 break;
6173 default:
6174 break;
6175 }
6176 }
6177
6178 /*
6179 * [ WFC: No Recursion ]
6180 * A parsed entity must not contain a recursive reference
6181 * to itself, either directly or indirectly.
6182 * Done somewhere else
6183 */
6184
6185 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006186 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006187 }
Owen Taylor3473f882001-02-23 17:55:21 +00006188 }
6189 }
6190 return(ent);
6191}
6192
6193/**
6194 * xmlParseStringEntityRef:
6195 * @ctxt: an XML parser context
6196 * @str: a pointer to an index in the string
6197 *
6198 * parse ENTITY references declarations, but this version parses it from
6199 * a string value.
6200 *
6201 * [68] EntityRef ::= '&' Name ';'
6202 *
6203 * [ WFC: Entity Declared ]
6204 * In a document without any DTD, a document with only an internal DTD
6205 * subset which contains no parameter entity references, or a document
6206 * with "standalone='yes'", the Name given in the entity reference
6207 * must match that in an entity declaration, except that well-formed
6208 * documents need not declare any of the following entities: amp, lt,
6209 * gt, apos, quot. The declaration of a parameter entity must precede
6210 * any reference to it. Similarly, the declaration of a general entity
6211 * must precede any reference to it which appears in a default value in an
6212 * attribute-list declaration. Note that if entities are declared in the
6213 * external subset or in external parameter entities, a non-validating
6214 * processor is not obligated to read and process their declarations;
6215 * for such documents, the rule that an entity must be declared is a
6216 * well-formedness constraint only if standalone='yes'.
6217 *
6218 * [ WFC: Parsed Entity ]
6219 * An entity reference must not contain the name of an unparsed entity
6220 *
6221 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6222 * is updated to the current location in the string.
6223 */
6224xmlEntityPtr
6225xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6226 xmlChar *name;
6227 const xmlChar *ptr;
6228 xmlChar cur;
6229 xmlEntityPtr ent = NULL;
6230
6231 if ((str == NULL) || (*str == NULL))
6232 return(NULL);
6233 ptr = *str;
6234 cur = *ptr;
6235 if (cur == '&') {
6236 ptr++;
6237 cur = *ptr;
6238 name = xmlParseStringName(ctxt, &ptr);
6239 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006240 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6241 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006242 } else {
6243 if (*ptr == ';') {
6244 ptr++;
6245 /*
6246 * Ask first SAX for entity resolution, otherwise try the
6247 * predefined set.
6248 */
6249 if (ctxt->sax != NULL) {
6250 if (ctxt->sax->getEntity != NULL)
6251 ent = ctxt->sax->getEntity(ctxt->userData, name);
6252 if (ent == NULL)
6253 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006254 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006255 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006256 }
Owen Taylor3473f882001-02-23 17:55:21 +00006257 }
6258 /*
6259 * [ WFC: Entity Declared ]
6260 * In a document without any DTD, a document with only an
6261 * internal DTD subset which contains no parameter entity
6262 * references, or a document with "standalone='yes'", the
6263 * Name given in the entity reference must match that in an
6264 * entity declaration, except that well-formed documents
6265 * need not declare any of the following entities: amp, lt,
6266 * gt, apos, quot.
6267 * The declaration of a parameter entity must precede any
6268 * reference to it.
6269 * Similarly, the declaration of a general entity must
6270 * precede any reference to it which appears in a default
6271 * value in an attribute-list declaration. Note that if
6272 * entities are declared in the external subset or in
6273 * external parameter entities, a non-validating processor
6274 * is not obligated to read and process their declarations;
6275 * for such documents, the rule that an entity must be
6276 * declared is a well-formedness constraint only if
6277 * standalone='yes'.
6278 */
6279 if (ent == NULL) {
6280 if ((ctxt->standalone == 1) ||
6281 ((ctxt->hasExternalSubset == 0) &&
6282 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006283 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006284 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006285 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006286 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006287 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006288 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006289 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006290 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006291 }
6292
6293 /*
6294 * [ WFC: Parsed Entity ]
6295 * An entity reference must not contain the name of an
6296 * unparsed entity
6297 */
6298 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006299 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006300 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006301 }
6302
6303 /*
6304 * [ WFC: No External Entity References ]
6305 * Attribute values cannot contain direct or indirect
6306 * entity references to external entities.
6307 */
6308 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6309 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006310 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006311 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006312 }
6313 /*
6314 * [ WFC: No < in Attribute Values ]
6315 * The replacement text of any entity referred to directly or
6316 * indirectly in an attribute value (other than "&lt;") must
6317 * not contain a <.
6318 */
6319 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6320 (ent != NULL) &&
6321 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6322 (ent->content != NULL) &&
6323 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006324 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6325 "'<' in entity '%s' is not allowed in attributes values\n",
6326 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006327 }
6328
6329 /*
6330 * Internal check, no parameter entities here ...
6331 */
6332 else {
6333 switch (ent->etype) {
6334 case XML_INTERNAL_PARAMETER_ENTITY:
6335 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006336 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6337 "Attempt to reference the parameter entity '%s'\n",
6338 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006339 break;
6340 default:
6341 break;
6342 }
6343 }
6344
6345 /*
6346 * [ WFC: No Recursion ]
6347 * A parsed entity must not contain a recursive reference
6348 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006349 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006350 */
6351
6352 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006353 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006354 }
6355 xmlFree(name);
6356 }
6357 }
6358 *str = ptr;
6359 return(ent);
6360}
6361
6362/**
6363 * xmlParsePEReference:
6364 * @ctxt: an XML parser context
6365 *
6366 * parse PEReference declarations
6367 * The entity content is handled directly by pushing it's content as
6368 * a new input stream.
6369 *
6370 * [69] PEReference ::= '%' Name ';'
6371 *
6372 * [ WFC: No Recursion ]
6373 * A parsed entity must not contain a recursive
6374 * reference to itself, either directly or indirectly.
6375 *
6376 * [ WFC: Entity Declared ]
6377 * In a document without any DTD, a document with only an internal DTD
6378 * subset which contains no parameter entity references, or a document
6379 * with "standalone='yes'", ... ... The declaration of a parameter
6380 * entity must precede any reference to it...
6381 *
6382 * [ VC: Entity Declared ]
6383 * In a document with an external subset or external parameter entities
6384 * with "standalone='no'", ... ... The declaration of a parameter entity
6385 * must precede any reference to it...
6386 *
6387 * [ WFC: In DTD ]
6388 * Parameter-entity references may only appear in the DTD.
6389 * NOTE: misleading but this is handled.
6390 */
6391void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006392xmlParsePEReference(xmlParserCtxtPtr ctxt)
6393{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006394 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006395 xmlEntityPtr entity = NULL;
6396 xmlParserInputPtr input;
6397
6398 if (RAW == '%') {
6399 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006400 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006401 if (name == NULL) {
6402 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6403 "xmlParsePEReference: no name\n");
6404 } else {
6405 if (RAW == ';') {
6406 NEXT;
6407 if ((ctxt->sax != NULL) &&
6408 (ctxt->sax->getParameterEntity != NULL))
6409 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6410 name);
6411 if (entity == NULL) {
6412 /*
6413 * [ WFC: Entity Declared ]
6414 * In a document without any DTD, a document with only an
6415 * internal DTD subset which contains no parameter entity
6416 * references, or a document with "standalone='yes'", ...
6417 * ... The declaration of a parameter entity must precede
6418 * any reference to it...
6419 */
6420 if ((ctxt->standalone == 1) ||
6421 ((ctxt->hasExternalSubset == 0) &&
6422 (ctxt->hasPErefs == 0))) {
6423 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6424 "PEReference: %%%s; not found\n",
6425 name);
6426 } else {
6427 /*
6428 * [ VC: Entity Declared ]
6429 * In a document with an external subset or external
6430 * parameter entities with "standalone='no'", ...
6431 * ... The declaration of a parameter entity must
6432 * precede any reference to it...
6433 */
6434 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6435 "PEReference: %%%s; not found\n",
6436 name, NULL);
6437 ctxt->valid = 0;
6438 }
6439 } else {
6440 /*
6441 * Internal checking in case the entity quest barfed
6442 */
6443 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6444 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6445 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6446 "Internal: %%%s; is not a parameter entity\n",
6447 name, NULL);
6448 } else if (ctxt->input->free != deallocblankswrapper) {
6449 input =
6450 xmlNewBlanksWrapperInputStream(ctxt, entity);
6451 xmlPushInput(ctxt, input);
6452 } else {
6453 /*
6454 * TODO !!!
6455 * handle the extra spaces added before and after
6456 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6457 */
6458 input = xmlNewEntityInputStream(ctxt, entity);
6459 xmlPushInput(ctxt, input);
6460 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6461 (memcmp(CUR_PTR, "<?xml", 5) == 0) &&
6462 (IS_BLANK(NXT(5)))) {
6463 xmlParseTextDecl(ctxt);
6464 if (ctxt->errNo ==
6465 XML_ERR_UNSUPPORTED_ENCODING) {
6466 /*
6467 * The XML REC instructs us to stop parsing
6468 * right here
6469 */
6470 ctxt->instate = XML_PARSER_EOF;
6471 return;
6472 }
6473 }
6474 }
6475 }
6476 ctxt->hasPErefs = 1;
6477 } else {
6478 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6479 }
6480 }
Owen Taylor3473f882001-02-23 17:55:21 +00006481 }
6482}
6483
6484/**
6485 * xmlParseStringPEReference:
6486 * @ctxt: an XML parser context
6487 * @str: a pointer to an index in the string
6488 *
6489 * parse PEReference declarations
6490 *
6491 * [69] PEReference ::= '%' Name ';'
6492 *
6493 * [ WFC: No Recursion ]
6494 * A parsed entity must not contain a recursive
6495 * reference to itself, either directly or indirectly.
6496 *
6497 * [ WFC: Entity Declared ]
6498 * In a document without any DTD, a document with only an internal DTD
6499 * subset which contains no parameter entity references, or a document
6500 * with "standalone='yes'", ... ... The declaration of a parameter
6501 * entity must precede any reference to it...
6502 *
6503 * [ VC: Entity Declared ]
6504 * In a document with an external subset or external parameter entities
6505 * with "standalone='no'", ... ... The declaration of a parameter entity
6506 * must precede any reference to it...
6507 *
6508 * [ WFC: In DTD ]
6509 * Parameter-entity references may only appear in the DTD.
6510 * NOTE: misleading but this is handled.
6511 *
6512 * Returns the string of the entity content.
6513 * str is updated to the current value of the index
6514 */
6515xmlEntityPtr
6516xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6517 const xmlChar *ptr;
6518 xmlChar cur;
6519 xmlChar *name;
6520 xmlEntityPtr entity = NULL;
6521
6522 if ((str == NULL) || (*str == NULL)) return(NULL);
6523 ptr = *str;
6524 cur = *ptr;
6525 if (cur == '%') {
6526 ptr++;
6527 cur = *ptr;
6528 name = xmlParseStringName(ctxt, &ptr);
6529 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006530 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6531 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006532 } else {
6533 cur = *ptr;
6534 if (cur == ';') {
6535 ptr++;
6536 cur = *ptr;
6537 if ((ctxt->sax != NULL) &&
6538 (ctxt->sax->getParameterEntity != NULL))
6539 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6540 name);
6541 if (entity == NULL) {
6542 /*
6543 * [ WFC: Entity Declared ]
6544 * In a document without any DTD, a document with only an
6545 * internal DTD subset which contains no parameter entity
6546 * references, or a document with "standalone='yes'", ...
6547 * ... The declaration of a parameter entity must precede
6548 * any reference to it...
6549 */
6550 if ((ctxt->standalone == 1) ||
6551 ((ctxt->hasExternalSubset == 0) &&
6552 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006553 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006554 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006555 } else {
6556 /*
6557 * [ VC: Entity Declared ]
6558 * In a document with an external subset or external
6559 * parameter entities with "standalone='no'", ...
6560 * ... The declaration of a parameter entity must
6561 * precede any reference to it...
6562 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006563 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6564 "PEReference: %%%s; not found\n",
6565 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006566 ctxt->valid = 0;
6567 }
6568 } else {
6569 /*
6570 * Internal checking in case the entity quest barfed
6571 */
6572 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6573 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006574 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6575 "%%%s; is not a parameter entity\n",
6576 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 }
6578 }
6579 ctxt->hasPErefs = 1;
6580 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006581 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006582 }
6583 xmlFree(name);
6584 }
6585 }
6586 *str = ptr;
6587 return(entity);
6588}
6589
6590/**
6591 * xmlParseDocTypeDecl:
6592 * @ctxt: an XML parser context
6593 *
6594 * parse a DOCTYPE declaration
6595 *
6596 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6597 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6598 *
6599 * [ VC: Root Element Type ]
6600 * The Name in the document type declaration must match the element
6601 * type of the root element.
6602 */
6603
6604void
6605xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006606 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006607 xmlChar *ExternalID = NULL;
6608 xmlChar *URI = NULL;
6609
6610 /*
6611 * We know that '<!DOCTYPE' has been detected.
6612 */
6613 SKIP(9);
6614
6615 SKIP_BLANKS;
6616
6617 /*
6618 * Parse the DOCTYPE name.
6619 */
6620 name = xmlParseName(ctxt);
6621 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006622 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6623 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006624 }
6625 ctxt->intSubName = name;
6626
6627 SKIP_BLANKS;
6628
6629 /*
6630 * Check for SystemID and ExternalID
6631 */
6632 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6633
6634 if ((URI != NULL) || (ExternalID != NULL)) {
6635 ctxt->hasExternalSubset = 1;
6636 }
6637 ctxt->extSubURI = URI;
6638 ctxt->extSubSystem = ExternalID;
6639
6640 SKIP_BLANKS;
6641
6642 /*
6643 * Create and update the internal subset.
6644 */
6645 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6646 (!ctxt->disableSAX))
6647 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6648
6649 /*
6650 * Is there any internal subset declarations ?
6651 * they are handled separately in xmlParseInternalSubset()
6652 */
6653 if (RAW == '[')
6654 return;
6655
6656 /*
6657 * We should be at the end of the DOCTYPE declaration.
6658 */
6659 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006660 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006661 }
6662 NEXT;
6663}
6664
6665/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006666 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006667 * @ctxt: an XML parser context
6668 *
6669 * parse the internal subset declaration
6670 *
6671 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6672 */
6673
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006674static void
Owen Taylor3473f882001-02-23 17:55:21 +00006675xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6676 /*
6677 * Is there any DTD definition ?
6678 */
6679 if (RAW == '[') {
6680 ctxt->instate = XML_PARSER_DTD;
6681 NEXT;
6682 /*
6683 * Parse the succession of Markup declarations and
6684 * PEReferences.
6685 * Subsequence (markupdecl | PEReference | S)*
6686 */
6687 while (RAW != ']') {
6688 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006689 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006690
6691 SKIP_BLANKS;
6692 xmlParseMarkupDecl(ctxt);
6693 xmlParsePEReference(ctxt);
6694
6695 /*
6696 * Pop-up of finished entities.
6697 */
6698 while ((RAW == 0) && (ctxt->inputNr > 1))
6699 xmlPopInput(ctxt);
6700
6701 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006702 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006703 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006704 break;
6705 }
6706 }
6707 if (RAW == ']') {
6708 NEXT;
6709 SKIP_BLANKS;
6710 }
6711 }
6712
6713 /*
6714 * We should be at the end of the DOCTYPE declaration.
6715 */
6716 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006717 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006718 }
6719 NEXT;
6720}
6721
Daniel Veillard81273902003-09-30 00:43:48 +00006722#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006723/**
6724 * xmlParseAttribute:
6725 * @ctxt: an XML parser context
6726 * @value: a xmlChar ** used to store the value of the attribute
6727 *
6728 * parse an attribute
6729 *
6730 * [41] Attribute ::= Name Eq AttValue
6731 *
6732 * [ WFC: No External Entity References ]
6733 * Attribute values cannot contain direct or indirect entity references
6734 * to external entities.
6735 *
6736 * [ WFC: No < in Attribute Values ]
6737 * The replacement text of any entity referred to directly or indirectly in
6738 * an attribute value (other than "&lt;") must not contain a <.
6739 *
6740 * [ VC: Attribute Value Type ]
6741 * The attribute must have been declared; the value must be of the type
6742 * declared for it.
6743 *
6744 * [25] Eq ::= S? '=' S?
6745 *
6746 * With namespace:
6747 *
6748 * [NS 11] Attribute ::= QName Eq AttValue
6749 *
6750 * Also the case QName == xmlns:??? is handled independently as a namespace
6751 * definition.
6752 *
6753 * Returns the attribute name, and the value in *value.
6754 */
6755
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006756const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006757xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006758 const xmlChar *name;
6759 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006760
6761 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006762 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006763 name = xmlParseName(ctxt);
6764 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006765 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006766 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006767 return(NULL);
6768 }
6769
6770 /*
6771 * read the value
6772 */
6773 SKIP_BLANKS;
6774 if (RAW == '=') {
6775 NEXT;
6776 SKIP_BLANKS;
6777 val = xmlParseAttValue(ctxt);
6778 ctxt->instate = XML_PARSER_CONTENT;
6779 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006780 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006781 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006782 return(NULL);
6783 }
6784
6785 /*
6786 * Check that xml:lang conforms to the specification
6787 * No more registered as an error, just generate a warning now
6788 * since this was deprecated in XML second edition
6789 */
6790 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6791 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006792 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6793 "Malformed value for xml:lang : %s\n",
6794 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006795 }
6796 }
6797
6798 /*
6799 * Check that xml:space conforms to the specification
6800 */
6801 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6802 if (xmlStrEqual(val, BAD_CAST "default"))
6803 *(ctxt->space) = 0;
6804 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6805 *(ctxt->space) = 1;
6806 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006807 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006808"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006809 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006810 }
6811 }
6812
6813 *value = val;
6814 return(name);
6815}
6816
6817/**
6818 * xmlParseStartTag:
6819 * @ctxt: an XML parser context
6820 *
6821 * parse a start of tag either for rule element or
6822 * EmptyElement. In both case we don't parse the tag closing chars.
6823 *
6824 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6825 *
6826 * [ WFC: Unique Att Spec ]
6827 * No attribute name may appear more than once in the same start-tag or
6828 * empty-element tag.
6829 *
6830 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6831 *
6832 * [ WFC: Unique Att Spec ]
6833 * No attribute name may appear more than once in the same start-tag or
6834 * empty-element tag.
6835 *
6836 * With namespace:
6837 *
6838 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6839 *
6840 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6841 *
6842 * Returns the element name parsed
6843 */
6844
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006845const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006846xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006847 const xmlChar *name;
6848 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006849 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006850 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006851 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006852 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006853 int i;
6854
6855 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006856 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006857
6858 name = xmlParseName(ctxt);
6859 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006860 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006861 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006862 return(NULL);
6863 }
6864
6865 /*
6866 * Now parse the attributes, it ends up with the ending
6867 *
6868 * (S Attribute)* S?
6869 */
6870 SKIP_BLANKS;
6871 GROW;
6872
Daniel Veillard21a0f912001-02-25 19:54:14 +00006873 while ((RAW != '>') &&
6874 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006875 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006876 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006877 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006878
6879 attname = xmlParseAttribute(ctxt, &attvalue);
6880 if ((attname != NULL) && (attvalue != NULL)) {
6881 /*
6882 * [ WFC: Unique Att Spec ]
6883 * No attribute name may appear more than once in the same
6884 * start-tag or empty-element tag.
6885 */
6886 for (i = 0; i < nbatts;i += 2) {
6887 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006888 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006889 xmlFree(attvalue);
6890 goto failed;
6891 }
6892 }
Owen Taylor3473f882001-02-23 17:55:21 +00006893 /*
6894 * Add the pair to atts
6895 */
6896 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006897 maxatts = 22; /* allow for 10 attrs by default */
6898 atts = (const xmlChar **)
6899 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006900 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006901 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006902 if (attvalue != NULL)
6903 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006904 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006905 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006906 ctxt->atts = atts;
6907 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006908 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006909 const xmlChar **n;
6910
Owen Taylor3473f882001-02-23 17:55:21 +00006911 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006912 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006913 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006914 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006915 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006916 if (attvalue != NULL)
6917 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006918 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006919 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006920 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006921 ctxt->atts = atts;
6922 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006923 }
6924 atts[nbatts++] = attname;
6925 atts[nbatts++] = attvalue;
6926 atts[nbatts] = NULL;
6927 atts[nbatts + 1] = NULL;
6928 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006929 if (attvalue != NULL)
6930 xmlFree(attvalue);
6931 }
6932
6933failed:
6934
Daniel Veillard3772de32002-12-17 10:31:45 +00006935 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006936 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6937 break;
6938 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006939 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6940 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006941 }
6942 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006943 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6944 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006945 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6946 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006947 break;
6948 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006949 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006950 GROW;
6951 }
6952
6953 /*
6954 * SAX: Start of Element !
6955 */
6956 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006957 (!ctxt->disableSAX)) {
6958 if (nbatts > 0)
6959 ctxt->sax->startElement(ctxt->userData, name, atts);
6960 else
6961 ctxt->sax->startElement(ctxt->userData, name, NULL);
6962 }
Owen Taylor3473f882001-02-23 17:55:21 +00006963
6964 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006965 /* Free only the content strings */
6966 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006967 if (atts[i] != NULL)
6968 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006969 }
6970 return(name);
6971}
6972
6973/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006974 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006975 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006976 * @line: line of the start tag
6977 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006978 *
6979 * parse an end of tag
6980 *
6981 * [42] ETag ::= '</' Name S? '>'
6982 *
6983 * With namespace
6984 *
6985 * [NS 9] ETag ::= '</' QName S? '>'
6986 */
6987
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006988static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006989xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006990 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006991
6992 GROW;
6993 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006994 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006995 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006996 return;
6997 }
6998 SKIP(2);
6999
Daniel Veillard46de64e2002-05-29 08:21:33 +00007000 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007001
7002 /*
7003 * We should definitely be at the ending "S? '>'" part
7004 */
7005 GROW;
7006 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007007 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007008 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007009 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007010 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007011
7012 /*
7013 * [ WFC: Element Type Match ]
7014 * The Name in an element's end-tag must match the element type in the
7015 * start-tag.
7016 *
7017 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007018 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007019 if (name == NULL) name = BAD_CAST "unparseable";
7020 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007021 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007022 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007023 }
7024
7025 /*
7026 * SAX: End of Tag
7027 */
7028 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7029 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007030 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007031
Daniel Veillarde57ec792003-09-10 10:50:59 +00007032 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007033 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007034 return;
7035}
7036
7037/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007038 * xmlParseEndTag:
7039 * @ctxt: an XML parser context
7040 *
7041 * parse an end of tag
7042 *
7043 * [42] ETag ::= '</' Name S? '>'
7044 *
7045 * With namespace
7046 *
7047 * [NS 9] ETag ::= '</' QName S? '>'
7048 */
7049
7050void
7051xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007052 xmlParseEndTag1(ctxt, 0);
7053}
Daniel Veillard81273902003-09-30 00:43:48 +00007054#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007055
7056/************************************************************************
7057 * *
7058 * SAX 2 specific operations *
7059 * *
7060 ************************************************************************/
7061
7062static const xmlChar *
7063xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7064 int len = 0, l;
7065 int c;
7066 int count = 0;
7067
7068 /*
7069 * Handler for more complex cases
7070 */
7071 GROW;
7072 c = CUR_CHAR(l);
7073 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007074 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007075 return(NULL);
7076 }
7077
7078 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007079 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007080 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007081 (IS_COMBINING(c)) ||
7082 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007083 if (count++ > 100) {
7084 count = 0;
7085 GROW;
7086 }
7087 len += l;
7088 NEXTL(l);
7089 c = CUR_CHAR(l);
7090 }
7091 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7092}
7093
7094/*
7095 * xmlGetNamespace:
7096 * @ctxt: an XML parser context
7097 * @prefix: the prefix to lookup
7098 *
7099 * Lookup the namespace name for the @prefix (which ca be NULL)
7100 * The prefix must come from the @ctxt->dict dictionnary
7101 *
7102 * Returns the namespace name or NULL if not bound
7103 */
7104static const xmlChar *
7105xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7106 int i;
7107
Daniel Veillarde57ec792003-09-10 10:50:59 +00007108 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007109 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007110 if (ctxt->nsTab[i] == prefix) {
7111 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7112 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007113 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007114 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007115 return(NULL);
7116}
7117
7118/**
7119 * xmlParseNCName:
7120 * @ctxt: an XML parser context
7121 *
7122 * parse an XML name.
7123 *
7124 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7125 * CombiningChar | Extender
7126 *
7127 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7128 *
7129 * Returns the Name parsed or NULL
7130 */
7131
7132static const xmlChar *
7133xmlParseNCName(xmlParserCtxtPtr ctxt) {
7134 const xmlChar *in;
7135 const xmlChar *ret;
7136 int count = 0;
7137
7138 /*
7139 * Accelerator for simple ASCII names
7140 */
7141 in = ctxt->input->cur;
7142 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7143 ((*in >= 0x41) && (*in <= 0x5A)) ||
7144 (*in == '_')) {
7145 in++;
7146 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7147 ((*in >= 0x41) && (*in <= 0x5A)) ||
7148 ((*in >= 0x30) && (*in <= 0x39)) ||
7149 (*in == '_') || (*in == '-') ||
7150 (*in == '.'))
7151 in++;
7152 if ((*in > 0) && (*in < 0x80)) {
7153 count = in - ctxt->input->cur;
7154 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7155 ctxt->input->cur = in;
7156 ctxt->nbChars += count;
7157 ctxt->input->col += count;
7158 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007159 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007160 }
7161 return(ret);
7162 }
7163 }
7164 return(xmlParseNCNameComplex(ctxt));
7165}
7166
7167/**
7168 * xmlParseQName:
7169 * @ctxt: an XML parser context
7170 * @prefix: pointer to store the prefix part
7171 *
7172 * parse an XML Namespace QName
7173 *
7174 * [6] QName ::= (Prefix ':')? LocalPart
7175 * [7] Prefix ::= NCName
7176 * [8] LocalPart ::= NCName
7177 *
7178 * Returns the Name parsed or NULL
7179 */
7180
7181static const xmlChar *
7182xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7183 const xmlChar *l, *p;
7184
7185 GROW;
7186
7187 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007188 if (l == NULL) {
7189 if (CUR == ':') {
7190 l = xmlParseName(ctxt);
7191 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007192 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7193 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007194 *prefix = NULL;
7195 return(l);
7196 }
7197 }
7198 return(NULL);
7199 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007200 if (CUR == ':') {
7201 NEXT;
7202 p = l;
7203 l = xmlParseNCName(ctxt);
7204 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007205 xmlChar *tmp;
7206
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007207 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7208 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007209 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7210 p = xmlDictLookup(ctxt->dict, tmp, -1);
7211 if (tmp != NULL) xmlFree(tmp);
7212 *prefix = NULL;
7213 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007214 }
7215 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007216 xmlChar *tmp;
7217
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007218 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7219 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007220 NEXT;
7221 tmp = (xmlChar *) xmlParseName(ctxt);
7222 if (tmp != NULL) {
7223 tmp = xmlBuildQName(tmp, l, NULL, 0);
7224 l = xmlDictLookup(ctxt->dict, tmp, -1);
7225 if (tmp != NULL) xmlFree(tmp);
7226 *prefix = p;
7227 return(l);
7228 }
7229 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7230 l = xmlDictLookup(ctxt->dict, tmp, -1);
7231 if (tmp != NULL) xmlFree(tmp);
7232 *prefix = p;
7233 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007234 }
7235 *prefix = p;
7236 } else
7237 *prefix = NULL;
7238 return(l);
7239}
7240
7241/**
7242 * xmlParseQNameAndCompare:
7243 * @ctxt: an XML parser context
7244 * @name: the localname
7245 * @prefix: the prefix, if any.
7246 *
7247 * parse an XML name and compares for match
7248 * (specialized for endtag parsing)
7249 *
7250 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7251 * and the name for mismatch
7252 */
7253
7254static const xmlChar *
7255xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7256 xmlChar const *prefix) {
7257 const xmlChar *cmp = name;
7258 const xmlChar *in;
7259 const xmlChar *ret;
7260 const xmlChar *prefix2;
7261
7262 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7263
7264 GROW;
7265 in = ctxt->input->cur;
7266
7267 cmp = prefix;
7268 while (*in != 0 && *in == *cmp) {
7269 ++in;
7270 ++cmp;
7271 }
7272 if ((*cmp == 0) && (*in == ':')) {
7273 in++;
7274 cmp = name;
7275 while (*in != 0 && *in == *cmp) {
7276 ++in;
7277 ++cmp;
7278 }
7279 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7280 /* success */
7281 ctxt->input->cur = in;
7282 return((const xmlChar*) 1);
7283 }
7284 }
7285 /*
7286 * all strings coms from the dictionary, equality can be done directly
7287 */
7288 ret = xmlParseQName (ctxt, &prefix2);
7289 if ((ret == name) && (prefix == prefix2))
7290 return((const xmlChar*) 1);
7291 return ret;
7292}
7293
7294/**
7295 * xmlParseAttValueInternal:
7296 * @ctxt: an XML parser context
7297 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007298 * @alloc: whether the attribute was reallocated as a new string
7299 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007300 *
7301 * parse a value for an attribute.
7302 * NOTE: if no normalization is needed, the routine will return pointers
7303 * directly from the data buffer.
7304 *
7305 * 3.3.3 Attribute-Value Normalization:
7306 * Before the value of an attribute is passed to the application or
7307 * checked for validity, the XML processor must normalize it as follows:
7308 * - a character reference is processed by appending the referenced
7309 * character to the attribute value
7310 * - an entity reference is processed by recursively processing the
7311 * replacement text of the entity
7312 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7313 * appending #x20 to the normalized value, except that only a single
7314 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7315 * parsed entity or the literal entity value of an internal parsed entity
7316 * - other characters are processed by appending them to the normalized value
7317 * If the declared value is not CDATA, then the XML processor must further
7318 * process the normalized attribute value by discarding any leading and
7319 * trailing space (#x20) characters, and by replacing sequences of space
7320 * (#x20) characters by a single space (#x20) character.
7321 * All attributes for which no declaration has been read should be treated
7322 * by a non-validating parser as if declared CDATA.
7323 *
7324 * Returns the AttValue parsed or NULL. The value has to be freed by the
7325 * caller if it was copied, this can be detected by val[*len] == 0.
7326 */
7327
7328static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007329xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7330 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007331{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007332 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007333 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 xmlChar *ret = NULL;
7335
7336 GROW;
7337 in = (xmlChar *) CUR_PTR;
7338 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007339 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007340 return (NULL);
7341 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007342 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007343
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007344 /*
7345 * try to handle in this routine the most common case where no
7346 * allocation of a new string is required and where content is
7347 * pure ASCII.
7348 */
7349 limit = *in++;
7350 end = ctxt->input->end;
7351 start = in;
7352 if (in >= end) {
7353 const xmlChar *oldbase = ctxt->input->base;
7354 GROW;
7355 if (oldbase != ctxt->input->base) {
7356 long delta = ctxt->input->base - oldbase;
7357 start = start + delta;
7358 in = in + delta;
7359 }
7360 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007361 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007362 if (normalize) {
7363 /*
7364 * Skip any leading spaces
7365 */
7366 while ((in < end) && (*in != limit) &&
7367 ((*in == 0x20) || (*in == 0x9) ||
7368 (*in == 0xA) || (*in == 0xD))) {
7369 in++;
7370 start = in;
7371 if (in >= end) {
7372 const xmlChar *oldbase = ctxt->input->base;
7373 GROW;
7374 if (oldbase != ctxt->input->base) {
7375 long delta = ctxt->input->base - oldbase;
7376 start = start + delta;
7377 in = in + delta;
7378 }
7379 end = ctxt->input->end;
7380 }
7381 }
7382 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7383 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7384 if ((*in++ == 0x20) && (*in == 0x20)) break;
7385 if (in >= end) {
7386 const xmlChar *oldbase = ctxt->input->base;
7387 GROW;
7388 if (oldbase != ctxt->input->base) {
7389 long delta = ctxt->input->base - oldbase;
7390 start = start + delta;
7391 in = in + delta;
7392 }
7393 end = ctxt->input->end;
7394 }
7395 }
7396 last = in;
7397 /*
7398 * skip the trailing blanks
7399 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007400 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007401 while ((in < end) && (*in != limit) &&
7402 ((*in == 0x20) || (*in == 0x9) ||
7403 (*in == 0xA) || (*in == 0xD))) {
7404 in++;
7405 if (in >= end) {
7406 const xmlChar *oldbase = ctxt->input->base;
7407 GROW;
7408 if (oldbase != ctxt->input->base) {
7409 long delta = ctxt->input->base - oldbase;
7410 start = start + delta;
7411 in = in + delta;
7412 last = last + delta;
7413 }
7414 end = ctxt->input->end;
7415 }
7416 }
7417 if (*in != limit) goto need_complex;
7418 } else {
7419 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7420 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7421 in++;
7422 if (in >= end) {
7423 const xmlChar *oldbase = ctxt->input->base;
7424 GROW;
7425 if (oldbase != ctxt->input->base) {
7426 long delta = ctxt->input->base - oldbase;
7427 start = start + delta;
7428 in = in + delta;
7429 }
7430 end = ctxt->input->end;
7431 }
7432 }
7433 last = in;
7434 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007435 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007436 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007437 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007438 *len = last - start;
7439 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007440 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007441 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007442 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007443 }
7444 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007445 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007446 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007447need_complex:
7448 if (alloc) *alloc = 1;
7449 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007450}
7451
7452/**
7453 * xmlParseAttribute2:
7454 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007455 * @pref: the element prefix
7456 * @elem: the element name
7457 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007458 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007459 * @len: an int * to save the length of the attribute
7460 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 *
7462 * parse an attribute in the new SAX2 framework.
7463 *
7464 * Returns the attribute name, and the value in *value, .
7465 */
7466
7467static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007468xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7469 const xmlChar *pref, const xmlChar *elem,
7470 const xmlChar **prefix, xmlChar **value,
7471 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007472 const xmlChar *name;
7473 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007474 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007475
7476 *value = NULL;
7477 GROW;
7478 name = xmlParseQName(ctxt, prefix);
7479 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007480 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7481 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007482 return(NULL);
7483 }
7484
7485 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007486 * get the type if needed
7487 */
7488 if (ctxt->attsSpecial != NULL) {
7489 int type;
7490
7491 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7492 pref, elem, *prefix, name);
7493 if (type != 0) normalize = 1;
7494 }
7495
7496 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007497 * read the value
7498 */
7499 SKIP_BLANKS;
7500 if (RAW == '=') {
7501 NEXT;
7502 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007503 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007504 ctxt->instate = XML_PARSER_CONTENT;
7505 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007506 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007507 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007508 return(NULL);
7509 }
7510
7511 /*
7512 * Check that xml:lang conforms to the specification
7513 * No more registered as an error, just generate a warning now
7514 * since this was deprecated in XML second edition
7515 */
7516 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7517 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007518 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7519 "Malformed value for xml:lang : %s\n",
7520 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007521 }
7522 }
7523
7524 /*
7525 * Check that xml:space conforms to the specification
7526 */
7527 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7528 if (xmlStrEqual(val, BAD_CAST "default"))
7529 *(ctxt->space) = 0;
7530 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7531 *(ctxt->space) = 1;
7532 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007533 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007534"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7535 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007536 }
7537 }
7538
7539 *value = val;
7540 return(name);
7541}
7542
7543/**
7544 * xmlParseStartTag2:
7545 * @ctxt: an XML parser context
7546 *
7547 * parse a start of tag either for rule element or
7548 * EmptyElement. In both case we don't parse the tag closing chars.
7549 * This routine is called when running SAX2 parsing
7550 *
7551 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7552 *
7553 * [ WFC: Unique Att Spec ]
7554 * No attribute name may appear more than once in the same start-tag or
7555 * empty-element tag.
7556 *
7557 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7558 *
7559 * [ WFC: Unique Att Spec ]
7560 * No attribute name may appear more than once in the same start-tag or
7561 * empty-element tag.
7562 *
7563 * With namespace:
7564 *
7565 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7566 *
7567 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7568 *
7569 * Returns the element name parsed
7570 */
7571
7572static const xmlChar *
7573xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7574 const xmlChar **URI) {
7575 const xmlChar *localname;
7576 const xmlChar *prefix;
7577 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007578 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007579 const xmlChar *nsname;
7580 xmlChar *attvalue;
7581 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007582 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007583 int nratts, nbatts, nbdef;
7584 int i, j, nbNs, attval;
7585 const xmlChar *base;
7586 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007587
7588 if (RAW != '<') return(NULL);
7589 NEXT1;
7590
7591 /*
7592 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7593 * point since the attribute values may be stored as pointers to
7594 * the buffer and calling SHRINK would destroy them !
7595 * The Shrinking is only possible once the full set of attribute
7596 * callbacks have been done.
7597 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007598reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007599 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007600 base = ctxt->input->base;
7601 cur = ctxt->input->cur - ctxt->input->base;
7602 nbatts = 0;
7603 nratts = 0;
7604 nbdef = 0;
7605 nbNs = 0;
7606 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007607
7608 localname = xmlParseQName(ctxt, &prefix);
7609 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007610 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7611 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007612 return(NULL);
7613 }
7614
7615 /*
7616 * Now parse the attributes, it ends up with the ending
7617 *
7618 * (S Attribute)* S?
7619 */
7620 SKIP_BLANKS;
7621 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007622 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007623
7624 while ((RAW != '>') &&
7625 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007626 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 const xmlChar *q = CUR_PTR;
7628 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007629 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007630
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007631 attname = xmlParseAttribute2(ctxt, prefix, localname,
7632 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633 if ((attname != NULL) && (attvalue != NULL)) {
7634 if (len < 0) len = xmlStrlen(attvalue);
7635 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007636 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7637 xmlURIPtr uri;
7638
7639 if (*URL != 0) {
7640 uri = xmlParseURI((const char *) URL);
7641 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007642 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7643 "xmlns: %s not a valid URI\n",
7644 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007645 } else {
7646 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007647 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7648 "xmlns: URI %s is not absolute\n",
7649 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007650 }
7651 xmlFreeURI(uri);
7652 }
7653 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007654 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007655 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007656 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007657 for (j = 1;j <= nbNs;j++)
7658 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7659 break;
7660 if (j <= nbNs)
7661 xmlErrAttributeDup(ctxt, NULL, attname);
7662 else
7663 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007664 if (alloc != 0) xmlFree(attvalue);
7665 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007666 continue;
7667 }
7668 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007669 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7670 xmlURIPtr uri;
7671
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007672 if (attname == ctxt->str_xml) {
7673 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007674 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7675 "xml namespace prefix mapped to wrong URI\n",
7676 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007677 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007678 /*
7679 * Do not keep a namespace definition node
7680 */
7681 if (alloc != 0) xmlFree(attvalue);
7682 SKIP_BLANKS;
7683 continue;
7684 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007685 uri = xmlParseURI((const char *) URL);
7686 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007687 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7688 "xmlns:%s: '%s' is not a valid URI\n",
7689 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007690 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007691 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007692 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7693 "xmlns:%s: URI %s is not absolute\n",
7694 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007695 }
7696 xmlFreeURI(uri);
7697 }
7698
Daniel Veillard0fb18932003-09-07 09:14:37 +00007699 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007700 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007702 for (j = 1;j <= nbNs;j++)
7703 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7704 break;
7705 if (j <= nbNs)
7706 xmlErrAttributeDup(ctxt, aprefix, attname);
7707 else
7708 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007709 if (alloc != 0) xmlFree(attvalue);
7710 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007711 continue;
7712 }
7713
7714 /*
7715 * Add the pair to atts
7716 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007717 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7718 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007719 if (attvalue[len] == 0)
7720 xmlFree(attvalue);
7721 goto failed;
7722 }
7723 maxatts = ctxt->maxatts;
7724 atts = ctxt->atts;
7725 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007726 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727 atts[nbatts++] = attname;
7728 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007729 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007730 atts[nbatts++] = attvalue;
7731 attvalue += len;
7732 atts[nbatts++] = attvalue;
7733 /*
7734 * tag if some deallocation is needed
7735 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007736 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 } else {
7738 if ((attvalue != NULL) && (attvalue[len] == 0))
7739 xmlFree(attvalue);
7740 }
7741
7742failed:
7743
7744 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007745 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007746 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7747 break;
7748 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007749 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7750 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007751 }
7752 SKIP_BLANKS;
7753 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7754 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007755 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007756 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007757 break;
7758 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007759 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007760 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007761 }
7762
Daniel Veillard0fb18932003-09-07 09:14:37 +00007763 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007764 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007765 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007766 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007767 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7768 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007769 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007770 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007771 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007772 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007773 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 /*
7775 * [ WFC: Unique Att Spec ]
7776 * No attribute name may appear more than once in the same
7777 * start-tag or empty-element tag.
7778 * As extended by the Namespace in XML REC.
7779 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007780 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007781 if (atts[i] == atts[j]) {
7782 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007783 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007784 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007786 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007787 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007788 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007789 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007790 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007791 }
7792 }
7793 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007794 }
7795
7796 /*
7797 * The attributes defaulting
7798 */
7799 if (ctxt->attsDefault != NULL) {
7800 xmlDefAttrsPtr defaults;
7801
7802 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7803 if (defaults != NULL) {
7804 for (i = 0;i < defaults->nbAttrs;i++) {
7805 attname = defaults->values[4 * i];
7806 aprefix = defaults->values[4 * i + 1];
7807
7808 /*
7809 * special work for namespaces defaulted defs
7810 */
7811 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7812 /*
7813 * check that it's not a defined namespace
7814 */
7815 for (j = 1;j <= nbNs;j++)
7816 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7817 break;
7818 if (j <= nbNs) continue;
7819
7820 nsname = xmlGetNamespace(ctxt, NULL);
7821 if (nsname != defaults->values[4 * i + 2]) {
7822 if (nsPush(ctxt, NULL,
7823 defaults->values[4 * i + 2]) > 0)
7824 nbNs++;
7825 }
7826 } else if (aprefix == ctxt->str_xmlns) {
7827 /*
7828 * check that it's not a defined namespace
7829 */
7830 for (j = 1;j <= nbNs;j++)
7831 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7832 break;
7833 if (j <= nbNs) continue;
7834
7835 nsname = xmlGetNamespace(ctxt, attname);
7836 if (nsname != defaults->values[2]) {
7837 if (nsPush(ctxt, attname,
7838 defaults->values[4 * i + 2]) > 0)
7839 nbNs++;
7840 }
7841 } else {
7842 /*
7843 * check that it's not a defined attribute
7844 */
7845 for (j = 0;j < nbatts;j+=5) {
7846 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7847 break;
7848 }
7849 if (j < nbatts) continue;
7850
7851 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7852 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007853 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007854 }
7855 maxatts = ctxt->maxatts;
7856 atts = ctxt->atts;
7857 }
7858 atts[nbatts++] = attname;
7859 atts[nbatts++] = aprefix;
7860 if (aprefix == NULL)
7861 atts[nbatts++] = NULL;
7862 else
7863 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7864 atts[nbatts++] = defaults->values[4 * i + 2];
7865 atts[nbatts++] = defaults->values[4 * i + 3];
7866 nbdef++;
7867 }
7868 }
7869 }
7870 }
7871
7872 nsname = xmlGetNamespace(ctxt, prefix);
7873 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007874 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7875 "Namespace prefix %s on %s is not defined\n",
7876 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007877 }
7878 *pref = prefix;
7879 *URI = nsname;
7880
7881 /*
7882 * SAX: Start of Element !
7883 */
7884 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7885 (!ctxt->disableSAX)) {
7886 if (nbNs > 0)
7887 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7888 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7889 nbatts / 5, nbdef, atts);
7890 else
7891 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7892 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7893 }
7894
7895 /*
7896 * Free up attribute allocated strings if needed
7897 */
7898 if (attval != 0) {
7899 for (i = 3,j = 0; j < nratts;i += 5,j++)
7900 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7901 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 }
7903
7904 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007905
7906base_changed:
7907 /*
7908 * the attribute strings are valid iif the base didn't changed
7909 */
7910 if (attval != 0) {
7911 for (i = 3,j = 0; j < nratts;i += 5,j++)
7912 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7913 xmlFree((xmlChar *) atts[i]);
7914 }
7915 ctxt->input->cur = ctxt->input->base + cur;
7916 if (ctxt->wellFormed == 1) {
7917 goto reparse;
7918 }
7919 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007920}
7921
7922/**
7923 * xmlParseEndTag2:
7924 * @ctxt: an XML parser context
7925 * @line: line of the start tag
7926 * @nsNr: number of namespaces on the start tag
7927 *
7928 * parse an end of tag
7929 *
7930 * [42] ETag ::= '</' Name S? '>'
7931 *
7932 * With namespace
7933 *
7934 * [NS 9] ETag ::= '</' QName S? '>'
7935 */
7936
7937static void
7938xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7939 const xmlChar *URI, int line, int nsNr) {
7940 const xmlChar *name;
7941
7942 GROW;
7943 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007944 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007945 return;
7946 }
7947 SKIP(2);
7948
7949 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7950
7951 /*
7952 * We should definitely be at the ending "S? '>'" part
7953 */
7954 GROW;
7955 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007956 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007957 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007958 } else
7959 NEXT1;
7960
7961 /*
7962 * [ WFC: Element Type Match ]
7963 * The Name in an element's end-tag must match the element type in the
7964 * start-tag.
7965 *
7966 */
7967 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007968 if (name == NULL) name = BAD_CAST "unparseable";
7969 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007971 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007972 }
7973
7974 /*
7975 * SAX: End of Tag
7976 */
7977 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7978 (!ctxt->disableSAX))
7979 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7980
Daniel Veillard0fb18932003-09-07 09:14:37 +00007981 spacePop(ctxt);
7982 if (nsNr != 0)
7983 nsPop(ctxt, nsNr);
7984 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007985}
7986
7987/**
Owen Taylor3473f882001-02-23 17:55:21 +00007988 * xmlParseCDSect:
7989 * @ctxt: an XML parser context
7990 *
7991 * Parse escaped pure raw content.
7992 *
7993 * [18] CDSect ::= CDStart CData CDEnd
7994 *
7995 * [19] CDStart ::= '<![CDATA['
7996 *
7997 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7998 *
7999 * [21] CDEnd ::= ']]>'
8000 */
8001void
8002xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8003 xmlChar *buf = NULL;
8004 int len = 0;
8005 int size = XML_PARSER_BUFFER_SIZE;
8006 int r, rl;
8007 int s, sl;
8008 int cur, l;
8009 int count = 0;
8010
Daniel Veillard8f597c32003-10-06 08:19:27 +00008011 /* Check 2.6.0 was NXT(0) not RAW */
8012 if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008013 SKIP(9);
8014 } else
8015 return;
8016
8017 ctxt->instate = XML_PARSER_CDATA_SECTION;
8018 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008019 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008020 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008021 ctxt->instate = XML_PARSER_CONTENT;
8022 return;
8023 }
8024 NEXTL(rl);
8025 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008026 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008027 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008028 ctxt->instate = XML_PARSER_CONTENT;
8029 return;
8030 }
8031 NEXTL(sl);
8032 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008033 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008034 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008035 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008036 return;
8037 }
William M. Brack871611b2003-10-18 04:53:14 +00008038 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008039 ((r != ']') || (s != ']') || (cur != '>'))) {
8040 if (len + 5 >= size) {
8041 size *= 2;
8042 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8043 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008044 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008045 return;
8046 }
8047 }
8048 COPY_BUF(rl,buf,len,r);
8049 r = s;
8050 rl = sl;
8051 s = cur;
8052 sl = l;
8053 count++;
8054 if (count > 50) {
8055 GROW;
8056 count = 0;
8057 }
8058 NEXTL(l);
8059 cur = CUR_CHAR(l);
8060 }
8061 buf[len] = 0;
8062 ctxt->instate = XML_PARSER_CONTENT;
8063 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008064 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008065 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008066 xmlFree(buf);
8067 return;
8068 }
8069 NEXTL(l);
8070
8071 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008072 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008073 */
8074 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8075 if (ctxt->sax->cdataBlock != NULL)
8076 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008077 else if (ctxt->sax->characters != NULL)
8078 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008079 }
8080 xmlFree(buf);
8081}
8082
8083/**
8084 * xmlParseContent:
8085 * @ctxt: an XML parser context
8086 *
8087 * Parse a content:
8088 *
8089 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8090 */
8091
8092void
8093xmlParseContent(xmlParserCtxtPtr ctxt) {
8094 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008095 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008096 ((RAW != '<') || (NXT(1) != '/'))) {
8097 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008098 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008099 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008100
8101 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008102 * First case : a Processing Instruction.
8103 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008104 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008105 xmlParsePI(ctxt);
8106 }
8107
8108 /*
8109 * Second case : a CDSection
8110 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008111 /* 2.6.0 test was *cur not RAW */
8112 else if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008113 xmlParseCDSect(ctxt);
8114 }
8115
8116 /*
8117 * Third case : a comment
8118 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008119 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008120 (NXT(2) == '-') && (NXT(3) == '-')) {
8121 xmlParseComment(ctxt);
8122 ctxt->instate = XML_PARSER_CONTENT;
8123 }
8124
8125 /*
8126 * Fourth case : a sub-element.
8127 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008128 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008129 xmlParseElement(ctxt);
8130 }
8131
8132 /*
8133 * Fifth case : a reference. If if has not been resolved,
8134 * parsing returns it's Name, create the node
8135 */
8136
Daniel Veillard21a0f912001-02-25 19:54:14 +00008137 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008138 xmlParseReference(ctxt);
8139 }
8140
8141 /*
8142 * Last case, text. Note that References are handled directly.
8143 */
8144 else {
8145 xmlParseCharData(ctxt, 0);
8146 }
8147
8148 GROW;
8149 /*
8150 * Pop-up of finished entities.
8151 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008152 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008153 xmlPopInput(ctxt);
8154 SHRINK;
8155
Daniel Veillardfdc91562002-07-01 21:52:03 +00008156 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008157 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8158 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008159 ctxt->instate = XML_PARSER_EOF;
8160 break;
8161 }
8162 }
8163}
8164
8165/**
8166 * xmlParseElement:
8167 * @ctxt: an XML parser context
8168 *
8169 * parse an XML element, this is highly recursive
8170 *
8171 * [39] element ::= EmptyElemTag | STag content ETag
8172 *
8173 * [ WFC: Element Type Match ]
8174 * The Name in an element's end-tag must match the element type in the
8175 * start-tag.
8176 *
Owen Taylor3473f882001-02-23 17:55:21 +00008177 */
8178
8179void
8180xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008181 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008182 const xmlChar *prefix;
8183 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008184 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008185 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008186 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008187 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008188
8189 /* Capture start position */
8190 if (ctxt->record_info) {
8191 node_info.begin_pos = ctxt->input->consumed +
8192 (CUR_PTR - ctxt->input->base);
8193 node_info.begin_line = ctxt->input->line;
8194 }
8195
8196 if (ctxt->spaceNr == 0)
8197 spacePush(ctxt, -1);
8198 else
8199 spacePush(ctxt, *ctxt->space);
8200
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008201 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008202#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008203 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008204#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008206#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 else
8208 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008209#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008210 if (name == NULL) {
8211 spacePop(ctxt);
8212 return;
8213 }
8214 namePush(ctxt, name);
8215 ret = ctxt->node;
8216
Daniel Veillard4432df22003-09-28 18:58:27 +00008217#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008218 /*
8219 * [ VC: Root Element Type ]
8220 * The Name in the document type declaration must match the element
8221 * type of the root element.
8222 */
8223 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8224 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8225 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008226#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008227
8228 /*
8229 * Check for an Empty Element.
8230 */
8231 if ((RAW == '/') && (NXT(1) == '>')) {
8232 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233 if (ctxt->sax2) {
8234 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8235 (!ctxt->disableSAX))
8236 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008237#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008238 } else {
8239 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8240 (!ctxt->disableSAX))
8241 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008242#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008243 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008244 namePop(ctxt);
8245 spacePop(ctxt);
8246 if (nsNr != ctxt->nsNr)
8247 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008248 if ( ret != NULL && ctxt->record_info ) {
8249 node_info.end_pos = ctxt->input->consumed +
8250 (CUR_PTR - ctxt->input->base);
8251 node_info.end_line = ctxt->input->line;
8252 node_info.node = ret;
8253 xmlParserAddNodeInfo(ctxt, &node_info);
8254 }
8255 return;
8256 }
8257 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008258 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008259 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008260 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8261 "Couldn't find end of Start Tag %s line %d\n",
8262 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008263
8264 /*
8265 * end of parsing of this node.
8266 */
8267 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008269 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008270 if (nsNr != ctxt->nsNr)
8271 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008272
8273 /*
8274 * Capture end position and add node
8275 */
8276 if ( ret != NULL && ctxt->record_info ) {
8277 node_info.end_pos = ctxt->input->consumed +
8278 (CUR_PTR - ctxt->input->base);
8279 node_info.end_line = ctxt->input->line;
8280 node_info.node = ret;
8281 xmlParserAddNodeInfo(ctxt, &node_info);
8282 }
8283 return;
8284 }
8285
8286 /*
8287 * Parse the content of the element:
8288 */
8289 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008290 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008291 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008292 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008293 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008294
8295 /*
8296 * end of parsing of this node.
8297 */
8298 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008299 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008300 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008301 if (nsNr != ctxt->nsNr)
8302 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008303 return;
8304 }
8305
8306 /*
8307 * parse the end of tag: '</' should be here.
8308 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008309 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008310 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008311 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008312 }
8313#ifdef LIBXML_SAX1_ENABLED
8314 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008315 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008316#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008317
8318 /*
8319 * Capture end position and add node
8320 */
8321 if ( ret != NULL && ctxt->record_info ) {
8322 node_info.end_pos = ctxt->input->consumed +
8323 (CUR_PTR - ctxt->input->base);
8324 node_info.end_line = ctxt->input->line;
8325 node_info.node = ret;
8326 xmlParserAddNodeInfo(ctxt, &node_info);
8327 }
8328}
8329
8330/**
8331 * xmlParseVersionNum:
8332 * @ctxt: an XML parser context
8333 *
8334 * parse the XML version value.
8335 *
8336 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8337 *
8338 * Returns the string giving the XML version number, or NULL
8339 */
8340xmlChar *
8341xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8342 xmlChar *buf = NULL;
8343 int len = 0;
8344 int size = 10;
8345 xmlChar cur;
8346
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008347 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008348 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008349 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008350 return(NULL);
8351 }
8352 cur = CUR;
8353 while (((cur >= 'a') && (cur <= 'z')) ||
8354 ((cur >= 'A') && (cur <= 'Z')) ||
8355 ((cur >= '0') && (cur <= '9')) ||
8356 (cur == '_') || (cur == '.') ||
8357 (cur == ':') || (cur == '-')) {
8358 if (len + 1 >= size) {
8359 size *= 2;
8360 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8361 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008362 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008363 return(NULL);
8364 }
8365 }
8366 buf[len++] = cur;
8367 NEXT;
8368 cur=CUR;
8369 }
8370 buf[len] = 0;
8371 return(buf);
8372}
8373
8374/**
8375 * xmlParseVersionInfo:
8376 * @ctxt: an XML parser context
8377 *
8378 * parse the XML version.
8379 *
8380 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8381 *
8382 * [25] Eq ::= S? '=' S?
8383 *
8384 * Returns the version string, e.g. "1.0"
8385 */
8386
8387xmlChar *
8388xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8389 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008390
Daniel Veillard8f597c32003-10-06 08:19:27 +00008391 if (memcmp(CUR_PTR, "version", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008392 SKIP(7);
8393 SKIP_BLANKS;
8394 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008395 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008396 return(NULL);
8397 }
8398 NEXT;
8399 SKIP_BLANKS;
8400 if (RAW == '"') {
8401 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008402 version = xmlParseVersionNum(ctxt);
8403 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008404 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008405 } else
8406 NEXT;
8407 } else if (RAW == '\''){
8408 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008409 version = xmlParseVersionNum(ctxt);
8410 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008411 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008412 } else
8413 NEXT;
8414 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008415 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008416 }
8417 }
8418 return(version);
8419}
8420
8421/**
8422 * xmlParseEncName:
8423 * @ctxt: an XML parser context
8424 *
8425 * parse the XML encoding name
8426 *
8427 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8428 *
8429 * Returns the encoding name value or NULL
8430 */
8431xmlChar *
8432xmlParseEncName(xmlParserCtxtPtr ctxt) {
8433 xmlChar *buf = NULL;
8434 int len = 0;
8435 int size = 10;
8436 xmlChar cur;
8437
8438 cur = CUR;
8439 if (((cur >= 'a') && (cur <= 'z')) ||
8440 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008441 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008442 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008443 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008444 return(NULL);
8445 }
8446
8447 buf[len++] = cur;
8448 NEXT;
8449 cur = CUR;
8450 while (((cur >= 'a') && (cur <= 'z')) ||
8451 ((cur >= 'A') && (cur <= 'Z')) ||
8452 ((cur >= '0') && (cur <= '9')) ||
8453 (cur == '.') || (cur == '_') ||
8454 (cur == '-')) {
8455 if (len + 1 >= size) {
8456 size *= 2;
8457 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8458 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008459 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008460 return(NULL);
8461 }
8462 }
8463 buf[len++] = cur;
8464 NEXT;
8465 cur = CUR;
8466 if (cur == 0) {
8467 SHRINK;
8468 GROW;
8469 cur = CUR;
8470 }
8471 }
8472 buf[len] = 0;
8473 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008474 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008475 }
8476 return(buf);
8477}
8478
8479/**
8480 * xmlParseEncodingDecl:
8481 * @ctxt: an XML parser context
8482 *
8483 * parse the XML encoding declaration
8484 *
8485 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8486 *
8487 * this setups the conversion filters.
8488 *
8489 * Returns the encoding value or NULL
8490 */
8491
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008492const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008493xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8494 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008495
8496 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008497 if (memcmp(CUR_PTR, "encoding", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008498 SKIP(8);
8499 SKIP_BLANKS;
8500 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008501 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008502 return(NULL);
8503 }
8504 NEXT;
8505 SKIP_BLANKS;
8506 if (RAW == '"') {
8507 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008508 encoding = xmlParseEncName(ctxt);
8509 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008510 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008511 } else
8512 NEXT;
8513 } else if (RAW == '\''){
8514 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008515 encoding = xmlParseEncName(ctxt);
8516 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008517 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008518 } else
8519 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008520 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008521 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008522 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008523 /*
8524 * UTF-16 encoding stwich has already taken place at this stage,
8525 * more over the little-endian/big-endian selection is already done
8526 */
8527 if ((encoding != NULL) &&
8528 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8529 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008530 if (ctxt->encoding != NULL)
8531 xmlFree((xmlChar *) ctxt->encoding);
8532 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008533 }
8534 /*
8535 * UTF-8 encoding is handled natively
8536 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008537 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008538 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8539 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008540 if (ctxt->encoding != NULL)
8541 xmlFree((xmlChar *) ctxt->encoding);
8542 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008543 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008544 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008545 xmlCharEncodingHandlerPtr handler;
8546
8547 if (ctxt->input->encoding != NULL)
8548 xmlFree((xmlChar *) ctxt->input->encoding);
8549 ctxt->input->encoding = encoding;
8550
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008551 handler = xmlFindCharEncodingHandler((const char *) encoding);
8552 if (handler != NULL) {
8553 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008554 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008555 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008556 "Unsupported encoding %s\n", encoding);
8557 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008558 }
8559 }
8560 }
8561 return(encoding);
8562}
8563
8564/**
8565 * xmlParseSDDecl:
8566 * @ctxt: an XML parser context
8567 *
8568 * parse the XML standalone declaration
8569 *
8570 * [32] SDDecl ::= S 'standalone' Eq
8571 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8572 *
8573 * [ VC: Standalone Document Declaration ]
8574 * TODO The standalone document declaration must have the value "no"
8575 * if any external markup declarations contain declarations of:
8576 * - attributes with default values, if elements to which these
8577 * attributes apply appear in the document without specifications
8578 * of values for these attributes, or
8579 * - entities (other than amp, lt, gt, apos, quot), if references
8580 * to those entities appear in the document, or
8581 * - attributes with values subject to normalization, where the
8582 * attribute appears in the document with a value which will change
8583 * as a result of normalization, or
8584 * - element types with element content, if white space occurs directly
8585 * within any instance of those types.
8586 *
8587 * Returns 1 if standalone, 0 otherwise
8588 */
8589
8590int
8591xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8592 int standalone = -1;
8593
8594 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008595 if (memcmp(CUR_PTR, "standalone", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008596 SKIP(10);
8597 SKIP_BLANKS;
8598 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008599 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008600 return(standalone);
8601 }
8602 NEXT;
8603 SKIP_BLANKS;
8604 if (RAW == '\''){
8605 NEXT;
8606 if ((RAW == 'n') && (NXT(1) == 'o')) {
8607 standalone = 0;
8608 SKIP(2);
8609 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8610 (NXT(2) == 's')) {
8611 standalone = 1;
8612 SKIP(3);
8613 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008614 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008615 }
8616 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008617 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008618 } else
8619 NEXT;
8620 } else if (RAW == '"'){
8621 NEXT;
8622 if ((RAW == 'n') && (NXT(1) == 'o')) {
8623 standalone = 0;
8624 SKIP(2);
8625 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8626 (NXT(2) == 's')) {
8627 standalone = 1;
8628 SKIP(3);
8629 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008630 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008631 }
8632 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008633 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008634 } else
8635 NEXT;
8636 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008637 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008638 }
8639 }
8640 return(standalone);
8641}
8642
8643/**
8644 * xmlParseXMLDecl:
8645 * @ctxt: an XML parser context
8646 *
8647 * parse an XML declaration header
8648 *
8649 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8650 */
8651
8652void
8653xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8654 xmlChar *version;
8655
8656 /*
8657 * We know that '<?xml' is here.
8658 */
8659 SKIP(5);
8660
8661 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008662 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8663 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008664 }
8665 SKIP_BLANKS;
8666
8667 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008668 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008669 */
8670 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008671 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008672 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008673 } else {
8674 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8675 /*
8676 * TODO: Blueberry should be detected here
8677 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008678 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8679 "Unsupported version '%s'\n",
8680 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008681 }
8682 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008683 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008684 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008685 }
Owen Taylor3473f882001-02-23 17:55:21 +00008686
8687 /*
8688 * We may have the encoding declaration
8689 */
8690 if (!IS_BLANK(RAW)) {
8691 if ((RAW == '?') && (NXT(1) == '>')) {
8692 SKIP(2);
8693 return;
8694 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008695 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008696 }
8697 xmlParseEncodingDecl(ctxt);
8698 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8699 /*
8700 * The XML REC instructs us to stop parsing right here
8701 */
8702 return;
8703 }
8704
8705 /*
8706 * We may have the standalone status.
8707 */
8708 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8709 if ((RAW == '?') && (NXT(1) == '>')) {
8710 SKIP(2);
8711 return;
8712 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008714 }
8715 SKIP_BLANKS;
8716 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8717
8718 SKIP_BLANKS;
8719 if ((RAW == '?') && (NXT(1) == '>')) {
8720 SKIP(2);
8721 } else if (RAW == '>') {
8722 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008723 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008724 NEXT;
8725 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008726 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008727 MOVETO_ENDTAG(CUR_PTR);
8728 NEXT;
8729 }
8730}
8731
8732/**
8733 * xmlParseMisc:
8734 * @ctxt: an XML parser context
8735 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008736 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008737 *
8738 * [27] Misc ::= Comment | PI | S
8739 */
8740
8741void
8742xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008743 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillard8f597c32003-10-06 08:19:27 +00008744 (memcmp(CUR_PTR, "<!--", 4) == 0) ||
Daniel Veillard561b7f82002-03-20 21:55:57 +00008745 IS_BLANK(CUR)) {
8746 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008747 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008748 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008749 NEXT;
8750 } else
8751 xmlParseComment(ctxt);
8752 }
8753}
8754
8755/**
8756 * xmlParseDocument:
8757 * @ctxt: an XML parser context
8758 *
8759 * parse an XML document (and build a tree if using the standard SAX
8760 * interface).
8761 *
8762 * [1] document ::= prolog element Misc*
8763 *
8764 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8765 *
8766 * Returns 0, -1 in case of error. the parser context is augmented
8767 * as a result of the parsing.
8768 */
8769
8770int
8771xmlParseDocument(xmlParserCtxtPtr ctxt) {
8772 xmlChar start[4];
8773 xmlCharEncoding enc;
8774
8775 xmlInitParser();
8776
8777 GROW;
8778
8779 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008780 * SAX: detecting the level.
8781 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008782 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008783
8784 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008785 * SAX: beginning of the document processing.
8786 */
8787 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8788 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8789
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008790 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8791 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008792 /*
8793 * Get the 4 first bytes and decode the charset
8794 * if enc != XML_CHAR_ENCODING_NONE
8795 * plug some encoding conversion routines.
8796 */
8797 start[0] = RAW;
8798 start[1] = NXT(1);
8799 start[2] = NXT(2);
8800 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008801 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008802 if (enc != XML_CHAR_ENCODING_NONE) {
8803 xmlSwitchEncoding(ctxt, enc);
8804 }
Owen Taylor3473f882001-02-23 17:55:21 +00008805 }
8806
8807
8808 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008810 }
8811
8812 /*
8813 * Check for the XMLDecl in the Prolog.
8814 */
8815 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008816 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008817
8818 /*
8819 * Note that we will switch encoding on the fly.
8820 */
8821 xmlParseXMLDecl(ctxt);
8822 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8823 /*
8824 * The XML REC instructs us to stop parsing right here
8825 */
8826 return(-1);
8827 }
8828 ctxt->standalone = ctxt->input->standalone;
8829 SKIP_BLANKS;
8830 } else {
8831 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8832 }
8833 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8834 ctxt->sax->startDocument(ctxt->userData);
8835
8836 /*
8837 * The Misc part of the Prolog
8838 */
8839 GROW;
8840 xmlParseMisc(ctxt);
8841
8842 /*
8843 * Then possibly doc type declaration(s) and more Misc
8844 * (doctypedecl Misc*)?
8845 */
8846 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008847 if (memcmp(CUR_PTR, "<!DOCTYPE", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008848
8849 ctxt->inSubset = 1;
8850 xmlParseDocTypeDecl(ctxt);
8851 if (RAW == '[') {
8852 ctxt->instate = XML_PARSER_DTD;
8853 xmlParseInternalSubset(ctxt);
8854 }
8855
8856 /*
8857 * Create and update the external subset.
8858 */
8859 ctxt->inSubset = 2;
8860 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8861 (!ctxt->disableSAX))
8862 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8863 ctxt->extSubSystem, ctxt->extSubURI);
8864 ctxt->inSubset = 0;
8865
8866
8867 ctxt->instate = XML_PARSER_PROLOG;
8868 xmlParseMisc(ctxt);
8869 }
8870
8871 /*
8872 * Time to start parsing the tree itself
8873 */
8874 GROW;
8875 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008876 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8877 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008878 } else {
8879 ctxt->instate = XML_PARSER_CONTENT;
8880 xmlParseElement(ctxt);
8881 ctxt->instate = XML_PARSER_EPILOG;
8882
8883
8884 /*
8885 * The Misc part at the end
8886 */
8887 xmlParseMisc(ctxt);
8888
Daniel Veillard561b7f82002-03-20 21:55:57 +00008889 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008890 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008891 }
8892 ctxt->instate = XML_PARSER_EOF;
8893 }
8894
8895 /*
8896 * SAX: end of the document processing.
8897 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008898 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008899 ctxt->sax->endDocument(ctxt->userData);
8900
Daniel Veillard5997aca2002-03-18 18:36:20 +00008901 /*
8902 * Remove locally kept entity definitions if the tree was not built
8903 */
8904 if ((ctxt->myDoc != NULL) &&
8905 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8906 xmlFreeDoc(ctxt->myDoc);
8907 ctxt->myDoc = NULL;
8908 }
8909
Daniel Veillardc7612992002-02-17 22:47:37 +00008910 if (! ctxt->wellFormed) {
8911 ctxt->valid = 0;
8912 return(-1);
8913 }
Owen Taylor3473f882001-02-23 17:55:21 +00008914 return(0);
8915}
8916
8917/**
8918 * xmlParseExtParsedEnt:
8919 * @ctxt: an XML parser context
8920 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008921 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008922 * An external general parsed entity is well-formed if it matches the
8923 * production labeled extParsedEnt.
8924 *
8925 * [78] extParsedEnt ::= TextDecl? content
8926 *
8927 * Returns 0, -1 in case of error. the parser context is augmented
8928 * as a result of the parsing.
8929 */
8930
8931int
8932xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8933 xmlChar start[4];
8934 xmlCharEncoding enc;
8935
8936 xmlDefaultSAXHandlerInit();
8937
Daniel Veillard309f81d2003-09-23 09:02:53 +00008938 xmlDetectSAX2(ctxt);
8939
Owen Taylor3473f882001-02-23 17:55:21 +00008940 GROW;
8941
8942 /*
8943 * SAX: beginning of the document processing.
8944 */
8945 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8946 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8947
8948 /*
8949 * Get the 4 first bytes and decode the charset
8950 * if enc != XML_CHAR_ENCODING_NONE
8951 * plug some encoding conversion routines.
8952 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008953 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8954 start[0] = RAW;
8955 start[1] = NXT(1);
8956 start[2] = NXT(2);
8957 start[3] = NXT(3);
8958 enc = xmlDetectCharEncoding(start, 4);
8959 if (enc != XML_CHAR_ENCODING_NONE) {
8960 xmlSwitchEncoding(ctxt, enc);
8961 }
Owen Taylor3473f882001-02-23 17:55:21 +00008962 }
8963
8964
8965 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008967 }
8968
8969 /*
8970 * Check for the XMLDecl in the Prolog.
8971 */
8972 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008973 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008974
8975 /*
8976 * Note that we will switch encoding on the fly.
8977 */
8978 xmlParseXMLDecl(ctxt);
8979 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8980 /*
8981 * The XML REC instructs us to stop parsing right here
8982 */
8983 return(-1);
8984 }
8985 SKIP_BLANKS;
8986 } else {
8987 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8988 }
8989 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8990 ctxt->sax->startDocument(ctxt->userData);
8991
8992 /*
8993 * Doing validity checking on chunk doesn't make sense
8994 */
8995 ctxt->instate = XML_PARSER_CONTENT;
8996 ctxt->validate = 0;
8997 ctxt->loadsubset = 0;
8998 ctxt->depth = 0;
8999
9000 xmlParseContent(ctxt);
9001
9002 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009003 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009004 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009005 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009006 }
9007
9008 /*
9009 * SAX: end of the document processing.
9010 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009011 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009012 ctxt->sax->endDocument(ctxt->userData);
9013
9014 if (! ctxt->wellFormed) return(-1);
9015 return(0);
9016}
9017
Daniel Veillard73b013f2003-09-30 12:36:01 +00009018#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009019/************************************************************************
9020 * *
9021 * Progressive parsing interfaces *
9022 * *
9023 ************************************************************************/
9024
9025/**
9026 * xmlParseLookupSequence:
9027 * @ctxt: an XML parser context
9028 * @first: the first char to lookup
9029 * @next: the next char to lookup or zero
9030 * @third: the next char to lookup or zero
9031 *
9032 * Try to find if a sequence (first, next, third) or just (first next) or
9033 * (first) is available in the input stream.
9034 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9035 * to avoid rescanning sequences of bytes, it DOES change the state of the
9036 * parser, do not use liberally.
9037 *
9038 * Returns the index to the current parsing point if the full sequence
9039 * is available, -1 otherwise.
9040 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009041static int
Owen Taylor3473f882001-02-23 17:55:21 +00009042xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9043 xmlChar next, xmlChar third) {
9044 int base, len;
9045 xmlParserInputPtr in;
9046 const xmlChar *buf;
9047
9048 in = ctxt->input;
9049 if (in == NULL) return(-1);
9050 base = in->cur - in->base;
9051 if (base < 0) return(-1);
9052 if (ctxt->checkIndex > base)
9053 base = ctxt->checkIndex;
9054 if (in->buf == NULL) {
9055 buf = in->base;
9056 len = in->length;
9057 } else {
9058 buf = in->buf->buffer->content;
9059 len = in->buf->buffer->use;
9060 }
9061 /* take into account the sequence length */
9062 if (third) len -= 2;
9063 else if (next) len --;
9064 for (;base < len;base++) {
9065 if (buf[base] == first) {
9066 if (third != 0) {
9067 if ((buf[base + 1] != next) ||
9068 (buf[base + 2] != third)) continue;
9069 } else if (next != 0) {
9070 if (buf[base + 1] != next) continue;
9071 }
9072 ctxt->checkIndex = 0;
9073#ifdef DEBUG_PUSH
9074 if (next == 0)
9075 xmlGenericError(xmlGenericErrorContext,
9076 "PP: lookup '%c' found at %d\n",
9077 first, base);
9078 else if (third == 0)
9079 xmlGenericError(xmlGenericErrorContext,
9080 "PP: lookup '%c%c' found at %d\n",
9081 first, next, base);
9082 else
9083 xmlGenericError(xmlGenericErrorContext,
9084 "PP: lookup '%c%c%c' found at %d\n",
9085 first, next, third, base);
9086#endif
9087 return(base - (in->cur - in->base));
9088 }
9089 }
9090 ctxt->checkIndex = base;
9091#ifdef DEBUG_PUSH
9092 if (next == 0)
9093 xmlGenericError(xmlGenericErrorContext,
9094 "PP: lookup '%c' failed\n", first);
9095 else if (third == 0)
9096 xmlGenericError(xmlGenericErrorContext,
9097 "PP: lookup '%c%c' failed\n", first, next);
9098 else
9099 xmlGenericError(xmlGenericErrorContext,
9100 "PP: lookup '%c%c%c' failed\n", first, next, third);
9101#endif
9102 return(-1);
9103}
9104
9105/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009106 * xmlParseGetLasts:
9107 * @ctxt: an XML parser context
9108 * @lastlt: pointer to store the last '<' from the input
9109 * @lastgt: pointer to store the last '>' from the input
9110 *
9111 * Lookup the last < and > in the current chunk
9112 */
9113static void
9114xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9115 const xmlChar **lastgt) {
9116 const xmlChar *tmp;
9117
9118 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9119 xmlGenericError(xmlGenericErrorContext,
9120 "Internal error: xmlParseGetLasts\n");
9121 return;
9122 }
9123 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9124 tmp = ctxt->input->end;
9125 tmp--;
9126 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9127 (*tmp != '>')) tmp--;
9128 if (tmp < ctxt->input->base) {
9129 *lastlt = NULL;
9130 *lastgt = NULL;
9131 } else if (*tmp == '<') {
9132 *lastlt = tmp;
9133 tmp--;
9134 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9135 if (tmp < ctxt->input->base)
9136 *lastgt = NULL;
9137 else
9138 *lastgt = tmp;
9139 } else {
9140 *lastgt = tmp;
9141 tmp--;
9142 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9143 if (tmp < ctxt->input->base)
9144 *lastlt = NULL;
9145 else
9146 *lastlt = tmp;
9147 }
9148
9149 } else {
9150 *lastlt = NULL;
9151 *lastgt = NULL;
9152 }
9153}
9154/**
Owen Taylor3473f882001-02-23 17:55:21 +00009155 * xmlParseTryOrFinish:
9156 * @ctxt: an XML parser context
9157 * @terminate: last chunk indicator
9158 *
9159 * Try to progress on parsing
9160 *
9161 * Returns zero if no parsing was possible
9162 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009163static int
Owen Taylor3473f882001-02-23 17:55:21 +00009164xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9165 int ret = 0;
9166 int avail;
9167 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009168 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009169
9170#ifdef DEBUG_PUSH
9171 switch (ctxt->instate) {
9172 case XML_PARSER_EOF:
9173 xmlGenericError(xmlGenericErrorContext,
9174 "PP: try EOF\n"); break;
9175 case XML_PARSER_START:
9176 xmlGenericError(xmlGenericErrorContext,
9177 "PP: try START\n"); break;
9178 case XML_PARSER_MISC:
9179 xmlGenericError(xmlGenericErrorContext,
9180 "PP: try MISC\n");break;
9181 case XML_PARSER_COMMENT:
9182 xmlGenericError(xmlGenericErrorContext,
9183 "PP: try COMMENT\n");break;
9184 case XML_PARSER_PROLOG:
9185 xmlGenericError(xmlGenericErrorContext,
9186 "PP: try PROLOG\n");break;
9187 case XML_PARSER_START_TAG:
9188 xmlGenericError(xmlGenericErrorContext,
9189 "PP: try START_TAG\n");break;
9190 case XML_PARSER_CONTENT:
9191 xmlGenericError(xmlGenericErrorContext,
9192 "PP: try CONTENT\n");break;
9193 case XML_PARSER_CDATA_SECTION:
9194 xmlGenericError(xmlGenericErrorContext,
9195 "PP: try CDATA_SECTION\n");break;
9196 case XML_PARSER_END_TAG:
9197 xmlGenericError(xmlGenericErrorContext,
9198 "PP: try END_TAG\n");break;
9199 case XML_PARSER_ENTITY_DECL:
9200 xmlGenericError(xmlGenericErrorContext,
9201 "PP: try ENTITY_DECL\n");break;
9202 case XML_PARSER_ENTITY_VALUE:
9203 xmlGenericError(xmlGenericErrorContext,
9204 "PP: try ENTITY_VALUE\n");break;
9205 case XML_PARSER_ATTRIBUTE_VALUE:
9206 xmlGenericError(xmlGenericErrorContext,
9207 "PP: try ATTRIBUTE_VALUE\n");break;
9208 case XML_PARSER_DTD:
9209 xmlGenericError(xmlGenericErrorContext,
9210 "PP: try DTD\n");break;
9211 case XML_PARSER_EPILOG:
9212 xmlGenericError(xmlGenericErrorContext,
9213 "PP: try EPILOG\n");break;
9214 case XML_PARSER_PI:
9215 xmlGenericError(xmlGenericErrorContext,
9216 "PP: try PI\n");break;
9217 case XML_PARSER_IGNORE:
9218 xmlGenericError(xmlGenericErrorContext,
9219 "PP: try IGNORE\n");break;
9220 }
9221#endif
9222
Daniel Veillarda880b122003-04-21 21:36:41 +00009223 if (ctxt->input->cur - ctxt->input->base > 4096) {
9224 xmlSHRINK(ctxt);
9225 ctxt->checkIndex = 0;
9226 }
9227 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009228
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009230 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9231 return(0);
9232
9233
Owen Taylor3473f882001-02-23 17:55:21 +00009234 /*
9235 * Pop-up of finished entities.
9236 */
9237 while ((RAW == 0) && (ctxt->inputNr > 1))
9238 xmlPopInput(ctxt);
9239
9240 if (ctxt->input ==NULL) break;
9241 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009242 avail = ctxt->input->length -
9243 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009244 else {
9245 /*
9246 * If we are operating on converted input, try to flush
9247 * remainng chars to avoid them stalling in the non-converted
9248 * buffer.
9249 */
9250 if ((ctxt->input->buf->raw != NULL) &&
9251 (ctxt->input->buf->raw->use > 0)) {
9252 int base = ctxt->input->base -
9253 ctxt->input->buf->buffer->content;
9254 int current = ctxt->input->cur - ctxt->input->base;
9255
9256 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9257 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9258 ctxt->input->cur = ctxt->input->base + current;
9259 ctxt->input->end =
9260 &ctxt->input->buf->buffer->content[
9261 ctxt->input->buf->buffer->use];
9262 }
9263 avail = ctxt->input->buf->buffer->use -
9264 (ctxt->input->cur - ctxt->input->base);
9265 }
Owen Taylor3473f882001-02-23 17:55:21 +00009266 if (avail < 1)
9267 goto done;
9268 switch (ctxt->instate) {
9269 case XML_PARSER_EOF:
9270 /*
9271 * Document parsing is done !
9272 */
9273 goto done;
9274 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009275 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9276 xmlChar start[4];
9277 xmlCharEncoding enc;
9278
9279 /*
9280 * Very first chars read from the document flow.
9281 */
9282 if (avail < 4)
9283 goto done;
9284
9285 /*
9286 * Get the 4 first bytes and decode the charset
9287 * if enc != XML_CHAR_ENCODING_NONE
9288 * plug some encoding conversion routines.
9289 */
9290 start[0] = RAW;
9291 start[1] = NXT(1);
9292 start[2] = NXT(2);
9293 start[3] = NXT(3);
9294 enc = xmlDetectCharEncoding(start, 4);
9295 if (enc != XML_CHAR_ENCODING_NONE) {
9296 xmlSwitchEncoding(ctxt, enc);
9297 }
9298 break;
9299 }
Owen Taylor3473f882001-02-23 17:55:21 +00009300
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009301 if (avail < 2)
9302 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009303 cur = ctxt->input->cur[0];
9304 next = ctxt->input->cur[1];
9305 if (cur == 0) {
9306 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9307 ctxt->sax->setDocumentLocator(ctxt->userData,
9308 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009309 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009310 ctxt->instate = XML_PARSER_EOF;
9311#ifdef DEBUG_PUSH
9312 xmlGenericError(xmlGenericErrorContext,
9313 "PP: entering EOF\n");
9314#endif
9315 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9316 ctxt->sax->endDocument(ctxt->userData);
9317 goto done;
9318 }
9319 if ((cur == '<') && (next == '?')) {
9320 /* PI or XML decl */
9321 if (avail < 5) return(ret);
9322 if ((!terminate) &&
9323 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9324 return(ret);
9325 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9326 ctxt->sax->setDocumentLocator(ctxt->userData,
9327 &xmlDefaultSAXLocator);
9328 if ((ctxt->input->cur[2] == 'x') &&
9329 (ctxt->input->cur[3] == 'm') &&
9330 (ctxt->input->cur[4] == 'l') &&
9331 (IS_BLANK(ctxt->input->cur[5]))) {
9332 ret += 5;
9333#ifdef DEBUG_PUSH
9334 xmlGenericError(xmlGenericErrorContext,
9335 "PP: Parsing XML Decl\n");
9336#endif
9337 xmlParseXMLDecl(ctxt);
9338 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9339 /*
9340 * The XML REC instructs us to stop parsing right
9341 * here
9342 */
9343 ctxt->instate = XML_PARSER_EOF;
9344 return(0);
9345 }
9346 ctxt->standalone = ctxt->input->standalone;
9347 if ((ctxt->encoding == NULL) &&
9348 (ctxt->input->encoding != NULL))
9349 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9350 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9351 (!ctxt->disableSAX))
9352 ctxt->sax->startDocument(ctxt->userData);
9353 ctxt->instate = XML_PARSER_MISC;
9354#ifdef DEBUG_PUSH
9355 xmlGenericError(xmlGenericErrorContext,
9356 "PP: entering MISC\n");
9357#endif
9358 } else {
9359 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9360 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9361 (!ctxt->disableSAX))
9362 ctxt->sax->startDocument(ctxt->userData);
9363 ctxt->instate = XML_PARSER_MISC;
9364#ifdef DEBUG_PUSH
9365 xmlGenericError(xmlGenericErrorContext,
9366 "PP: entering MISC\n");
9367#endif
9368 }
9369 } else {
9370 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9371 ctxt->sax->setDocumentLocator(ctxt->userData,
9372 &xmlDefaultSAXLocator);
9373 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9374 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9375 (!ctxt->disableSAX))
9376 ctxt->sax->startDocument(ctxt->userData);
9377 ctxt->instate = XML_PARSER_MISC;
9378#ifdef DEBUG_PUSH
9379 xmlGenericError(xmlGenericErrorContext,
9380 "PP: entering MISC\n");
9381#endif
9382 }
9383 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009384 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009385 const xmlChar *name;
9386 const xmlChar *prefix;
9387 const xmlChar *URI;
9388 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009389
9390 if ((avail < 2) && (ctxt->inputNr == 1))
9391 goto done;
9392 cur = ctxt->input->cur[0];
9393 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009394 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009395 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009396 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9397 ctxt->sax->endDocument(ctxt->userData);
9398 goto done;
9399 }
9400 if (!terminate) {
9401 if (ctxt->progressive) {
9402 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9403 goto done;
9404 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9405 goto done;
9406 }
9407 }
9408 if (ctxt->spaceNr == 0)
9409 spacePush(ctxt, -1);
9410 else
9411 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009412#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009413 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009414#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009415 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009416#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009417 else
9418 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009419#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009420 if (name == NULL) {
9421 spacePop(ctxt);
9422 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009423 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9424 ctxt->sax->endDocument(ctxt->userData);
9425 goto done;
9426 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009427#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009428 /*
9429 * [ VC: Root Element Type ]
9430 * The Name in the document type declaration must match
9431 * the element type of the root element.
9432 */
9433 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9434 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9435 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009436#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009437
9438 /*
9439 * Check for an Empty Element.
9440 */
9441 if ((RAW == '/') && (NXT(1) == '>')) {
9442 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009443
9444 if (ctxt->sax2) {
9445 if ((ctxt->sax != NULL) &&
9446 (ctxt->sax->endElementNs != NULL) &&
9447 (!ctxt->disableSAX))
9448 ctxt->sax->endElementNs(ctxt->userData, name,
9449 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009450#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009451 } else {
9452 if ((ctxt->sax != NULL) &&
9453 (ctxt->sax->endElement != NULL) &&
9454 (!ctxt->disableSAX))
9455 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009456#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009457 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009458 spacePop(ctxt);
9459 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009460 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009461 } else {
9462 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009463 }
9464 break;
9465 }
9466 if (RAW == '>') {
9467 NEXT;
9468 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009469 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009470 "Couldn't find end of Start Tag %s\n",
9471 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009472 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009473 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009474 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009475 if (ctxt->sax2)
9476 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009477#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009478 else
9479 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009480#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009481
Daniel Veillarda880b122003-04-21 21:36:41 +00009482 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009483 break;
9484 }
9485 case XML_PARSER_CONTENT: {
9486 const xmlChar *test;
9487 unsigned int cons;
9488 if ((avail < 2) && (ctxt->inputNr == 1))
9489 goto done;
9490 cur = ctxt->input->cur[0];
9491 next = ctxt->input->cur[1];
9492
9493 test = CUR_PTR;
9494 cons = ctxt->input->consumed;
9495 if ((cur == '<') && (next == '/')) {
9496 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009497 break;
9498 } else if ((cur == '<') && (next == '?')) {
9499 if ((!terminate) &&
9500 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9501 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009502 xmlParsePI(ctxt);
9503 } else if ((cur == '<') && (next != '!')) {
9504 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009505 break;
9506 } else if ((cur == '<') && (next == '!') &&
9507 (ctxt->input->cur[2] == '-') &&
9508 (ctxt->input->cur[3] == '-')) {
9509 if ((!terminate) &&
9510 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9511 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009512 xmlParseComment(ctxt);
9513 ctxt->instate = XML_PARSER_CONTENT;
9514 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9515 (ctxt->input->cur[2] == '[') &&
9516 (ctxt->input->cur[3] == 'C') &&
9517 (ctxt->input->cur[4] == 'D') &&
9518 (ctxt->input->cur[5] == 'A') &&
9519 (ctxt->input->cur[6] == 'T') &&
9520 (ctxt->input->cur[7] == 'A') &&
9521 (ctxt->input->cur[8] == '[')) {
9522 SKIP(9);
9523 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009524 break;
9525 } else if ((cur == '<') && (next == '!') &&
9526 (avail < 9)) {
9527 goto done;
9528 } else if (cur == '&') {
9529 if ((!terminate) &&
9530 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9531 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009532 xmlParseReference(ctxt);
9533 } else {
9534 /* TODO Avoid the extra copy, handle directly !!! */
9535 /*
9536 * Goal of the following test is:
9537 * - minimize calls to the SAX 'character' callback
9538 * when they are mergeable
9539 * - handle an problem for isBlank when we only parse
9540 * a sequence of blank chars and the next one is
9541 * not available to check against '<' presence.
9542 * - tries to homogenize the differences in SAX
9543 * callbacks between the push and pull versions
9544 * of the parser.
9545 */
9546 if ((ctxt->inputNr == 1) &&
9547 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9548 if (!terminate) {
9549 if (ctxt->progressive) {
9550 if ((lastlt == NULL) ||
9551 (ctxt->input->cur > lastlt))
9552 goto done;
9553 } else if (xmlParseLookupSequence(ctxt,
9554 '<', 0, 0) < 0) {
9555 goto done;
9556 }
9557 }
9558 }
9559 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009560 xmlParseCharData(ctxt, 0);
9561 }
9562 /*
9563 * Pop-up of finished entities.
9564 */
9565 while ((RAW == 0) && (ctxt->inputNr > 1))
9566 xmlPopInput(ctxt);
9567 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009568 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9569 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009570 ctxt->instate = XML_PARSER_EOF;
9571 break;
9572 }
9573 break;
9574 }
9575 case XML_PARSER_END_TAG:
9576 if (avail < 2)
9577 goto done;
9578 if (!terminate) {
9579 if (ctxt->progressive) {
9580 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9581 goto done;
9582 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9583 goto done;
9584 }
9585 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009586 if (ctxt->sax2) {
9587 xmlParseEndTag2(ctxt,
9588 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9589 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9590 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9591 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009592 }
9593#ifdef LIBXML_SAX1_ENABLED
9594 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009595 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009596#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009597 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009598 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009599 } else {
9600 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009601 }
9602 break;
9603 case XML_PARSER_CDATA_SECTION: {
9604 /*
9605 * The Push mode need to have the SAX callback for
9606 * cdataBlock merge back contiguous callbacks.
9607 */
9608 int base;
9609
9610 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9611 if (base < 0) {
9612 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9613 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9614 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009615 ctxt->sax->cdataBlock(ctxt->userData,
9616 ctxt->input->cur,
9617 XML_PARSER_BIG_BUFFER_SIZE);
9618 else if (ctxt->sax->characters != NULL)
9619 ctxt->sax->characters(ctxt->userData,
9620 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009621 XML_PARSER_BIG_BUFFER_SIZE);
9622 }
9623 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9624 ctxt->checkIndex = 0;
9625 }
9626 goto done;
9627 } else {
9628 if ((ctxt->sax != NULL) && (base > 0) &&
9629 (!ctxt->disableSAX)) {
9630 if (ctxt->sax->cdataBlock != NULL)
9631 ctxt->sax->cdataBlock(ctxt->userData,
9632 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009633 else if (ctxt->sax->characters != NULL)
9634 ctxt->sax->characters(ctxt->userData,
9635 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009636 }
9637 SKIP(base + 3);
9638 ctxt->checkIndex = 0;
9639 ctxt->instate = XML_PARSER_CONTENT;
9640#ifdef DEBUG_PUSH
9641 xmlGenericError(xmlGenericErrorContext,
9642 "PP: entering CONTENT\n");
9643#endif
9644 }
9645 break;
9646 }
Owen Taylor3473f882001-02-23 17:55:21 +00009647 case XML_PARSER_MISC:
9648 SKIP_BLANKS;
9649 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009650 avail = ctxt->input->length -
9651 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009652 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009653 avail = ctxt->input->buf->buffer->use -
9654 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009655 if (avail < 2)
9656 goto done;
9657 cur = ctxt->input->cur[0];
9658 next = ctxt->input->cur[1];
9659 if ((cur == '<') && (next == '?')) {
9660 if ((!terminate) &&
9661 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9662 goto done;
9663#ifdef DEBUG_PUSH
9664 xmlGenericError(xmlGenericErrorContext,
9665 "PP: Parsing PI\n");
9666#endif
9667 xmlParsePI(ctxt);
9668 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009669 (ctxt->input->cur[2] == '-') &&
9670 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009671 if ((!terminate) &&
9672 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9673 goto done;
9674#ifdef DEBUG_PUSH
9675 xmlGenericError(xmlGenericErrorContext,
9676 "PP: Parsing Comment\n");
9677#endif
9678 xmlParseComment(ctxt);
9679 ctxt->instate = XML_PARSER_MISC;
9680 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009681 (ctxt->input->cur[2] == 'D') &&
9682 (ctxt->input->cur[3] == 'O') &&
9683 (ctxt->input->cur[4] == 'C') &&
9684 (ctxt->input->cur[5] == 'T') &&
9685 (ctxt->input->cur[6] == 'Y') &&
9686 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009687 (ctxt->input->cur[8] == 'E')) {
9688 if ((!terminate) &&
9689 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9690 goto done;
9691#ifdef DEBUG_PUSH
9692 xmlGenericError(xmlGenericErrorContext,
9693 "PP: Parsing internal subset\n");
9694#endif
9695 ctxt->inSubset = 1;
9696 xmlParseDocTypeDecl(ctxt);
9697 if (RAW == '[') {
9698 ctxt->instate = XML_PARSER_DTD;
9699#ifdef DEBUG_PUSH
9700 xmlGenericError(xmlGenericErrorContext,
9701 "PP: entering DTD\n");
9702#endif
9703 } else {
9704 /*
9705 * Create and update the external subset.
9706 */
9707 ctxt->inSubset = 2;
9708 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9709 (ctxt->sax->externalSubset != NULL))
9710 ctxt->sax->externalSubset(ctxt->userData,
9711 ctxt->intSubName, ctxt->extSubSystem,
9712 ctxt->extSubURI);
9713 ctxt->inSubset = 0;
9714 ctxt->instate = XML_PARSER_PROLOG;
9715#ifdef DEBUG_PUSH
9716 xmlGenericError(xmlGenericErrorContext,
9717 "PP: entering PROLOG\n");
9718#endif
9719 }
9720 } else if ((cur == '<') && (next == '!') &&
9721 (avail < 9)) {
9722 goto done;
9723 } else {
9724 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009725 ctxt->progressive = 1;
9726 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009727#ifdef DEBUG_PUSH
9728 xmlGenericError(xmlGenericErrorContext,
9729 "PP: entering START_TAG\n");
9730#endif
9731 }
9732 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009733 case XML_PARSER_PROLOG:
9734 SKIP_BLANKS;
9735 if (ctxt->input->buf == NULL)
9736 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9737 else
9738 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9739 if (avail < 2)
9740 goto done;
9741 cur = ctxt->input->cur[0];
9742 next = ctxt->input->cur[1];
9743 if ((cur == '<') && (next == '?')) {
9744 if ((!terminate) &&
9745 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9746 goto done;
9747#ifdef DEBUG_PUSH
9748 xmlGenericError(xmlGenericErrorContext,
9749 "PP: Parsing PI\n");
9750#endif
9751 xmlParsePI(ctxt);
9752 } else if ((cur == '<') && (next == '!') &&
9753 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9754 if ((!terminate) &&
9755 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9756 goto done;
9757#ifdef DEBUG_PUSH
9758 xmlGenericError(xmlGenericErrorContext,
9759 "PP: Parsing Comment\n");
9760#endif
9761 xmlParseComment(ctxt);
9762 ctxt->instate = XML_PARSER_PROLOG;
9763 } else if ((cur == '<') && (next == '!') &&
9764 (avail < 4)) {
9765 goto done;
9766 } else {
9767 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009768 ctxt->progressive = 1;
9769 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009770#ifdef DEBUG_PUSH
9771 xmlGenericError(xmlGenericErrorContext,
9772 "PP: entering START_TAG\n");
9773#endif
9774 }
9775 break;
9776 case XML_PARSER_EPILOG:
9777 SKIP_BLANKS;
9778 if (ctxt->input->buf == NULL)
9779 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9780 else
9781 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9782 if (avail < 2)
9783 goto done;
9784 cur = ctxt->input->cur[0];
9785 next = ctxt->input->cur[1];
9786 if ((cur == '<') && (next == '?')) {
9787 if ((!terminate) &&
9788 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9789 goto done;
9790#ifdef DEBUG_PUSH
9791 xmlGenericError(xmlGenericErrorContext,
9792 "PP: Parsing PI\n");
9793#endif
9794 xmlParsePI(ctxt);
9795 ctxt->instate = XML_PARSER_EPILOG;
9796 } else if ((cur == '<') && (next == '!') &&
9797 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9798 if ((!terminate) &&
9799 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9800 goto done;
9801#ifdef DEBUG_PUSH
9802 xmlGenericError(xmlGenericErrorContext,
9803 "PP: Parsing Comment\n");
9804#endif
9805 xmlParseComment(ctxt);
9806 ctxt->instate = XML_PARSER_EPILOG;
9807 } else if ((cur == '<') && (next == '!') &&
9808 (avail < 4)) {
9809 goto done;
9810 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009811 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009812 ctxt->instate = XML_PARSER_EOF;
9813#ifdef DEBUG_PUSH
9814 xmlGenericError(xmlGenericErrorContext,
9815 "PP: entering EOF\n");
9816#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009817 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009818 ctxt->sax->endDocument(ctxt->userData);
9819 goto done;
9820 }
9821 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009822 case XML_PARSER_DTD: {
9823 /*
9824 * Sorry but progressive parsing of the internal subset
9825 * is not expected to be supported. We first check that
9826 * the full content of the internal subset is available and
9827 * the parsing is launched only at that point.
9828 * Internal subset ends up with "']' S? '>'" in an unescaped
9829 * section and not in a ']]>' sequence which are conditional
9830 * sections (whoever argued to keep that crap in XML deserve
9831 * a place in hell !).
9832 */
9833 int base, i;
9834 xmlChar *buf;
9835 xmlChar quote = 0;
9836
9837 base = ctxt->input->cur - ctxt->input->base;
9838 if (base < 0) return(0);
9839 if (ctxt->checkIndex > base)
9840 base = ctxt->checkIndex;
9841 buf = ctxt->input->buf->buffer->content;
9842 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9843 base++) {
9844 if (quote != 0) {
9845 if (buf[base] == quote)
9846 quote = 0;
9847 continue;
9848 }
9849 if (buf[base] == '"') {
9850 quote = '"';
9851 continue;
9852 }
9853 if (buf[base] == '\'') {
9854 quote = '\'';
9855 continue;
9856 }
9857 if (buf[base] == ']') {
9858 if ((unsigned int) base +1 >=
9859 ctxt->input->buf->buffer->use)
9860 break;
9861 if (buf[base + 1] == ']') {
9862 /* conditional crap, skip both ']' ! */
9863 base++;
9864 continue;
9865 }
9866 for (i = 0;
9867 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9868 i++) {
9869 if (buf[base + i] == '>')
9870 goto found_end_int_subset;
9871 }
9872 break;
9873 }
9874 }
9875 /*
9876 * We didn't found the end of the Internal subset
9877 */
9878 if (quote == 0)
9879 ctxt->checkIndex = base;
9880#ifdef DEBUG_PUSH
9881 if (next == 0)
9882 xmlGenericError(xmlGenericErrorContext,
9883 "PP: lookup of int subset end filed\n");
9884#endif
9885 goto done;
9886
9887found_end_int_subset:
9888 xmlParseInternalSubset(ctxt);
9889 ctxt->inSubset = 2;
9890 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9891 (ctxt->sax->externalSubset != NULL))
9892 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9893 ctxt->extSubSystem, ctxt->extSubURI);
9894 ctxt->inSubset = 0;
9895 ctxt->instate = XML_PARSER_PROLOG;
9896 ctxt->checkIndex = 0;
9897#ifdef DEBUG_PUSH
9898 xmlGenericError(xmlGenericErrorContext,
9899 "PP: entering PROLOG\n");
9900#endif
9901 break;
9902 }
9903 case XML_PARSER_COMMENT:
9904 xmlGenericError(xmlGenericErrorContext,
9905 "PP: internal error, state == COMMENT\n");
9906 ctxt->instate = XML_PARSER_CONTENT;
9907#ifdef DEBUG_PUSH
9908 xmlGenericError(xmlGenericErrorContext,
9909 "PP: entering CONTENT\n");
9910#endif
9911 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009912 case XML_PARSER_IGNORE:
9913 xmlGenericError(xmlGenericErrorContext,
9914 "PP: internal error, state == IGNORE");
9915 ctxt->instate = XML_PARSER_DTD;
9916#ifdef DEBUG_PUSH
9917 xmlGenericError(xmlGenericErrorContext,
9918 "PP: entering DTD\n");
9919#endif
9920 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009921 case XML_PARSER_PI:
9922 xmlGenericError(xmlGenericErrorContext,
9923 "PP: internal error, state == PI\n");
9924 ctxt->instate = XML_PARSER_CONTENT;
9925#ifdef DEBUG_PUSH
9926 xmlGenericError(xmlGenericErrorContext,
9927 "PP: entering CONTENT\n");
9928#endif
9929 break;
9930 case XML_PARSER_ENTITY_DECL:
9931 xmlGenericError(xmlGenericErrorContext,
9932 "PP: internal error, state == ENTITY_DECL\n");
9933 ctxt->instate = XML_PARSER_DTD;
9934#ifdef DEBUG_PUSH
9935 xmlGenericError(xmlGenericErrorContext,
9936 "PP: entering DTD\n");
9937#endif
9938 break;
9939 case XML_PARSER_ENTITY_VALUE:
9940 xmlGenericError(xmlGenericErrorContext,
9941 "PP: internal error, state == ENTITY_VALUE\n");
9942 ctxt->instate = XML_PARSER_CONTENT;
9943#ifdef DEBUG_PUSH
9944 xmlGenericError(xmlGenericErrorContext,
9945 "PP: entering DTD\n");
9946#endif
9947 break;
9948 case XML_PARSER_ATTRIBUTE_VALUE:
9949 xmlGenericError(xmlGenericErrorContext,
9950 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9951 ctxt->instate = XML_PARSER_START_TAG;
9952#ifdef DEBUG_PUSH
9953 xmlGenericError(xmlGenericErrorContext,
9954 "PP: entering START_TAG\n");
9955#endif
9956 break;
9957 case XML_PARSER_SYSTEM_LITERAL:
9958 xmlGenericError(xmlGenericErrorContext,
9959 "PP: internal error, state == SYSTEM_LITERAL\n");
9960 ctxt->instate = XML_PARSER_START_TAG;
9961#ifdef DEBUG_PUSH
9962 xmlGenericError(xmlGenericErrorContext,
9963 "PP: entering START_TAG\n");
9964#endif
9965 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009966 case XML_PARSER_PUBLIC_LITERAL:
9967 xmlGenericError(xmlGenericErrorContext,
9968 "PP: internal error, state == PUBLIC_LITERAL\n");
9969 ctxt->instate = XML_PARSER_START_TAG;
9970#ifdef DEBUG_PUSH
9971 xmlGenericError(xmlGenericErrorContext,
9972 "PP: entering START_TAG\n");
9973#endif
9974 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009975 }
9976 }
9977done:
9978#ifdef DEBUG_PUSH
9979 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9980#endif
9981 return(ret);
9982}
9983
9984/**
Owen Taylor3473f882001-02-23 17:55:21 +00009985 * xmlParseChunk:
9986 * @ctxt: an XML parser context
9987 * @chunk: an char array
9988 * @size: the size in byte of the chunk
9989 * @terminate: last chunk indicator
9990 *
9991 * Parse a Chunk of memory
9992 *
9993 * Returns zero if no error, the xmlParserErrors otherwise.
9994 */
9995int
9996xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9997 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009998 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9999 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010000 if (ctxt->instate == XML_PARSER_START)
10001 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010002 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10003 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10004 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10005 int cur = ctxt->input->cur - ctxt->input->base;
10006
10007 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10008 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10009 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010010 ctxt->input->end =
10011 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010012#ifdef DEBUG_PUSH
10013 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10014#endif
10015
Owen Taylor3473f882001-02-23 17:55:21 +000010016 } else if (ctxt->instate != XML_PARSER_EOF) {
10017 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10018 xmlParserInputBufferPtr in = ctxt->input->buf;
10019 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10020 (in->raw != NULL)) {
10021 int nbchars;
10022
10023 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10024 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010025 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010026 xmlGenericError(xmlGenericErrorContext,
10027 "xmlParseChunk: encoder error\n");
10028 return(XML_ERR_INVALID_ENCODING);
10029 }
10030 }
10031 }
10032 }
10033 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010034 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10035 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010036 if (terminate) {
10037 /*
10038 * Check for termination
10039 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010040 int avail = 0;
10041 if (ctxt->input->buf == NULL)
10042 avail = ctxt->input->length -
10043 (ctxt->input->cur - ctxt->input->base);
10044 else
10045 avail = ctxt->input->buf->buffer->use -
10046 (ctxt->input->cur - ctxt->input->base);
10047
Owen Taylor3473f882001-02-23 17:55:21 +000010048 if ((ctxt->instate != XML_PARSER_EOF) &&
10049 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010050 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010051 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010052 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010053 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010054 }
Owen Taylor3473f882001-02-23 17:55:21 +000010055 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010056 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010057 ctxt->sax->endDocument(ctxt->userData);
10058 }
10059 ctxt->instate = XML_PARSER_EOF;
10060 }
10061 return((xmlParserErrors) ctxt->errNo);
10062}
10063
10064/************************************************************************
10065 * *
10066 * I/O front end functions to the parser *
10067 * *
10068 ************************************************************************/
10069
10070/**
10071 * xmlStopParser:
10072 * @ctxt: an XML parser context
10073 *
10074 * Blocks further parser processing
10075 */
10076void
10077xmlStopParser(xmlParserCtxtPtr ctxt) {
10078 ctxt->instate = XML_PARSER_EOF;
10079 if (ctxt->input != NULL)
10080 ctxt->input->cur = BAD_CAST"";
10081}
10082
10083/**
10084 * xmlCreatePushParserCtxt:
10085 * @sax: a SAX handler
10086 * @user_data: The user data returned on SAX callbacks
10087 * @chunk: a pointer to an array of chars
10088 * @size: number of chars in the array
10089 * @filename: an optional file name or URI
10090 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010091 * Create a parser context for using the XML parser in push mode.
10092 * If @chunk is non-NULL and @size is positive, the data is used to detect
10093 * the encoding. The remaining characters will be parsed so they
10094 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010095 * To allow content encoding detection, @size should be >= 4
10096 * The value of @filename is used for fetching external entities
10097 * and error/warning reports.
10098 *
10099 * Returns the new parser context or NULL
10100 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010101
Owen Taylor3473f882001-02-23 17:55:21 +000010102xmlParserCtxtPtr
10103xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10104 const char *chunk, int size, const char *filename) {
10105 xmlParserCtxtPtr ctxt;
10106 xmlParserInputPtr inputStream;
10107 xmlParserInputBufferPtr buf;
10108 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10109
10110 /*
10111 * plug some encoding conversion routines
10112 */
10113 if ((chunk != NULL) && (size >= 4))
10114 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10115
10116 buf = xmlAllocParserInputBuffer(enc);
10117 if (buf == NULL) return(NULL);
10118
10119 ctxt = xmlNewParserCtxt();
10120 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010121 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010122 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010123 return(NULL);
10124 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010125 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10126 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010127 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010128 xmlFreeParserInputBuffer(buf);
10129 xmlFreeParserCtxt(ctxt);
10130 return(NULL);
10131 }
Owen Taylor3473f882001-02-23 17:55:21 +000010132 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010133#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010134 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010135#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010136 xmlFree(ctxt->sax);
10137 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10138 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010139 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010140 xmlFreeParserInputBuffer(buf);
10141 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010142 return(NULL);
10143 }
10144 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10145 if (user_data != NULL)
10146 ctxt->userData = user_data;
10147 }
10148 if (filename == NULL) {
10149 ctxt->directory = NULL;
10150 } else {
10151 ctxt->directory = xmlParserGetDirectory(filename);
10152 }
10153
10154 inputStream = xmlNewInputStream(ctxt);
10155 if (inputStream == NULL) {
10156 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010157 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010158 return(NULL);
10159 }
10160
10161 if (filename == NULL)
10162 inputStream->filename = NULL;
10163 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010164 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010165 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010166 inputStream->buf = buf;
10167 inputStream->base = inputStream->buf->buffer->content;
10168 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010169 inputStream->end =
10170 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010171
10172 inputPush(ctxt, inputStream);
10173
10174 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10175 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010176 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10177 int cur = ctxt->input->cur - ctxt->input->base;
10178
Owen Taylor3473f882001-02-23 17:55:21 +000010179 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010180
10181 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10182 ctxt->input->cur = ctxt->input->base + cur;
10183 ctxt->input->end =
10184 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010185#ifdef DEBUG_PUSH
10186 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10187#endif
10188 }
10189
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010190 if (enc != XML_CHAR_ENCODING_NONE) {
10191 xmlSwitchEncoding(ctxt, enc);
10192 }
10193
Owen Taylor3473f882001-02-23 17:55:21 +000010194 return(ctxt);
10195}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010196#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010197
10198/**
10199 * xmlCreateIOParserCtxt:
10200 * @sax: a SAX handler
10201 * @user_data: The user data returned on SAX callbacks
10202 * @ioread: an I/O read function
10203 * @ioclose: an I/O close function
10204 * @ioctx: an I/O handler
10205 * @enc: the charset encoding if known
10206 *
10207 * Create a parser context for using the XML parser with an existing
10208 * I/O stream
10209 *
10210 * Returns the new parser context or NULL
10211 */
10212xmlParserCtxtPtr
10213xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10214 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10215 void *ioctx, xmlCharEncoding enc) {
10216 xmlParserCtxtPtr ctxt;
10217 xmlParserInputPtr inputStream;
10218 xmlParserInputBufferPtr buf;
10219
10220 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10221 if (buf == NULL) return(NULL);
10222
10223 ctxt = xmlNewParserCtxt();
10224 if (ctxt == NULL) {
10225 xmlFree(buf);
10226 return(NULL);
10227 }
10228 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010229#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010230 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010231#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010232 xmlFree(ctxt->sax);
10233 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10234 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010235 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010236 xmlFree(ctxt);
10237 return(NULL);
10238 }
10239 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10240 if (user_data != NULL)
10241 ctxt->userData = user_data;
10242 }
10243
10244 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10245 if (inputStream == NULL) {
10246 xmlFreeParserCtxt(ctxt);
10247 return(NULL);
10248 }
10249 inputPush(ctxt, inputStream);
10250
10251 return(ctxt);
10252}
10253
Daniel Veillard4432df22003-09-28 18:58:27 +000010254#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010255/************************************************************************
10256 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010257 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010258 * *
10259 ************************************************************************/
10260
10261/**
10262 * xmlIOParseDTD:
10263 * @sax: the SAX handler block or NULL
10264 * @input: an Input Buffer
10265 * @enc: the charset encoding if known
10266 *
10267 * Load and parse a DTD
10268 *
10269 * Returns the resulting xmlDtdPtr or NULL in case of error.
10270 * @input will be freed at parsing end.
10271 */
10272
10273xmlDtdPtr
10274xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10275 xmlCharEncoding enc) {
10276 xmlDtdPtr ret = NULL;
10277 xmlParserCtxtPtr ctxt;
10278 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010279 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010280
10281 if (input == NULL)
10282 return(NULL);
10283
10284 ctxt = xmlNewParserCtxt();
10285 if (ctxt == NULL) {
10286 return(NULL);
10287 }
10288
10289 /*
10290 * Set-up the SAX context
10291 */
10292 if (sax != NULL) {
10293 if (ctxt->sax != NULL)
10294 xmlFree(ctxt->sax);
10295 ctxt->sax = sax;
10296 ctxt->userData = NULL;
10297 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010298 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010299
10300 /*
10301 * generate a parser input from the I/O handler
10302 */
10303
10304 pinput = xmlNewIOInputStream(ctxt, input, enc);
10305 if (pinput == NULL) {
10306 if (sax != NULL) ctxt->sax = NULL;
10307 xmlFreeParserCtxt(ctxt);
10308 return(NULL);
10309 }
10310
10311 /*
10312 * plug some encoding conversion routines here.
10313 */
10314 xmlPushInput(ctxt, pinput);
10315
10316 pinput->filename = NULL;
10317 pinput->line = 1;
10318 pinput->col = 1;
10319 pinput->base = ctxt->input->cur;
10320 pinput->cur = ctxt->input->cur;
10321 pinput->free = NULL;
10322
10323 /*
10324 * let's parse that entity knowing it's an external subset.
10325 */
10326 ctxt->inSubset = 2;
10327 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10328 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10329 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010330
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010331 if ((enc == XML_CHAR_ENCODING_NONE) &&
10332 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010333 /*
10334 * Get the 4 first bytes and decode the charset
10335 * if enc != XML_CHAR_ENCODING_NONE
10336 * plug some encoding conversion routines.
10337 */
10338 start[0] = RAW;
10339 start[1] = NXT(1);
10340 start[2] = NXT(2);
10341 start[3] = NXT(3);
10342 enc = xmlDetectCharEncoding(start, 4);
10343 if (enc != XML_CHAR_ENCODING_NONE) {
10344 xmlSwitchEncoding(ctxt, enc);
10345 }
10346 }
10347
Owen Taylor3473f882001-02-23 17:55:21 +000010348 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10349
10350 if (ctxt->myDoc != NULL) {
10351 if (ctxt->wellFormed) {
10352 ret = ctxt->myDoc->extSubset;
10353 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010354 if (ret != NULL) {
10355 xmlNodePtr tmp;
10356
10357 ret->doc = NULL;
10358 tmp = ret->children;
10359 while (tmp != NULL) {
10360 tmp->doc = NULL;
10361 tmp = tmp->next;
10362 }
10363 }
Owen Taylor3473f882001-02-23 17:55:21 +000010364 } else {
10365 ret = NULL;
10366 }
10367 xmlFreeDoc(ctxt->myDoc);
10368 ctxt->myDoc = NULL;
10369 }
10370 if (sax != NULL) ctxt->sax = NULL;
10371 xmlFreeParserCtxt(ctxt);
10372
10373 return(ret);
10374}
10375
10376/**
10377 * xmlSAXParseDTD:
10378 * @sax: the SAX handler block
10379 * @ExternalID: a NAME* containing the External ID of the DTD
10380 * @SystemID: a NAME* containing the URL to the DTD
10381 *
10382 * Load and parse an external subset.
10383 *
10384 * Returns the resulting xmlDtdPtr or NULL in case of error.
10385 */
10386
10387xmlDtdPtr
10388xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10389 const xmlChar *SystemID) {
10390 xmlDtdPtr ret = NULL;
10391 xmlParserCtxtPtr ctxt;
10392 xmlParserInputPtr input = NULL;
10393 xmlCharEncoding enc;
10394
10395 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10396
10397 ctxt = xmlNewParserCtxt();
10398 if (ctxt == NULL) {
10399 return(NULL);
10400 }
10401
10402 /*
10403 * Set-up the SAX context
10404 */
10405 if (sax != NULL) {
10406 if (ctxt->sax != NULL)
10407 xmlFree(ctxt->sax);
10408 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010409 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010410 }
10411
10412 /*
10413 * Ask the Entity resolver to load the damn thing
10414 */
10415
10416 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010417 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010418 if (input == NULL) {
10419 if (sax != NULL) ctxt->sax = NULL;
10420 xmlFreeParserCtxt(ctxt);
10421 return(NULL);
10422 }
10423
10424 /*
10425 * plug some encoding conversion routines here.
10426 */
10427 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010428 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10429 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10430 xmlSwitchEncoding(ctxt, enc);
10431 }
Owen Taylor3473f882001-02-23 17:55:21 +000010432
10433 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010434 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010435 input->line = 1;
10436 input->col = 1;
10437 input->base = ctxt->input->cur;
10438 input->cur = ctxt->input->cur;
10439 input->free = NULL;
10440
10441 /*
10442 * let's parse that entity knowing it's an external subset.
10443 */
10444 ctxt->inSubset = 2;
10445 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10446 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10447 ExternalID, SystemID);
10448 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10449
10450 if (ctxt->myDoc != NULL) {
10451 if (ctxt->wellFormed) {
10452 ret = ctxt->myDoc->extSubset;
10453 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010454 if (ret != NULL) {
10455 xmlNodePtr tmp;
10456
10457 ret->doc = NULL;
10458 tmp = ret->children;
10459 while (tmp != NULL) {
10460 tmp->doc = NULL;
10461 tmp = tmp->next;
10462 }
10463 }
Owen Taylor3473f882001-02-23 17:55:21 +000010464 } else {
10465 ret = NULL;
10466 }
10467 xmlFreeDoc(ctxt->myDoc);
10468 ctxt->myDoc = NULL;
10469 }
10470 if (sax != NULL) ctxt->sax = NULL;
10471 xmlFreeParserCtxt(ctxt);
10472
10473 return(ret);
10474}
10475
Daniel Veillard4432df22003-09-28 18:58:27 +000010476
Owen Taylor3473f882001-02-23 17:55:21 +000010477/**
10478 * xmlParseDTD:
10479 * @ExternalID: a NAME* containing the External ID of the DTD
10480 * @SystemID: a NAME* containing the URL to the DTD
10481 *
10482 * Load and parse an external subset.
10483 *
10484 * Returns the resulting xmlDtdPtr or NULL in case of error.
10485 */
10486
10487xmlDtdPtr
10488xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10489 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10490}
Daniel Veillard4432df22003-09-28 18:58:27 +000010491#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010492
10493/************************************************************************
10494 * *
10495 * Front ends when parsing an Entity *
10496 * *
10497 ************************************************************************/
10498
10499/**
Owen Taylor3473f882001-02-23 17:55:21 +000010500 * xmlParseCtxtExternalEntity:
10501 * @ctx: the existing parsing context
10502 * @URL: the URL for the entity to load
10503 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010504 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010505 *
10506 * Parse an external general entity within an existing parsing context
10507 * An external general parsed entity is well-formed if it matches the
10508 * production labeled extParsedEnt.
10509 *
10510 * [78] extParsedEnt ::= TextDecl? content
10511 *
10512 * Returns 0 if the entity is well formed, -1 in case of args problem and
10513 * the parser error code otherwise
10514 */
10515
10516int
10517xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010518 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010519 xmlParserCtxtPtr ctxt;
10520 xmlDocPtr newDoc;
10521 xmlSAXHandlerPtr oldsax = NULL;
10522 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010523 xmlChar start[4];
10524 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010525
10526 if (ctx->depth > 40) {
10527 return(XML_ERR_ENTITY_LOOP);
10528 }
10529
Daniel Veillardcda96922001-08-21 10:56:31 +000010530 if (lst != NULL)
10531 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010532 if ((URL == NULL) && (ID == NULL))
10533 return(-1);
10534 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10535 return(-1);
10536
10537
10538 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10539 if (ctxt == NULL) return(-1);
10540 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010541 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010542 oldsax = ctxt->sax;
10543 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010544 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010545 newDoc = xmlNewDoc(BAD_CAST "1.0");
10546 if (newDoc == NULL) {
10547 xmlFreeParserCtxt(ctxt);
10548 return(-1);
10549 }
10550 if (ctx->myDoc != NULL) {
10551 newDoc->intSubset = ctx->myDoc->intSubset;
10552 newDoc->extSubset = ctx->myDoc->extSubset;
10553 }
10554 if (ctx->myDoc->URL != NULL) {
10555 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10556 }
10557 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10558 if (newDoc->children == NULL) {
10559 ctxt->sax = oldsax;
10560 xmlFreeParserCtxt(ctxt);
10561 newDoc->intSubset = NULL;
10562 newDoc->extSubset = NULL;
10563 xmlFreeDoc(newDoc);
10564 return(-1);
10565 }
10566 nodePush(ctxt, newDoc->children);
10567 if (ctx->myDoc == NULL) {
10568 ctxt->myDoc = newDoc;
10569 } else {
10570 ctxt->myDoc = ctx->myDoc;
10571 newDoc->children->doc = ctx->myDoc;
10572 }
10573
Daniel Veillard87a764e2001-06-20 17:41:10 +000010574 /*
10575 * Get the 4 first bytes and decode the charset
10576 * if enc != XML_CHAR_ENCODING_NONE
10577 * plug some encoding conversion routines.
10578 */
10579 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010580 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10581 start[0] = RAW;
10582 start[1] = NXT(1);
10583 start[2] = NXT(2);
10584 start[3] = NXT(3);
10585 enc = xmlDetectCharEncoding(start, 4);
10586 if (enc != XML_CHAR_ENCODING_NONE) {
10587 xmlSwitchEncoding(ctxt, enc);
10588 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010589 }
10590
Owen Taylor3473f882001-02-23 17:55:21 +000010591 /*
10592 * Parse a possible text declaration first
10593 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010594 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010595 xmlParseTextDecl(ctxt);
10596 }
10597
10598 /*
10599 * Doing validity checking on chunk doesn't make sense
10600 */
10601 ctxt->instate = XML_PARSER_CONTENT;
10602 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010603 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010604 ctxt->loadsubset = ctx->loadsubset;
10605 ctxt->depth = ctx->depth + 1;
10606 ctxt->replaceEntities = ctx->replaceEntities;
10607 if (ctxt->validate) {
10608 ctxt->vctxt.error = ctx->vctxt.error;
10609 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010610 } else {
10611 ctxt->vctxt.error = NULL;
10612 ctxt->vctxt.warning = NULL;
10613 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010614 ctxt->vctxt.nodeTab = NULL;
10615 ctxt->vctxt.nodeNr = 0;
10616 ctxt->vctxt.nodeMax = 0;
10617 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010618
10619 xmlParseContent(ctxt);
10620
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010621 ctx->validate = ctxt->validate;
10622 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010623 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010624 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010625 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010626 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010627 }
10628 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010629 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010630 }
10631
10632 if (!ctxt->wellFormed) {
10633 if (ctxt->errNo == 0)
10634 ret = 1;
10635 else
10636 ret = ctxt->errNo;
10637 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010638 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010639 xmlNodePtr cur;
10640
10641 /*
10642 * Return the newly created nodeset after unlinking it from
10643 * they pseudo parent.
10644 */
10645 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010646 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010647 while (cur != NULL) {
10648 cur->parent = NULL;
10649 cur = cur->next;
10650 }
10651 newDoc->children->children = NULL;
10652 }
10653 ret = 0;
10654 }
10655 ctxt->sax = oldsax;
10656 xmlFreeParserCtxt(ctxt);
10657 newDoc->intSubset = NULL;
10658 newDoc->extSubset = NULL;
10659 xmlFreeDoc(newDoc);
10660
10661 return(ret);
10662}
10663
10664/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010665 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010666 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010667 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010668 * @sax: the SAX handler bloc (possibly NULL)
10669 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10670 * @depth: Used for loop detection, use 0
10671 * @URL: the URL for the entity to load
10672 * @ID: the System ID for the entity to load
10673 * @list: the return value for the set of parsed nodes
10674 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010675 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010676 *
10677 * Returns 0 if the entity is well formed, -1 in case of args problem and
10678 * the parser error code otherwise
10679 */
10680
Daniel Veillard7d515752003-09-26 19:12:37 +000010681static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010682xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10683 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010684 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010685 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010686 xmlParserCtxtPtr ctxt;
10687 xmlDocPtr newDoc;
10688 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010689 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010690 xmlChar start[4];
10691 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010692
10693 if (depth > 40) {
10694 return(XML_ERR_ENTITY_LOOP);
10695 }
10696
10697
10698
10699 if (list != NULL)
10700 *list = NULL;
10701 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010702 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010703 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010704 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010705
10706
10707 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010708 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010709 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010710 if (oldctxt != NULL) {
10711 ctxt->_private = oldctxt->_private;
10712 ctxt->loadsubset = oldctxt->loadsubset;
10713 ctxt->validate = oldctxt->validate;
10714 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010715 ctxt->record_info = oldctxt->record_info;
10716 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10717 ctxt->node_seq.length = oldctxt->node_seq.length;
10718 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010719 } else {
10720 /*
10721 * Doing validity checking on chunk without context
10722 * doesn't make sense
10723 */
10724 ctxt->_private = NULL;
10725 ctxt->validate = 0;
10726 ctxt->external = 2;
10727 ctxt->loadsubset = 0;
10728 }
Owen Taylor3473f882001-02-23 17:55:21 +000010729 if (sax != NULL) {
10730 oldsax = ctxt->sax;
10731 ctxt->sax = sax;
10732 if (user_data != NULL)
10733 ctxt->userData = user_data;
10734 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010735 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010736 newDoc = xmlNewDoc(BAD_CAST "1.0");
10737 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010738 ctxt->node_seq.maximum = 0;
10739 ctxt->node_seq.length = 0;
10740 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010741 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010742 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010743 }
10744 if (doc != NULL) {
10745 newDoc->intSubset = doc->intSubset;
10746 newDoc->extSubset = doc->extSubset;
10747 }
10748 if (doc->URL != NULL) {
10749 newDoc->URL = xmlStrdup(doc->URL);
10750 }
10751 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10752 if (newDoc->children == NULL) {
10753 if (sax != NULL)
10754 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010755 ctxt->node_seq.maximum = 0;
10756 ctxt->node_seq.length = 0;
10757 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010758 xmlFreeParserCtxt(ctxt);
10759 newDoc->intSubset = NULL;
10760 newDoc->extSubset = NULL;
10761 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010762 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010763 }
10764 nodePush(ctxt, newDoc->children);
10765 if (doc == NULL) {
10766 ctxt->myDoc = newDoc;
10767 } else {
10768 ctxt->myDoc = doc;
10769 newDoc->children->doc = doc;
10770 }
10771
Daniel Veillard87a764e2001-06-20 17:41:10 +000010772 /*
10773 * Get the 4 first bytes and decode the charset
10774 * if enc != XML_CHAR_ENCODING_NONE
10775 * plug some encoding conversion routines.
10776 */
10777 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010778 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10779 start[0] = RAW;
10780 start[1] = NXT(1);
10781 start[2] = NXT(2);
10782 start[3] = NXT(3);
10783 enc = xmlDetectCharEncoding(start, 4);
10784 if (enc != XML_CHAR_ENCODING_NONE) {
10785 xmlSwitchEncoding(ctxt, enc);
10786 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010787 }
10788
Owen Taylor3473f882001-02-23 17:55:21 +000010789 /*
10790 * Parse a possible text declaration first
10791 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010792 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010793 xmlParseTextDecl(ctxt);
10794 }
10795
Owen Taylor3473f882001-02-23 17:55:21 +000010796 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010797 ctxt->depth = depth;
10798
10799 xmlParseContent(ctxt);
10800
Daniel Veillard561b7f82002-03-20 21:55:57 +000010801 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010802 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010803 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010804 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010805 }
10806 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010807 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010808 }
10809
10810 if (!ctxt->wellFormed) {
10811 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010812 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010813 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010814 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010815 } else {
10816 if (list != NULL) {
10817 xmlNodePtr cur;
10818
10819 /*
10820 * Return the newly created nodeset after unlinking it from
10821 * they pseudo parent.
10822 */
10823 cur = newDoc->children->children;
10824 *list = cur;
10825 while (cur != NULL) {
10826 cur->parent = NULL;
10827 cur = cur->next;
10828 }
10829 newDoc->children->children = NULL;
10830 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010831 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010832 }
10833 if (sax != NULL)
10834 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010835 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10836 oldctxt->node_seq.length = ctxt->node_seq.length;
10837 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010838 ctxt->node_seq.maximum = 0;
10839 ctxt->node_seq.length = 0;
10840 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010841 xmlFreeParserCtxt(ctxt);
10842 newDoc->intSubset = NULL;
10843 newDoc->extSubset = NULL;
10844 xmlFreeDoc(newDoc);
10845
10846 return(ret);
10847}
10848
Daniel Veillard81273902003-09-30 00:43:48 +000010849#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010850/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010851 * xmlParseExternalEntity:
10852 * @doc: the document the chunk pertains to
10853 * @sax: the SAX handler bloc (possibly NULL)
10854 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10855 * @depth: Used for loop detection, use 0
10856 * @URL: the URL for the entity to load
10857 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010858 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010859 *
10860 * Parse an external general entity
10861 * An external general parsed entity is well-formed if it matches the
10862 * production labeled extParsedEnt.
10863 *
10864 * [78] extParsedEnt ::= TextDecl? content
10865 *
10866 * Returns 0 if the entity is well formed, -1 in case of args problem and
10867 * the parser error code otherwise
10868 */
10869
10870int
10871xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010872 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010873 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010874 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010875}
10876
10877/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010878 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010879 * @doc: the document the chunk pertains to
10880 * @sax: the SAX handler bloc (possibly NULL)
10881 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10882 * @depth: Used for loop detection, use 0
10883 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010884 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010885 *
10886 * Parse a well-balanced chunk of an XML document
10887 * called by the parser
10888 * The allowed sequence for the Well Balanced Chunk is the one defined by
10889 * the content production in the XML grammar:
10890 *
10891 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10892 *
10893 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10894 * the parser error code otherwise
10895 */
10896
10897int
10898xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010899 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010900 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10901 depth, string, lst, 0 );
10902}
Daniel Veillard81273902003-09-30 00:43:48 +000010903#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010904
10905/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010906 * xmlParseBalancedChunkMemoryInternal:
10907 * @oldctxt: the existing parsing context
10908 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10909 * @user_data: the user data field for the parser context
10910 * @lst: the return value for the set of parsed nodes
10911 *
10912 *
10913 * Parse a well-balanced chunk of an XML document
10914 * called by the parser
10915 * The allowed sequence for the Well Balanced Chunk is the one defined by
10916 * the content production in the XML grammar:
10917 *
10918 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10919 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010920 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10921 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010922 *
10923 * In case recover is set to 1, the nodelist will not be empty even if
10924 * the parsed chunk is not well balanced.
10925 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010926static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010927xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10928 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10929 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010930 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010931 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010932 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010933 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010934 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010935
10936 if (oldctxt->depth > 40) {
10937 return(XML_ERR_ENTITY_LOOP);
10938 }
10939
10940
10941 if (lst != NULL)
10942 *lst = NULL;
10943 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010944 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010945
10946 size = xmlStrlen(string);
10947
10948 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010949 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010950 if (user_data != NULL)
10951 ctxt->userData = user_data;
10952 else
10953 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010954 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10955 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010956
10957 oldsax = ctxt->sax;
10958 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010959 xmlDetectSAX2(ctxt);
10960
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010961 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010962 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010963 newDoc = xmlNewDoc(BAD_CAST "1.0");
10964 if (newDoc == NULL) {
10965 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010966 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010967 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010968 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010969 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010970 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010971 } else {
10972 ctxt->myDoc = oldctxt->myDoc;
10973 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010974 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010975 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010976 BAD_CAST "pseudoroot", NULL);
10977 if (ctxt->myDoc->children == NULL) {
10978 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010979 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010980 xmlFreeParserCtxt(ctxt);
10981 if (newDoc != NULL)
10982 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010983 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010984 }
10985 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010986 ctxt->instate = XML_PARSER_CONTENT;
10987 ctxt->depth = oldctxt->depth + 1;
10988
Daniel Veillard328f48c2002-11-15 15:24:34 +000010989 ctxt->validate = 0;
10990 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010991 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10992 /*
10993 * ID/IDREF registration will be done in xmlValidateElement below
10994 */
10995 ctxt->loadsubset |= XML_SKIP_IDS;
10996 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010997 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010998
Daniel Veillard68e9e742002-11-16 15:35:11 +000010999 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011000 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011001 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011002 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011003 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011004 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011005 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011006 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011007 }
11008
11009 if (!ctxt->wellFormed) {
11010 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011011 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011012 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011013 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011014 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011015 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011016 }
11017
William M. Brack7b9154b2003-09-27 19:23:50 +000011018 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011019 xmlNodePtr cur;
11020
11021 /*
11022 * Return the newly created nodeset after unlinking it from
11023 * they pseudo parent.
11024 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011025 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011026 *lst = cur;
11027 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011028#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011029 if (oldctxt->validate && oldctxt->wellFormed &&
11030 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11031 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11032 oldctxt->myDoc, cur);
11033 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011034#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011035 cur->parent = NULL;
11036 cur = cur->next;
11037 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011038 ctxt->myDoc->children->children = NULL;
11039 }
11040 if (ctxt->myDoc != NULL) {
11041 xmlFreeNode(ctxt->myDoc->children);
11042 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011043 }
11044
11045 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011046 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011047 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011048 if (newDoc != NULL)
11049 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011050
11051 return(ret);
11052}
11053
Daniel Veillard81273902003-09-30 00:43:48 +000011054#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011055/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011056 * xmlParseBalancedChunkMemoryRecover:
11057 * @doc: the document the chunk pertains to
11058 * @sax: the SAX handler bloc (possibly NULL)
11059 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11060 * @depth: Used for loop detection, use 0
11061 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11062 * @lst: the return value for the set of parsed nodes
11063 * @recover: return nodes even if the data is broken (use 0)
11064 *
11065 *
11066 * Parse a well-balanced chunk of an XML document
11067 * called by the parser
11068 * The allowed sequence for the Well Balanced Chunk is the one defined by
11069 * the content production in the XML grammar:
11070 *
11071 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11072 *
11073 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11074 * the parser error code otherwise
11075 *
11076 * In case recover is set to 1, the nodelist will not be empty even if
11077 * the parsed chunk is not well balanced.
11078 */
11079int
11080xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11081 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11082 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011083 xmlParserCtxtPtr ctxt;
11084 xmlDocPtr newDoc;
11085 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011086 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011087 int size;
11088 int ret = 0;
11089
11090 if (depth > 40) {
11091 return(XML_ERR_ENTITY_LOOP);
11092 }
11093
11094
Daniel Veillardcda96922001-08-21 10:56:31 +000011095 if (lst != NULL)
11096 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011097 if (string == NULL)
11098 return(-1);
11099
11100 size = xmlStrlen(string);
11101
11102 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11103 if (ctxt == NULL) return(-1);
11104 ctxt->userData = ctxt;
11105 if (sax != NULL) {
11106 oldsax = ctxt->sax;
11107 ctxt->sax = sax;
11108 if (user_data != NULL)
11109 ctxt->userData = user_data;
11110 }
11111 newDoc = xmlNewDoc(BAD_CAST "1.0");
11112 if (newDoc == NULL) {
11113 xmlFreeParserCtxt(ctxt);
11114 return(-1);
11115 }
11116 if (doc != NULL) {
11117 newDoc->intSubset = doc->intSubset;
11118 newDoc->extSubset = doc->extSubset;
11119 }
11120 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11121 if (newDoc->children == NULL) {
11122 if (sax != NULL)
11123 ctxt->sax = oldsax;
11124 xmlFreeParserCtxt(ctxt);
11125 newDoc->intSubset = NULL;
11126 newDoc->extSubset = NULL;
11127 xmlFreeDoc(newDoc);
11128 return(-1);
11129 }
11130 nodePush(ctxt, newDoc->children);
11131 if (doc == NULL) {
11132 ctxt->myDoc = newDoc;
11133 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011134 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011135 newDoc->children->doc = doc;
11136 }
11137 ctxt->instate = XML_PARSER_CONTENT;
11138 ctxt->depth = depth;
11139
11140 /*
11141 * Doing validity checking on chunk doesn't make sense
11142 */
11143 ctxt->validate = 0;
11144 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011145 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011146
Daniel Veillardb39bc392002-10-26 19:29:51 +000011147 if ( doc != NULL ){
11148 content = doc->children;
11149 doc->children = NULL;
11150 xmlParseContent(ctxt);
11151 doc->children = content;
11152 }
11153 else {
11154 xmlParseContent(ctxt);
11155 }
Owen Taylor3473f882001-02-23 17:55:21 +000011156 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011157 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011158 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011159 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011160 }
11161 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011162 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011163 }
11164
11165 if (!ctxt->wellFormed) {
11166 if (ctxt->errNo == 0)
11167 ret = 1;
11168 else
11169 ret = ctxt->errNo;
11170 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011171 ret = 0;
11172 }
11173
11174 if (lst != NULL && (ret == 0 || recover == 1)) {
11175 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011176
11177 /*
11178 * Return the newly created nodeset after unlinking it from
11179 * they pseudo parent.
11180 */
11181 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011182 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011183 while (cur != NULL) {
11184 cur->parent = NULL;
11185 cur = cur->next;
11186 }
11187 newDoc->children->children = NULL;
11188 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011189
Owen Taylor3473f882001-02-23 17:55:21 +000011190 if (sax != NULL)
11191 ctxt->sax = oldsax;
11192 xmlFreeParserCtxt(ctxt);
11193 newDoc->intSubset = NULL;
11194 newDoc->extSubset = NULL;
11195 xmlFreeDoc(newDoc);
11196
11197 return(ret);
11198}
11199
11200/**
11201 * xmlSAXParseEntity:
11202 * @sax: the SAX handler block
11203 * @filename: the filename
11204 *
11205 * parse an XML external entity out of context and build a tree.
11206 * It use the given SAX function block to handle the parsing callback.
11207 * If sax is NULL, fallback to the default DOM tree building routines.
11208 *
11209 * [78] extParsedEnt ::= TextDecl? content
11210 *
11211 * This correspond to a "Well Balanced" chunk
11212 *
11213 * Returns the resulting document tree
11214 */
11215
11216xmlDocPtr
11217xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11218 xmlDocPtr ret;
11219 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011220
11221 ctxt = xmlCreateFileParserCtxt(filename);
11222 if (ctxt == NULL) {
11223 return(NULL);
11224 }
11225 if (sax != NULL) {
11226 if (ctxt->sax != NULL)
11227 xmlFree(ctxt->sax);
11228 ctxt->sax = sax;
11229 ctxt->userData = NULL;
11230 }
11231
Owen Taylor3473f882001-02-23 17:55:21 +000011232 xmlParseExtParsedEnt(ctxt);
11233
11234 if (ctxt->wellFormed)
11235 ret = ctxt->myDoc;
11236 else {
11237 ret = NULL;
11238 xmlFreeDoc(ctxt->myDoc);
11239 ctxt->myDoc = NULL;
11240 }
11241 if (sax != NULL)
11242 ctxt->sax = NULL;
11243 xmlFreeParserCtxt(ctxt);
11244
11245 return(ret);
11246}
11247
11248/**
11249 * xmlParseEntity:
11250 * @filename: the filename
11251 *
11252 * parse an XML external entity out of context and build a tree.
11253 *
11254 * [78] extParsedEnt ::= TextDecl? content
11255 *
11256 * This correspond to a "Well Balanced" chunk
11257 *
11258 * Returns the resulting document tree
11259 */
11260
11261xmlDocPtr
11262xmlParseEntity(const char *filename) {
11263 return(xmlSAXParseEntity(NULL, filename));
11264}
Daniel Veillard81273902003-09-30 00:43:48 +000011265#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011266
11267/**
11268 * xmlCreateEntityParserCtxt:
11269 * @URL: the entity URL
11270 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011271 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011272 *
11273 * Create a parser context for an external entity
11274 * Automatic support for ZLIB/Compress compressed document is provided
11275 * by default if found at compile-time.
11276 *
11277 * Returns the new parser context or NULL
11278 */
11279xmlParserCtxtPtr
11280xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11281 const xmlChar *base) {
11282 xmlParserCtxtPtr ctxt;
11283 xmlParserInputPtr inputStream;
11284 char *directory = NULL;
11285 xmlChar *uri;
11286
11287 ctxt = xmlNewParserCtxt();
11288 if (ctxt == NULL) {
11289 return(NULL);
11290 }
11291
11292 uri = xmlBuildURI(URL, base);
11293
11294 if (uri == NULL) {
11295 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11296 if (inputStream == NULL) {
11297 xmlFreeParserCtxt(ctxt);
11298 return(NULL);
11299 }
11300
11301 inputPush(ctxt, inputStream);
11302
11303 if ((ctxt->directory == NULL) && (directory == NULL))
11304 directory = xmlParserGetDirectory((char *)URL);
11305 if ((ctxt->directory == NULL) && (directory != NULL))
11306 ctxt->directory = directory;
11307 } else {
11308 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11309 if (inputStream == NULL) {
11310 xmlFree(uri);
11311 xmlFreeParserCtxt(ctxt);
11312 return(NULL);
11313 }
11314
11315 inputPush(ctxt, inputStream);
11316
11317 if ((ctxt->directory == NULL) && (directory == NULL))
11318 directory = xmlParserGetDirectory((char *)uri);
11319 if ((ctxt->directory == NULL) && (directory != NULL))
11320 ctxt->directory = directory;
11321 xmlFree(uri);
11322 }
Owen Taylor3473f882001-02-23 17:55:21 +000011323 return(ctxt);
11324}
11325
11326/************************************************************************
11327 * *
11328 * Front ends when parsing from a file *
11329 * *
11330 ************************************************************************/
11331
11332/**
11333 * xmlCreateFileParserCtxt:
11334 * @filename: the filename
11335 *
11336 * Create a parser context for a file content.
11337 * Automatic support for ZLIB/Compress compressed document is provided
11338 * by default if found at compile-time.
11339 *
11340 * Returns the new parser context or NULL
11341 */
11342xmlParserCtxtPtr
11343xmlCreateFileParserCtxt(const char *filename)
11344{
11345 xmlParserCtxtPtr ctxt;
11346 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011347 char *directory = NULL;
11348
Owen Taylor3473f882001-02-23 17:55:21 +000011349 ctxt = xmlNewParserCtxt();
11350 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011351 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011352 return(NULL);
11353 }
11354
Igor Zlatkovicce076162003-02-23 13:39:39 +000011355
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011356 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011357 if (inputStream == NULL) {
11358 xmlFreeParserCtxt(ctxt);
11359 return(NULL);
11360 }
11361
Owen Taylor3473f882001-02-23 17:55:21 +000011362 inputPush(ctxt, inputStream);
11363 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011364 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011365 if ((ctxt->directory == NULL) && (directory != NULL))
11366 ctxt->directory = directory;
11367
11368 return(ctxt);
11369}
11370
Daniel Veillard81273902003-09-30 00:43:48 +000011371#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011372/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011373 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011374 * @sax: the SAX handler block
11375 * @filename: the filename
11376 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11377 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011378 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011379 *
11380 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11381 * compressed document is provided by default if found at compile-time.
11382 * It use the given SAX function block to handle the parsing callback.
11383 * If sax is NULL, fallback to the default DOM tree building routines.
11384 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011385 * User data (void *) is stored within the parser context in the
11386 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011387 *
Owen Taylor3473f882001-02-23 17:55:21 +000011388 * Returns the resulting document tree
11389 */
11390
11391xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011392xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11393 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011394 xmlDocPtr ret;
11395 xmlParserCtxtPtr ctxt;
11396 char *directory = NULL;
11397
Daniel Veillard635ef722001-10-29 11:48:19 +000011398 xmlInitParser();
11399
Owen Taylor3473f882001-02-23 17:55:21 +000011400 ctxt = xmlCreateFileParserCtxt(filename);
11401 if (ctxt == NULL) {
11402 return(NULL);
11403 }
11404 if (sax != NULL) {
11405 if (ctxt->sax != NULL)
11406 xmlFree(ctxt->sax);
11407 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011408 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011409 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011410 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011411 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011412 }
Owen Taylor3473f882001-02-23 17:55:21 +000011413
11414 if ((ctxt->directory == NULL) && (directory == NULL))
11415 directory = xmlParserGetDirectory(filename);
11416 if ((ctxt->directory == NULL) && (directory != NULL))
11417 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11418
Daniel Veillarddad3f682002-11-17 16:47:27 +000011419 ctxt->recovery = recovery;
11420
Owen Taylor3473f882001-02-23 17:55:21 +000011421 xmlParseDocument(ctxt);
11422
William M. Brackc07329e2003-09-08 01:57:30 +000011423 if ((ctxt->wellFormed) || recovery) {
11424 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011425 if (ret != NULL) {
11426 if (ctxt->input->buf->compressed > 0)
11427 ret->compression = 9;
11428 else
11429 ret->compression = ctxt->input->buf->compressed;
11430 }
William M. Brackc07329e2003-09-08 01:57:30 +000011431 }
Owen Taylor3473f882001-02-23 17:55:21 +000011432 else {
11433 ret = NULL;
11434 xmlFreeDoc(ctxt->myDoc);
11435 ctxt->myDoc = NULL;
11436 }
11437 if (sax != NULL)
11438 ctxt->sax = NULL;
11439 xmlFreeParserCtxt(ctxt);
11440
11441 return(ret);
11442}
11443
11444/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011445 * xmlSAXParseFile:
11446 * @sax: the SAX handler block
11447 * @filename: the filename
11448 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11449 * documents
11450 *
11451 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11452 * compressed document is provided by default if found at compile-time.
11453 * It use the given SAX function block to handle the parsing callback.
11454 * If sax is NULL, fallback to the default DOM tree building routines.
11455 *
11456 * Returns the resulting document tree
11457 */
11458
11459xmlDocPtr
11460xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11461 int recovery) {
11462 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11463}
11464
11465/**
Owen Taylor3473f882001-02-23 17:55:21 +000011466 * xmlRecoverDoc:
11467 * @cur: a pointer to an array of xmlChar
11468 *
11469 * parse an XML in-memory document and build a tree.
11470 * In the case the document is not Well Formed, a tree is built anyway
11471 *
11472 * Returns the resulting document tree
11473 */
11474
11475xmlDocPtr
11476xmlRecoverDoc(xmlChar *cur) {
11477 return(xmlSAXParseDoc(NULL, cur, 1));
11478}
11479
11480/**
11481 * xmlParseFile:
11482 * @filename: the filename
11483 *
11484 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11485 * compressed document is provided by default if found at compile-time.
11486 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011487 * Returns the resulting document tree if the file was wellformed,
11488 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011489 */
11490
11491xmlDocPtr
11492xmlParseFile(const char *filename) {
11493 return(xmlSAXParseFile(NULL, filename, 0));
11494}
11495
11496/**
11497 * xmlRecoverFile:
11498 * @filename: the filename
11499 *
11500 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11501 * compressed document is provided by default if found at compile-time.
11502 * In the case the document is not Well Formed, a tree is built anyway
11503 *
11504 * Returns the resulting document tree
11505 */
11506
11507xmlDocPtr
11508xmlRecoverFile(const char *filename) {
11509 return(xmlSAXParseFile(NULL, filename, 1));
11510}
11511
11512
11513/**
11514 * xmlSetupParserForBuffer:
11515 * @ctxt: an XML parser context
11516 * @buffer: a xmlChar * buffer
11517 * @filename: a file name
11518 *
11519 * Setup the parser context to parse a new buffer; Clears any prior
11520 * contents from the parser context. The buffer parameter must not be
11521 * NULL, but the filename parameter can be
11522 */
11523void
11524xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11525 const char* filename)
11526{
11527 xmlParserInputPtr input;
11528
11529 input = xmlNewInputStream(ctxt);
11530 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011531 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011532 xmlFree(ctxt);
11533 return;
11534 }
11535
11536 xmlClearParserCtxt(ctxt);
11537 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011538 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011539 input->base = buffer;
11540 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011541 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011542 inputPush(ctxt, input);
11543}
11544
11545/**
11546 * xmlSAXUserParseFile:
11547 * @sax: a SAX handler
11548 * @user_data: The user data returned on SAX callbacks
11549 * @filename: a file name
11550 *
11551 * parse an XML file and call the given SAX handler routines.
11552 * Automatic support for ZLIB/Compress compressed document is provided
11553 *
11554 * Returns 0 in case of success or a error number otherwise
11555 */
11556int
11557xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11558 const char *filename) {
11559 int ret = 0;
11560 xmlParserCtxtPtr ctxt;
11561
11562 ctxt = xmlCreateFileParserCtxt(filename);
11563 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011564#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011565 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011566#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011567 xmlFree(ctxt->sax);
11568 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011569 xmlDetectSAX2(ctxt);
11570
Owen Taylor3473f882001-02-23 17:55:21 +000011571 if (user_data != NULL)
11572 ctxt->userData = user_data;
11573
11574 xmlParseDocument(ctxt);
11575
11576 if (ctxt->wellFormed)
11577 ret = 0;
11578 else {
11579 if (ctxt->errNo != 0)
11580 ret = ctxt->errNo;
11581 else
11582 ret = -1;
11583 }
11584 if (sax != NULL)
11585 ctxt->sax = NULL;
11586 xmlFreeParserCtxt(ctxt);
11587
11588 return ret;
11589}
Daniel Veillard81273902003-09-30 00:43:48 +000011590#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011591
11592/************************************************************************
11593 * *
11594 * Front ends when parsing from memory *
11595 * *
11596 ************************************************************************/
11597
11598/**
11599 * xmlCreateMemoryParserCtxt:
11600 * @buffer: a pointer to a char array
11601 * @size: the size of the array
11602 *
11603 * Create a parser context for an XML in-memory document.
11604 *
11605 * Returns the new parser context or NULL
11606 */
11607xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011608xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011609 xmlParserCtxtPtr ctxt;
11610 xmlParserInputPtr input;
11611 xmlParserInputBufferPtr buf;
11612
11613 if (buffer == NULL)
11614 return(NULL);
11615 if (size <= 0)
11616 return(NULL);
11617
11618 ctxt = xmlNewParserCtxt();
11619 if (ctxt == NULL)
11620 return(NULL);
11621
Daniel Veillard53350552003-09-18 13:35:51 +000011622 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011623 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011624 if (buf == NULL) {
11625 xmlFreeParserCtxt(ctxt);
11626 return(NULL);
11627 }
Owen Taylor3473f882001-02-23 17:55:21 +000011628
11629 input = xmlNewInputStream(ctxt);
11630 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011631 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011632 xmlFreeParserCtxt(ctxt);
11633 return(NULL);
11634 }
11635
11636 input->filename = NULL;
11637 input->buf = buf;
11638 input->base = input->buf->buffer->content;
11639 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011640 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011641
11642 inputPush(ctxt, input);
11643 return(ctxt);
11644}
11645
Daniel Veillard81273902003-09-30 00:43:48 +000011646#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011647/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011648 * xmlSAXParseMemoryWithData:
11649 * @sax: the SAX handler block
11650 * @buffer: an pointer to a char array
11651 * @size: the size of the array
11652 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11653 * documents
11654 * @data: the userdata
11655 *
11656 * parse an XML in-memory block and use the given SAX function block
11657 * to handle the parsing callback. If sax is NULL, fallback to the default
11658 * DOM tree building routines.
11659 *
11660 * User data (void *) is stored within the parser context in the
11661 * context's _private member, so it is available nearly everywhere in libxml
11662 *
11663 * Returns the resulting document tree
11664 */
11665
11666xmlDocPtr
11667xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11668 int size, int recovery, void *data) {
11669 xmlDocPtr ret;
11670 xmlParserCtxtPtr ctxt;
11671
11672 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11673 if (ctxt == NULL) return(NULL);
11674 if (sax != NULL) {
11675 if (ctxt->sax != NULL)
11676 xmlFree(ctxt->sax);
11677 ctxt->sax = sax;
11678 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011679 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011680 if (data!=NULL) {
11681 ctxt->_private=data;
11682 }
11683
Daniel Veillardadba5f12003-04-04 16:09:01 +000011684 ctxt->recovery = recovery;
11685
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011686 xmlParseDocument(ctxt);
11687
11688 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11689 else {
11690 ret = NULL;
11691 xmlFreeDoc(ctxt->myDoc);
11692 ctxt->myDoc = NULL;
11693 }
11694 if (sax != NULL)
11695 ctxt->sax = NULL;
11696 xmlFreeParserCtxt(ctxt);
11697
11698 return(ret);
11699}
11700
11701/**
Owen Taylor3473f882001-02-23 17:55:21 +000011702 * xmlSAXParseMemory:
11703 * @sax: the SAX handler block
11704 * @buffer: an pointer to a char array
11705 * @size: the size of the array
11706 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11707 * documents
11708 *
11709 * parse an XML in-memory block and use the given SAX function block
11710 * to handle the parsing callback. If sax is NULL, fallback to the default
11711 * DOM tree building routines.
11712 *
11713 * Returns the resulting document tree
11714 */
11715xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011716xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11717 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011718 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011719}
11720
11721/**
11722 * xmlParseMemory:
11723 * @buffer: an pointer to a char array
11724 * @size: the size of the array
11725 *
11726 * parse an XML in-memory block and build a tree.
11727 *
11728 * Returns the resulting document tree
11729 */
11730
Daniel Veillard50822cb2001-07-26 20:05:51 +000011731xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011732 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11733}
11734
11735/**
11736 * xmlRecoverMemory:
11737 * @buffer: an pointer to a char array
11738 * @size: the size of the array
11739 *
11740 * parse an XML in-memory block and build a tree.
11741 * In the case the document is not Well Formed, a tree is built anyway
11742 *
11743 * Returns the resulting document tree
11744 */
11745
Daniel Veillard50822cb2001-07-26 20:05:51 +000011746xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011747 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11748}
11749
11750/**
11751 * xmlSAXUserParseMemory:
11752 * @sax: a SAX handler
11753 * @user_data: The user data returned on SAX callbacks
11754 * @buffer: an in-memory XML document input
11755 * @size: the length of the XML document in bytes
11756 *
11757 * A better SAX parsing routine.
11758 * parse an XML in-memory buffer and call the given SAX handler routines.
11759 *
11760 * Returns 0 in case of success or a error number otherwise
11761 */
11762int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011763 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011764 int ret = 0;
11765 xmlParserCtxtPtr ctxt;
11766 xmlSAXHandlerPtr oldsax = NULL;
11767
Daniel Veillard9e923512002-08-14 08:48:52 +000011768 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011769 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11770 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011771 oldsax = ctxt->sax;
11772 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011773 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011774 if (user_data != NULL)
11775 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011776
11777 xmlParseDocument(ctxt);
11778
11779 if (ctxt->wellFormed)
11780 ret = 0;
11781 else {
11782 if (ctxt->errNo != 0)
11783 ret = ctxt->errNo;
11784 else
11785 ret = -1;
11786 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011787 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011788 xmlFreeParserCtxt(ctxt);
11789
11790 return ret;
11791}
Daniel Veillard81273902003-09-30 00:43:48 +000011792#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011793
11794/**
11795 * xmlCreateDocParserCtxt:
11796 * @cur: a pointer to an array of xmlChar
11797 *
11798 * Creates a parser context for an XML in-memory document.
11799 *
11800 * Returns the new parser context or NULL
11801 */
11802xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011803xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011804 int len;
11805
11806 if (cur == NULL)
11807 return(NULL);
11808 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011809 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011810}
11811
Daniel Veillard81273902003-09-30 00:43:48 +000011812#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011813/**
11814 * xmlSAXParseDoc:
11815 * @sax: the SAX handler block
11816 * @cur: a pointer to an array of xmlChar
11817 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11818 * documents
11819 *
11820 * parse an XML in-memory document and build a tree.
11821 * It use the given SAX function block to handle the parsing callback.
11822 * If sax is NULL, fallback to the default DOM tree building routines.
11823 *
11824 * Returns the resulting document tree
11825 */
11826
11827xmlDocPtr
11828xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11829 xmlDocPtr ret;
11830 xmlParserCtxtPtr ctxt;
11831
11832 if (cur == NULL) return(NULL);
11833
11834
11835 ctxt = xmlCreateDocParserCtxt(cur);
11836 if (ctxt == NULL) return(NULL);
11837 if (sax != NULL) {
11838 ctxt->sax = sax;
11839 ctxt->userData = NULL;
11840 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011841 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011842
11843 xmlParseDocument(ctxt);
11844 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11845 else {
11846 ret = NULL;
11847 xmlFreeDoc(ctxt->myDoc);
11848 ctxt->myDoc = NULL;
11849 }
11850 if (sax != NULL)
11851 ctxt->sax = NULL;
11852 xmlFreeParserCtxt(ctxt);
11853
11854 return(ret);
11855}
11856
11857/**
11858 * xmlParseDoc:
11859 * @cur: a pointer to an array of xmlChar
11860 *
11861 * parse an XML in-memory document and build a tree.
11862 *
11863 * Returns the resulting document tree
11864 */
11865
11866xmlDocPtr
11867xmlParseDoc(xmlChar *cur) {
11868 return(xmlSAXParseDoc(NULL, cur, 0));
11869}
Daniel Veillard81273902003-09-30 00:43:48 +000011870#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011871
Daniel Veillard81273902003-09-30 00:43:48 +000011872#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011873/************************************************************************
11874 * *
11875 * Specific function to keep track of entities references *
11876 * and used by the XSLT debugger *
11877 * *
11878 ************************************************************************/
11879
11880static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11881
11882/**
11883 * xmlAddEntityReference:
11884 * @ent : A valid entity
11885 * @firstNode : A valid first node for children of entity
11886 * @lastNode : A valid last node of children entity
11887 *
11888 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11889 */
11890static void
11891xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11892 xmlNodePtr lastNode)
11893{
11894 if (xmlEntityRefFunc != NULL) {
11895 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11896 }
11897}
11898
11899
11900/**
11901 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011902 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011903 *
11904 * Set the function to call call back when a xml reference has been made
11905 */
11906void
11907xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11908{
11909 xmlEntityRefFunc = func;
11910}
Daniel Veillard81273902003-09-30 00:43:48 +000011911#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011912
11913/************************************************************************
11914 * *
11915 * Miscellaneous *
11916 * *
11917 ************************************************************************/
11918
11919#ifdef LIBXML_XPATH_ENABLED
11920#include <libxml/xpath.h>
11921#endif
11922
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011923extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011924static int xmlParserInitialized = 0;
11925
11926/**
11927 * xmlInitParser:
11928 *
11929 * Initialization function for the XML parser.
11930 * This is not reentrant. Call once before processing in case of
11931 * use in multithreaded programs.
11932 */
11933
11934void
11935xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011936 if (xmlParserInitialized != 0)
11937 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011938
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011939 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11940 (xmlGenericError == NULL))
11941 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011942 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011943 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011944 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011945 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011946 xmlDefaultSAXHandlerInit();
11947 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011948#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011949 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011950#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011951#ifdef LIBXML_HTML_ENABLED
11952 htmlInitAutoClose();
11953 htmlDefaultSAXHandlerInit();
11954#endif
11955#ifdef LIBXML_XPATH_ENABLED
11956 xmlXPathInit();
11957#endif
11958 xmlParserInitialized = 1;
11959}
11960
11961/**
11962 * xmlCleanupParser:
11963 *
11964 * Cleanup function for the XML parser. It tries to reclaim all
11965 * parsing related global memory allocated for the parser processing.
11966 * It doesn't deallocate any document related memory. Calling this
11967 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011968 * One should call xmlCleanupParser() only when the process has
11969 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011970 */
11971
11972void
11973xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011974 if (!xmlParserInitialized)
11975 return;
11976
Owen Taylor3473f882001-02-23 17:55:21 +000011977 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011978#ifdef LIBXML_CATALOG_ENABLED
11979 xmlCatalogCleanup();
11980#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011981 xmlCleanupInputCallbacks();
11982#ifdef LIBXML_OUTPUT_ENABLED
11983 xmlCleanupOutputCallbacks();
11984#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011985 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011986 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011987 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000011988 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011989}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011990
11991/************************************************************************
11992 * *
11993 * New set (2.6.0) of simpler and more flexible APIs *
11994 * *
11995 ************************************************************************/
11996
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used, and @str is evaluated more than once.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
12008
12009/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012010 * xmlCtxtReset:
12011 * @ctxt: an XML parser context
12012 *
12013 * Reset a parser context
12014 */
12015void
12016xmlCtxtReset(xmlParserCtxtPtr ctxt)
12017{
12018 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012019 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012020
12021 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12022 xmlFreeInputStream(input);
12023 }
12024 ctxt->inputNr = 0;
12025 ctxt->input = NULL;
12026
12027 ctxt->spaceNr = 0;
12028 ctxt->spaceTab[0] = -1;
12029 ctxt->space = &ctxt->spaceTab[0];
12030
12031
12032 ctxt->nodeNr = 0;
12033 ctxt->node = NULL;
12034
12035 ctxt->nameNr = 0;
12036 ctxt->name = NULL;
12037
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012038 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012039 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012040 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012041 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012042 DICT_FREE(ctxt->directory);
12043 ctxt->directory = NULL;
12044 DICT_FREE(ctxt->extSubURI);
12045 ctxt->extSubURI = NULL;
12046 DICT_FREE(ctxt->extSubSystem);
12047 ctxt->extSubSystem = NULL;
12048 if (ctxt->myDoc != NULL)
12049 xmlFreeDoc(ctxt->myDoc);
12050 ctxt->myDoc = NULL;
12051
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012052 ctxt->standalone = -1;
12053 ctxt->hasExternalSubset = 0;
12054 ctxt->hasPErefs = 0;
12055 ctxt->html = 0;
12056 ctxt->external = 0;
12057 ctxt->instate = XML_PARSER_START;
12058 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012059
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012060 ctxt->wellFormed = 1;
12061 ctxt->nsWellFormed = 1;
12062 ctxt->valid = 1;
12063 ctxt->vctxt.userData = ctxt;
12064 ctxt->vctxt.error = xmlParserValidityError;
12065 ctxt->vctxt.warning = xmlParserValidityWarning;
12066 ctxt->record_info = 0;
12067 ctxt->nbChars = 0;
12068 ctxt->checkIndex = 0;
12069 ctxt->inSubset = 0;
12070 ctxt->errNo = XML_ERR_OK;
12071 ctxt->depth = 0;
12072 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12073 ctxt->catalogs = NULL;
12074 xmlInitNodeInfoSeq(&ctxt->node_seq);
12075
12076 if (ctxt->attsDefault != NULL) {
12077 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12078 ctxt->attsDefault = NULL;
12079 }
12080 if (ctxt->attsSpecial != NULL) {
12081 xmlHashFree(ctxt->attsSpecial, NULL);
12082 ctxt->attsSpecial = NULL;
12083 }
12084
Daniel Veillard4432df22003-09-28 18:58:27 +000012085#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012086 if (ctxt->catalogs != NULL)
12087 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012088#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012089}
12090
12091/**
12092 * xmlCtxtUseOptions:
12093 * @ctxt: an XML parser context
12094 * @options: a combination of xmlParserOption(s)
12095 *
12096 * Applies the options to the parser context
12097 *
12098 * Returns 0 in case of success, the set of unknown or unimplemented options
12099 * in case of error.
12100 */
12101int
12102xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12103{
12104 if (options & XML_PARSE_RECOVER) {
12105 ctxt->recovery = 1;
12106 options -= XML_PARSE_RECOVER;
12107 } else
12108 ctxt->recovery = 0;
12109 if (options & XML_PARSE_DTDLOAD) {
12110 ctxt->loadsubset = XML_DETECT_IDS;
12111 options -= XML_PARSE_DTDLOAD;
12112 } else
12113 ctxt->loadsubset = 0;
12114 if (options & XML_PARSE_DTDATTR) {
12115 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12116 options -= XML_PARSE_DTDATTR;
12117 }
12118 if (options & XML_PARSE_NOENT) {
12119 ctxt->replaceEntities = 1;
12120 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12121 options -= XML_PARSE_NOENT;
12122 } else
12123 ctxt->replaceEntities = 0;
12124 if (options & XML_PARSE_NOWARNING) {
12125 ctxt->sax->warning = NULL;
12126 options -= XML_PARSE_NOWARNING;
12127 }
12128 if (options & XML_PARSE_NOERROR) {
12129 ctxt->sax->error = NULL;
12130 ctxt->sax->fatalError = NULL;
12131 options -= XML_PARSE_NOERROR;
12132 }
12133 if (options & XML_PARSE_PEDANTIC) {
12134 ctxt->pedantic = 1;
12135 options -= XML_PARSE_PEDANTIC;
12136 } else
12137 ctxt->pedantic = 0;
12138 if (options & XML_PARSE_NOBLANKS) {
12139 ctxt->keepBlanks = 0;
12140 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12141 options -= XML_PARSE_NOBLANKS;
12142 } else
12143 ctxt->keepBlanks = 1;
12144 if (options & XML_PARSE_DTDVALID) {
12145 ctxt->validate = 1;
12146 if (options & XML_PARSE_NOWARNING)
12147 ctxt->vctxt.warning = NULL;
12148 if (options & XML_PARSE_NOERROR)
12149 ctxt->vctxt.error = NULL;
12150 options -= XML_PARSE_DTDVALID;
12151 } else
12152 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012153#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012154 if (options & XML_PARSE_SAX1) {
12155 ctxt->sax->startElement = xmlSAX2StartElement;
12156 ctxt->sax->endElement = xmlSAX2EndElement;
12157 ctxt->sax->startElementNs = NULL;
12158 ctxt->sax->endElementNs = NULL;
12159 ctxt->sax->initialized = 1;
12160 options -= XML_PARSE_SAX1;
12161 }
Daniel Veillard81273902003-09-30 00:43:48 +000012162#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012163 if (options & XML_PARSE_NODICT) {
12164 ctxt->dictNames = 0;
12165 options -= XML_PARSE_NODICT;
12166 } else {
12167 ctxt->dictNames = 1;
12168 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012169 if (options & XML_PARSE_NOCDATA) {
12170 ctxt->sax->cdataBlock = NULL;
12171 options -= XML_PARSE_NOCDATA;
12172 }
12173 if (options & XML_PARSE_NSCLEAN) {
12174 ctxt->options |= XML_PARSE_NSCLEAN;
12175 options -= XML_PARSE_NSCLEAN;
12176 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012177 return (options);
12178}
12179
12180/**
12181 * xmlDoRead:
12182 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012183 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012184 * @encoding: the document encoding, or NULL
12185 * @options: a combination of xmlParserOption(s)
12186 * @reuse: keep the context for reuse
12187 *
12188 * Common front-end for the xmlRead functions
12189 *
12190 * Returns the resulting document tree or NULL
12191 */
12192static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012193xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12194 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012195{
12196 xmlDocPtr ret;
12197
12198 xmlCtxtUseOptions(ctxt, options);
12199 if (encoding != NULL) {
12200 xmlCharEncodingHandlerPtr hdlr;
12201
12202 hdlr = xmlFindCharEncodingHandler(encoding);
12203 if (hdlr != NULL)
12204 xmlSwitchToEncoding(ctxt, hdlr);
12205 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012206 if ((URL != NULL) && (ctxt->input != NULL) &&
12207 (ctxt->input->filename == NULL))
12208 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012209 xmlParseDocument(ctxt);
12210 if ((ctxt->wellFormed) || ctxt->recovery)
12211 ret = ctxt->myDoc;
12212 else {
12213 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012214 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012215 if ((ctxt->dictNames) &&
12216 (ctxt->myDoc->dict == ctxt->dict))
12217 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012218 xmlFreeDoc(ctxt->myDoc);
12219 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012220 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012221 ctxt->myDoc = NULL;
12222 if (!reuse) {
12223 if ((ctxt->dictNames) &&
12224 (ret != NULL) &&
12225 (ret->dict == ctxt->dict))
12226 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012227 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012228 } else {
12229 /* Must duplicate the reference to the dictionary */
12230 if ((ctxt->dictNames) &&
12231 (ret != NULL) &&
12232 (ret->dict == ctxt->dict))
12233 xmlDictReference(ctxt->dict);
12234 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012235
12236 return (ret);
12237}
12238
12239/**
12240 * xmlReadDoc:
12241 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012242 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012243 * @encoding: the document encoding, or NULL
12244 * @options: a combination of xmlParserOption(s)
12245 *
12246 * parse an XML in-memory document and build a tree.
12247 *
12248 * Returns the resulting document tree
12249 */
12250xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012251xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012252{
12253 xmlParserCtxtPtr ctxt;
12254
12255 if (cur == NULL)
12256 return (NULL);
12257
12258 ctxt = xmlCreateDocParserCtxt(cur);
12259 if (ctxt == NULL)
12260 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012261 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012262}
12263
12264/**
12265 * xmlReadFile:
12266 * @filename: a file or URL
12267 * @encoding: the document encoding, or NULL
12268 * @options: a combination of xmlParserOption(s)
12269 *
12270 * parse an XML file from the filesystem or the network.
12271 *
12272 * Returns the resulting document tree
12273 */
12274xmlDocPtr
12275xmlReadFile(const char *filename, const char *encoding, int options)
12276{
12277 xmlParserCtxtPtr ctxt;
12278
12279 ctxt = xmlCreateFileParserCtxt(filename);
12280 if (ctxt == NULL)
12281 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012282 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012283}
12284
12285/**
12286 * xmlReadMemory:
12287 * @buffer: a pointer to a char array
12288 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012289 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012290 * @encoding: the document encoding, or NULL
12291 * @options: a combination of xmlParserOption(s)
12292 *
12293 * parse an XML in-memory document and build a tree.
12294 *
12295 * Returns the resulting document tree
12296 */
12297xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012298xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012299{
12300 xmlParserCtxtPtr ctxt;
12301
12302 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12303 if (ctxt == NULL)
12304 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012305 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012306}
12307
12308/**
12309 * xmlReadFd:
12310 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012311 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012312 * @encoding: the document encoding, or NULL
12313 * @options: a combination of xmlParserOption(s)
12314 *
12315 * parse an XML from a file descriptor and build a tree.
12316 *
12317 * Returns the resulting document tree
12318 */
12319xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012320xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012321{
12322 xmlParserCtxtPtr ctxt;
12323 xmlParserInputBufferPtr input;
12324 xmlParserInputPtr stream;
12325
12326 if (fd < 0)
12327 return (NULL);
12328
12329 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12330 if (input == NULL)
12331 return (NULL);
12332 ctxt = xmlNewParserCtxt();
12333 if (ctxt == NULL) {
12334 xmlFreeParserInputBuffer(input);
12335 return (NULL);
12336 }
12337 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12338 if (stream == NULL) {
12339 xmlFreeParserInputBuffer(input);
12340 xmlFreeParserCtxt(ctxt);
12341 return (NULL);
12342 }
12343 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012344 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012345}
12346
12347/**
12348 * xmlReadIO:
12349 * @ioread: an I/O read function
12350 * @ioclose: an I/O close function
12351 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012352 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012353 * @encoding: the document encoding, or NULL
12354 * @options: a combination of xmlParserOption(s)
12355 *
12356 * parse an XML document from I/O functions and source and build a tree.
12357 *
12358 * Returns the resulting document tree
12359 */
12360xmlDocPtr
12361xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012362 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012363{
12364 xmlParserCtxtPtr ctxt;
12365 xmlParserInputBufferPtr input;
12366 xmlParserInputPtr stream;
12367
12368 if (ioread == NULL)
12369 return (NULL);
12370
12371 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12372 XML_CHAR_ENCODING_NONE);
12373 if (input == NULL)
12374 return (NULL);
12375 ctxt = xmlNewParserCtxt();
12376 if (ctxt == NULL) {
12377 xmlFreeParserInputBuffer(input);
12378 return (NULL);
12379 }
12380 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12381 if (stream == NULL) {
12382 xmlFreeParserInputBuffer(input);
12383 xmlFreeParserCtxt(ctxt);
12384 return (NULL);
12385 }
12386 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012387 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012388}
12389
12390/**
12391 * xmlCtxtReadDoc:
12392 * @ctxt: an XML parser context
12393 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012394 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012395 * @encoding: the document encoding, or NULL
12396 * @options: a combination of xmlParserOption(s)
12397 *
12398 * parse an XML in-memory document and build a tree.
12399 * This reuses the existing @ctxt parser context
12400 *
12401 * Returns the resulting document tree
12402 */
12403xmlDocPtr
12404xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012405 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012406{
12407 xmlParserInputPtr stream;
12408
12409 if (cur == NULL)
12410 return (NULL);
12411 if (ctxt == NULL)
12412 return (NULL);
12413
12414 xmlCtxtReset(ctxt);
12415
12416 stream = xmlNewStringInputStream(ctxt, cur);
12417 if (stream == NULL) {
12418 return (NULL);
12419 }
12420 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012421 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012422}
12423
12424/**
12425 * xmlCtxtReadFile:
12426 * @ctxt: an XML parser context
12427 * @filename: a file or URL
12428 * @encoding: the document encoding, or NULL
12429 * @options: a combination of xmlParserOption(s)
12430 *
12431 * parse an XML file from the filesystem or the network.
12432 * This reuses the existing @ctxt parser context
12433 *
12434 * Returns the resulting document tree
12435 */
12436xmlDocPtr
12437xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12438 const char *encoding, int options)
12439{
12440 xmlParserInputPtr stream;
12441
12442 if (filename == NULL)
12443 return (NULL);
12444 if (ctxt == NULL)
12445 return (NULL);
12446
12447 xmlCtxtReset(ctxt);
12448
12449 stream = xmlNewInputFromFile(ctxt, filename);
12450 if (stream == NULL) {
12451 return (NULL);
12452 }
12453 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012454 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012455}
12456
12457/**
12458 * xmlCtxtReadMemory:
12459 * @ctxt: an XML parser context
12460 * @buffer: a pointer to a char array
12461 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012462 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012463 * @encoding: the document encoding, or NULL
12464 * @options: a combination of xmlParserOption(s)
12465 *
12466 * parse an XML in-memory document and build a tree.
12467 * This reuses the existing @ctxt parser context
12468 *
12469 * Returns the resulting document tree
12470 */
12471xmlDocPtr
12472xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012473 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012474{
12475 xmlParserInputBufferPtr input;
12476 xmlParserInputPtr stream;
12477
12478 if (ctxt == NULL)
12479 return (NULL);
12480 if (buffer == NULL)
12481 return (NULL);
12482
12483 xmlCtxtReset(ctxt);
12484
12485 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12486 if (input == NULL) {
12487 return(NULL);
12488 }
12489
12490 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12491 if (stream == NULL) {
12492 xmlFreeParserInputBuffer(input);
12493 return(NULL);
12494 }
12495
12496 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012497 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012498}
12499
12500/**
12501 * xmlCtxtReadFd:
12502 * @ctxt: an XML parser context
12503 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012504 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012505 * @encoding: the document encoding, or NULL
12506 * @options: a combination of xmlParserOption(s)
12507 *
12508 * parse an XML from a file descriptor and build a tree.
12509 * This reuses the existing @ctxt parser context
12510 *
12511 * Returns the resulting document tree
12512 */
12513xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012514xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12515 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012516{
12517 xmlParserInputBufferPtr input;
12518 xmlParserInputPtr stream;
12519
12520 if (fd < 0)
12521 return (NULL);
12522 if (ctxt == NULL)
12523 return (NULL);
12524
12525 xmlCtxtReset(ctxt);
12526
12527
12528 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12529 if (input == NULL)
12530 return (NULL);
12531 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12532 if (stream == NULL) {
12533 xmlFreeParserInputBuffer(input);
12534 return (NULL);
12535 }
12536 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012537 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012538}
12539
12540/**
12541 * xmlCtxtReadIO:
12542 * @ctxt: an XML parser context
12543 * @ioread: an I/O read function
12544 * @ioclose: an I/O close function
12545 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012546 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012547 * @encoding: the document encoding, or NULL
12548 * @options: a combination of xmlParserOption(s)
12549 *
12550 * parse an XML document from I/O functions and source and build a tree.
12551 * This reuses the existing @ctxt parser context
12552 *
12553 * Returns the resulting document tree
12554 */
12555xmlDocPtr
12556xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12557 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012558 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012559 const char *encoding, int options)
12560{
12561 xmlParserInputBufferPtr input;
12562 xmlParserInputPtr stream;
12563
12564 if (ioread == NULL)
12565 return (NULL);
12566 if (ctxt == NULL)
12567 return (NULL);
12568
12569 xmlCtxtReset(ctxt);
12570
12571 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12572 XML_CHAR_ENCODING_NONE);
12573 if (input == NULL)
12574 return (NULL);
12575 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12576 if (stream == NULL) {
12577 xmlFreeParserInputBuffer(input);
12578 return (NULL);
12579 }
12580 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012581 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012582}