blob: ccec4364b6931cda6aa28a65033dbd59fc371beb [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * Directory separator character: a backslash when WIN32 is defined and
 * __CYGWIN__ is not, a forward slash in every other configuration.
 */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser itself but a safety
 * boundary feature. The initial value is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/*
 * NOTE(review): presumably enables the SAX2 code paths further down in
 * this file; no use is visible in this section -- confirm.
 */
#define SAX2 1

/*
 * Buffer sizes. Their users are not visible in this section
 * (presumably initial sizes of parsing buffers -- TODO confirm).
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * String constant describing the SAX compatibility mode; where it is
 * compared or stored is not visible in this section.
 */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * List of XML prefixed PI (processing instruction) targets allowed by
 * the W3C specs. The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */

/*
 * Forward declarations. The definitions are not part of this section of
 * the file.
 */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
              const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000124
/************************************************************************
 *                                                                      *
 *              Some factored-out error routines                        *
 *                                                                      *
 ************************************************************************/
130
131/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000132 * xmlErrAttributeDup:
133 * @ctxt: an XML parser context
134 * @prefix: the attribute prefix
135 * @localname: the attribute localname
136 *
137 * Handle a redefinition of attribute error
138 */
139static void
140xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
141 const xmlChar * localname)
142{
143 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000144 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000145 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000146 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
147 (const char *) localname, NULL, NULL, 0, 0,
148 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000149 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000150 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000151 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
152 (const char *) prefix, (const char *) localname,
153 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
154 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000155 ctxt->wellFormed = 0;
156 if (ctxt->recovery == 0)
157 ctxt->disableSAX = 1;
158}
159
160/**
161 * xmlFatalErr:
162 * @ctxt: an XML parser context
163 * @error: the error number
164 * @extra: extra information string
165 *
166 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
167 */
168static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000169xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000170{
171 const char *errmsg;
172
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000173 switch (error) {
174 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000175 errmsg = "CharRef: invalid hexadecimal value\n";
176 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000178 errmsg = "CharRef: invalid decimal value\n";
179 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000181 errmsg = "CharRef: invalid value\n";
182 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000184 errmsg = "internal error";
185 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000187 errmsg = "PEReference at end of document\n";
188 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000190 errmsg = "PEReference in prolog\n";
191 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000192 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000193 errmsg = "PEReference in epilog\n";
194 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000195 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000196 errmsg = "PEReference: no name\n";
197 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000198 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000199 errmsg = "PEReference: expecting ';'\n";
200 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000201 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000202 errmsg = "Detected an entity reference loop\n";
203 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000204 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000205 errmsg = "EntityValue: \" or ' expected\n";
206 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000207 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000208 errmsg = "PEReferences forbidden in internal subset\n";
209 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000210 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000211 errmsg = "EntityValue: \" or ' expected\n";
212 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000213 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000214 errmsg = "AttValue: \" or ' expected\n";
215 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000216 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000217 errmsg = "Unescaped '<' not allowed in attributes values\n";
218 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000219 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000220 errmsg = "SystemLiteral \" or ' expected\n";
221 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000222 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000223 errmsg = "Unfinished System or Public ID \" or ' expected\n";
224 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000225 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000226 errmsg = "Sequence ']]>' not allowed in content\n";
227 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000229 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
230 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000231 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000232 errmsg = "PUBLIC, the Public Identifier is missing\n";
233 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000234 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000235 errmsg = "Comment must not contain '--' (double-hyphen)\n";
236 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000238 errmsg = "xmlParsePI : no target name\n";
239 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000240 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000241 errmsg = "Invalid PI name\n";
242 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000243 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000244 errmsg = "NOTATION: Name expected here\n";
245 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000246 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000247 errmsg = "'>' required to close NOTATION declaration\n";
248 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000249 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000250 errmsg = "Entity value required\n";
251 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000252 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000253 errmsg = "Fragment not allowed";
254 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000255 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 errmsg = "'(' required to start ATTLIST enumeration\n";
257 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000258 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000259 errmsg = "NmToken expected in ATTLIST enumeration\n";
260 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000261 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 errmsg = "')' required to finish ATTLIST enumeration\n";
263 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000264 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000265 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
266 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000267 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000268 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
269 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000271 errmsg = "ContentDecl : Name or '(' expected\n";
272 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000273 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000274 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
275 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000276 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000277 errmsg =
278 "PEReference: forbidden within markup decl in internal subset\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "expected '>'\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "XML conditional section '[' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "Content error in the external subset\n";
288 break;
289 case XML_ERR_CONDSEC_INVALID_KEYWORD:
290 errmsg =
291 "conditional section INCLUDE or IGNORE keyword expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section not closed\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Text declaration '<?xml' required\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "parsing XML declaration: '?>' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "external parsed entities cannot be standalone\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "EntityRef: expecting ';'\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "DOCTYPE improperly terminated\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "EndTag: '</' not found\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "expected '='\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "String not closed expecting \" or '\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "String not started expecting ' or \"\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "Invalid XML encoding name\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "standalone accepts only 'yes' or 'no'\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "Document is empty\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Extra content at the end of the document\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "chunk is not well balanced\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "extra content at the end of well balanced chunk\n";
340 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000341 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "Malformed declaration expecting version\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 case:
346 errmsg = "\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 default:
350 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 }
352 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000353 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
355 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 ctxt->wellFormed = 0;
357 if (ctxt->recovery == 0)
358 ctxt->disableSAX = 1;
359}
360
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000361/**
362 * xmlFatalErrMsg:
363 * @ctxt: an XML parser context
364 * @error: the error number
365 * @msg: the error message
366 *
367 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
368 */
369static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
371 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000372{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000373 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000374 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000376 ctxt->wellFormed = 0;
377 if (ctxt->recovery == 0)
378 ctxt->disableSAX = 1;
379}
380
381/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000382 * xmlWarningMsg:
383 * @ctxt: an XML parser context
384 * @error: the error number
385 * @msg: the error message
386 * @str1: extra data
387 * @str2: extra data
388 *
389 * Handle a warning.
390 */
391static void
392xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
393 const char *msg, const xmlChar *str1, const xmlChar *str2)
394{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000395 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000396
Daniel Veillard24eb9782003-10-04 21:08:09 +0000397 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000398 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000399 schannel = ctxt->sax->serror;
400 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000401 (ctxt->sax) ? ctxt->sax->warning : NULL,
402 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000403 ctxt, NULL, XML_FROM_PARSER, error,
404 XML_ERR_WARNING, NULL, 0,
405 (const char *) str1, (const char *) str2, NULL, 0, 0,
406 msg, (const char *) str1, (const char *) str2);
407}
408
409/**
410 * xmlValidityError:
411 * @ctxt: an XML parser context
412 * @error: the error number
413 * @msg: the error message
414 * @str1: extra data
415 *
416 * Handle a warning.
417 */
418static void
419xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
420 const char *msg, const xmlChar *str1)
421{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000422 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000423 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000424 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000425 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000426 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000427 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_DTD, error,
429 XML_ERR_ERROR, NULL, 0, (const char *) str1,
430 NULL, NULL, 0, 0,
431 msg, (const char *) str1);
432 ctxt->valid = 0;
433}
434
435/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000436 * xmlFatalErrMsgInt:
437 * @ctxt: an XML parser context
438 * @error: the error number
439 * @msg: the error message
440 * @val: an integer value
441 *
442 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
443 */
444static void
445xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000447{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000448 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000449 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
451 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000452 ctxt->wellFormed = 0;
453 if (ctxt->recovery == 0)
454 ctxt->disableSAX = 1;
455}
456
457/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000458 * xmlFatalErrMsgStrIntStr:
459 * @ctxt: an XML parser context
460 * @error: the error number
461 * @msg: the error message
462 * @str1: an string info
463 * @val: an integer value
464 * @str2: an string info
465 *
466 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
467 */
468static void
469xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
470 const char *msg, const xmlChar *str1, int val,
471 const xmlChar *str2)
472{
473 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000474 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000475 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
476 NULL, 0, (const char *) str1, (const char *) str2,
477 NULL, val, 0, msg, str1, val, str2);
478 ctxt->wellFormed = 0;
479 if (ctxt->recovery == 0)
480 ctxt->disableSAX = 1;
481}
482
483/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000484 * xmlFatalErrMsgStr:
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
488 * @val: a string value
489 *
490 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
491 */
492static void
493xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000495{
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
500 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000507 * xmlErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a non fatal parser error
514 */
515static void
516xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar * val)
518{
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 XML_FROM_PARSER, error, XML_ERR_ERROR,
522 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
523 val);
524}
525
526/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000527 * xmlNsErr:
528 * @ctxt: an XML parser context
529 * @error: the error number
530 * @msg: the message
531 * @info1: extra information string
532 * @info2: extra information string
533 *
534 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
535 */
536static void
537xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
538 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const xmlChar * info1, const xmlChar * info2,
540 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000541{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000542 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000543 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000544 XML_ERR_ERROR, NULL, 0, (const char *) info1,
545 (const char *) info2, (const char *) info3, 0, 0, msg,
546 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000547 ctxt->nsWellFormed = 0;
548}
549
/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/
555
556/**
557 * xmlDetectSAX2:
558 * @ctxt: an XML parser context
559 *
560 * Do the SAX2 detection and specific intialization
561 */
562static void
563xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
564 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000565#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000566 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
567 ((ctxt->sax->startElementNs != NULL) ||
568 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000569#else
570 ctxt->sax2 = 1;
571#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000572
573 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
574 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
575 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
576}
577
/*
 * Set of defaulted attributes recorded for one element.
 * Each attribute occupies four consecutive slots of the values array, as
 * filled in by xmlAddDefAttrs: localname, prefix, value and a pointer to
 * the end of the value. The structure is allocated with extra room past
 * the declared array, so values really holds at least 4 * maxAttrs
 * entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity of the array, counted in attributes */
    const xmlChar *values[4]; /* localname/prefix/value/value end groups */
};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000585
586/**
587 * xmlAddDefAttrs:
588 * @ctxt: an XML parser context
589 * @fullname: the element fullname
590 * @fullattr: the attribute fullname
591 * @value: the attribute value
592 *
593 * Add a defaulted attribute for an element
594 */
595static void
596xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
597 const xmlChar *fullname,
598 const xmlChar *fullattr,
599 const xmlChar *value) {
600 xmlDefAttrsPtr defaults;
601 int len;
602 const xmlChar *name;
603 const xmlChar *prefix;
604
605 if (ctxt->attsDefault == NULL) {
606 ctxt->attsDefault = xmlHashCreate(10);
607 if (ctxt->attsDefault == NULL)
608 goto mem_error;
609 }
610
611 /*
612 * plit the element name into prefix:localname , the string found
613 * are within the DTD and hen not associated to namespace names.
614 */
615 name = xmlSplitQName3(fullname, &len);
616 if (name == NULL) {
617 name = xmlDictLookup(ctxt->dict, fullname, -1);
618 prefix = NULL;
619 } else {
620 name = xmlDictLookup(ctxt->dict, name, -1);
621 prefix = xmlDictLookup(ctxt->dict, fullname, len);
622 }
623
624 /*
625 * make sure there is some storage
626 */
627 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
628 if (defaults == NULL) {
629 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
630 12 * sizeof(const xmlChar *));
631 if (defaults == NULL)
632 goto mem_error;
633 defaults->maxAttrs = 4;
634 defaults->nbAttrs = 0;
635 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
636 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
637 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
638 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
639 if (defaults == NULL)
640 goto mem_error;
641 defaults->maxAttrs *= 2;
642 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
643 }
644
645 /*
646 * plit the element name into prefix:localname , the string found
647 * are within the DTD and hen not associated to namespace names.
648 */
649 name = xmlSplitQName3(fullattr, &len);
650 if (name == NULL) {
651 name = xmlDictLookup(ctxt->dict, fullattr, -1);
652 prefix = NULL;
653 } else {
654 name = xmlDictLookup(ctxt->dict, name, -1);
655 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
656 }
657
658 defaults->values[4 * defaults->nbAttrs] = name;
659 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
660 /* intern the string and precompute the end */
661 len = xmlStrlen(value);
662 value = xmlDictLookup(ctxt->dict, value, len);
663 defaults->values[4 * defaults->nbAttrs + 2] = value;
664 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
665 defaults->nbAttrs++;
666
667 return;
668
669mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000670 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 return;
672}
673
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000674/**
675 * xmlAddSpecialAttr:
676 * @ctxt: an XML parser context
677 * @fullname: the element fullname
678 * @fullattr: the attribute fullname
679 * @type: the attribute type
680 *
681 * Register that this attribute is not CDATA
682 */
683static void
684xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
685 const xmlChar *fullname,
686 const xmlChar *fullattr,
687 int type)
688{
689 if (ctxt->attsSpecial == NULL) {
690 ctxt->attsSpecial = xmlHashCreate(10);
691 if (ctxt->attsSpecial == NULL)
692 goto mem_error;
693 }
694
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000695 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
696 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000697 return;
698
699mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000700 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000701 return;
702}
703
Daniel Veillard4432df22003-09-28 18:58:27 +0000704/**
705 * xmlCheckLanguageID:
706 * @lang: pointer to the string value
707 *
708 * Checks that the value conforms to the LanguageID production:
709 *
710 * NOTE: this is somewhat deprecated, those productions were removed from
711 * the XML Second edition.
712 *
713 * [33] LanguageID ::= Langcode ('-' Subcode)*
714 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
715 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
716 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
717 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
718 * [38] Subcode ::= ([a-z] | [A-Z])+
719 *
720 * Returns 1 if correct 0 otherwise
721 **/
722int
723xmlCheckLanguageID(const xmlChar * lang)
724{
725 const xmlChar *cur = lang;
726
727 if (cur == NULL)
728 return (0);
729 if (((cur[0] == 'i') && (cur[1] == '-')) ||
730 ((cur[0] == 'I') && (cur[1] == '-'))) {
731 /*
732 * IANA code
733 */
734 cur += 2;
735 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
736 ((cur[0] >= 'a') && (cur[0] <= 'z')))
737 cur++;
738 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
739 ((cur[0] == 'X') && (cur[1] == '-'))) {
740 /*
741 * User code
742 */
743 cur += 2;
744 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
745 ((cur[0] >= 'a') && (cur[0] <= 'z')))
746 cur++;
747 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
748 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
749 /*
750 * ISO639
751 */
752 cur++;
753 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
754 ((cur[0] >= 'a') && (cur[0] <= 'z')))
755 cur++;
756 else
757 return (0);
758 } else
759 return (0);
760 while (cur[0] != 0) { /* non input consuming */
761 if (cur[0] != '-')
762 return (0);
763 cur++;
764 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
765 ((cur[0] >= 'a') && (cur[0] <= 'z')))
766 cur++;
767 else
768 return (0);
769 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
770 ((cur[0] >= 'a') && (cur[0] <= 'z')))
771 cur++;
772 }
773 return (1);
774}
775
Owen Taylor3473f882001-02-23 17:55:21 +0000776/************************************************************************
777 * *
778 * Parser stacks related functions and macros *
779 * *
780 ************************************************************************/
781
782xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
783 const xmlChar ** str);
784
Daniel Veillard0fb18932003-09-07 09:14:37 +0000785#ifdef SAX2
786/**
787 * nsPush:
788 * @ctxt: an XML parser context
789 * @prefix: the namespace prefix or NULL
790 * @URL: the namespace name
791 *
792 * Pushes a new parser namespace on top of the ns stack
793 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000794 * Returns -1 in case of error, -2 if the namespace should be discarded
795 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000796 */
797static int
798nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
799{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000800 if (ctxt->options & XML_PARSE_NSCLEAN) {
801 int i;
802 for (i = 0;i < ctxt->nsNr;i += 2) {
803 if (ctxt->nsTab[i] == prefix) {
804 /* in scope */
805 if (ctxt->nsTab[i + 1] == URL)
806 return(-2);
807 /* out of scope keep it */
808 break;
809 }
810 }
811 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000812 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
813 ctxt->nsMax = 10;
814 ctxt->nsNr = 0;
815 ctxt->nsTab = (const xmlChar **)
816 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
817 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000818 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000819 ctxt->nsMax = 0;
820 return (-1);
821 }
822 } else if (ctxt->nsNr >= ctxt->nsMax) {
823 ctxt->nsMax *= 2;
824 ctxt->nsTab = (const xmlChar **)
825 xmlRealloc(ctxt->nsTab,
826 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
827 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000828 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000829 ctxt->nsMax /= 2;
830 return (-1);
831 }
832 }
833 ctxt->nsTab[ctxt->nsNr++] = prefix;
834 ctxt->nsTab[ctxt->nsNr++] = URL;
835 return (ctxt->nsNr);
836}
837/**
838 * nsPop:
839 * @ctxt: an XML parser context
840 * @nr: the number to pop
841 *
842 * Pops the top @nr parser prefix/namespace from the ns stack
843 *
844 * Returns the number of namespaces removed
845 */
846static int
847nsPop(xmlParserCtxtPtr ctxt, int nr)
848{
849 int i;
850
851 if (ctxt->nsTab == NULL) return(0);
852 if (ctxt->nsNr < nr) {
853 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
854 nr = ctxt->nsNr;
855 }
856 if (ctxt->nsNr <= 0)
857 return (0);
858
859 for (i = 0;i < nr;i++) {
860 ctxt->nsNr--;
861 ctxt->nsTab[ctxt->nsNr] = NULL;
862 }
863 return(nr);
864}
865#endif
866
867static int
868xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
869 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000870 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000871 int maxatts;
872
873 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000874 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000875 atts = (const xmlChar **)
876 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000878 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
880 if (attallocs == NULL) goto mem_error;
881 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000882 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 } else if (nr + 5 > ctxt->maxatts) {
884 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000885 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
886 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000887 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000888 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000889 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
890 (maxatts / 5) * sizeof(int));
891 if (attallocs == NULL) goto mem_error;
892 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000893 ctxt->maxatts = maxatts;
894 }
895 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000896mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000897 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000898 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000899}
900
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000901/**
902 * inputPush:
903 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000904 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000905 *
906 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000907 *
908 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000909 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000910extern int
911inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
912{
913 if (ctxt->inputNr >= ctxt->inputMax) {
914 ctxt->inputMax *= 2;
915 ctxt->inputTab =
916 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
917 ctxt->inputMax *
918 sizeof(ctxt->inputTab[0]));
919 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000920 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000921 return (0);
922 }
923 }
924 ctxt->inputTab[ctxt->inputNr] = value;
925 ctxt->input = value;
926 return (ctxt->inputNr++);
927}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000928/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000929 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000930 * @ctxt: an XML parser context
931 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000932 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000934 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000935 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000936extern xmlParserInputPtr
937inputPop(xmlParserCtxtPtr ctxt)
938{
939 xmlParserInputPtr ret;
940
941 if (ctxt->inputNr <= 0)
942 return (0);
943 ctxt->inputNr--;
944 if (ctxt->inputNr > 0)
945 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
946 else
947 ctxt->input = NULL;
948 ret = ctxt->inputTab[ctxt->inputNr];
949 ctxt->inputTab[ctxt->inputNr] = 0;
950 return (ret);
951}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000952/**
953 * nodePush:
954 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000955 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000956 *
957 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000958 *
959 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961extern int
962nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
963{
964 if (ctxt->nodeNr >= ctxt->nodeMax) {
965 ctxt->nodeMax *= 2;
966 ctxt->nodeTab =
967 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
968 ctxt->nodeMax *
969 sizeof(ctxt->nodeTab[0]));
970 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000971 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 return (0);
973 }
974 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000975 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000976 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000977 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
978 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000979 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000980 return(0);
981 }
Daniel Veillard1c732d22002-11-30 11:22:59 +0000982 ctxt->nodeTab[ctxt->nodeNr] = value;
983 ctxt->node = value;
984 return (ctxt->nodeNr++);
985}
986/**
987 * nodePop:
988 * @ctxt: an XML parser context
989 *
990 * Pops the top element node from the node stack
991 *
992 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000993 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000994extern xmlNodePtr
995nodePop(xmlParserCtxtPtr ctxt)
996{
997 xmlNodePtr ret;
998
999 if (ctxt->nodeNr <= 0)
1000 return (0);
1001 ctxt->nodeNr--;
1002 if (ctxt->nodeNr > 0)
1003 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1004 else
1005 ctxt->node = NULL;
1006 ret = ctxt->nodeTab[ctxt->nodeNr];
1007 ctxt->nodeTab[ctxt->nodeNr] = 0;
1008 return (ret);
1009}
1010/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001011 * nameNsPush:
1012 * @ctxt: an XML parser context
1013 * @value: the element name
1014 * @prefix: the element prefix
1015 * @URI: the element namespace name
1016 *
1017 * Pushes a new element name/prefix/URL on top of the name stack
1018 *
1019 * Returns -1 in case of error, the index in the stack otherwise
1020 */
1021static int
1022nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1023 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1024{
1025 if (ctxt->nameNr >= ctxt->nameMax) {
1026 const xmlChar * *tmp;
1027 void **tmp2;
1028 ctxt->nameMax *= 2;
1029 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1030 ctxt->nameMax *
1031 sizeof(ctxt->nameTab[0]));
1032 if (tmp == NULL) {
1033 ctxt->nameMax /= 2;
1034 goto mem_error;
1035 }
1036 ctxt->nameTab = tmp;
1037 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1038 ctxt->nameMax * 3 *
1039 sizeof(ctxt->pushTab[0]));
1040 if (tmp2 == NULL) {
1041 ctxt->nameMax /= 2;
1042 goto mem_error;
1043 }
1044 ctxt->pushTab = tmp2;
1045 }
1046 ctxt->nameTab[ctxt->nameNr] = value;
1047 ctxt->name = value;
1048 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1049 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001050 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001051 return (ctxt->nameNr++);
1052mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001053 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001054 return (-1);
1055}
1056/**
1057 * nameNsPop:
1058 * @ctxt: an XML parser context
1059 *
1060 * Pops the top element/prefix/URI name from the name stack
1061 *
1062 * Returns the name just removed
1063 */
1064static const xmlChar *
1065nameNsPop(xmlParserCtxtPtr ctxt)
1066{
1067 const xmlChar *ret;
1068
1069 if (ctxt->nameNr <= 0)
1070 return (0);
1071 ctxt->nameNr--;
1072 if (ctxt->nameNr > 0)
1073 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1074 else
1075 ctxt->name = NULL;
1076 ret = ctxt->nameTab[ctxt->nameNr];
1077 ctxt->nameTab[ctxt->nameNr] = NULL;
1078 return (ret);
1079}
1080
1081/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001082 * namePush:
1083 * @ctxt: an XML parser context
1084 * @value: the element name
1085 *
1086 * Pushes a new element name on top of the name stack
1087 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001088 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001089 */
1090extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001091namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001092{
1093 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001094 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001095 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001097 ctxt->nameMax *
1098 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001099 if (tmp == NULL) {
1100 ctxt->nameMax /= 2;
1101 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001102 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001104 }
1105 ctxt->nameTab[ctxt->nameNr] = value;
1106 ctxt->name = value;
1107 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001108mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001109 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001110 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001111}
1112/**
1113 * namePop:
1114 * @ctxt: an XML parser context
1115 *
1116 * Pops the top element name from the name stack
1117 *
1118 * Returns the name just removed
1119 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001120extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121namePop(xmlParserCtxtPtr ctxt)
1122{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124
1125 if (ctxt->nameNr <= 0)
1126 return (0);
1127 ctxt->nameNr--;
1128 if (ctxt->nameNr > 0)
1129 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1130 else
1131 ctxt->name = NULL;
1132 ret = ctxt->nameTab[ctxt->nameNr];
1133 ctxt->nameTab[ctxt->nameNr] = 0;
1134 return (ret);
1135}
Owen Taylor3473f882001-02-23 17:55:21 +00001136
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001137static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001138 if (ctxt->spaceNr >= ctxt->spaceMax) {
1139 ctxt->spaceMax *= 2;
1140 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1141 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1142 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001143 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001144 return(0);
1145 }
1146 }
1147 ctxt->spaceTab[ctxt->spaceNr] = val;
1148 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1149 return(ctxt->spaceNr++);
1150}
1151
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001152static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001153 int ret;
1154 if (ctxt->spaceNr <= 0) return(0);
1155 ctxt->spaceNr--;
1156 if (ctxt->spaceNr > 0)
1157 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1158 else
1159 ctxt->space = NULL;
1160 ret = ctxt->spaceTab[ctxt->spaceNr];
1161 ctxt->spaceTab[ctxt->spaceNr] = -1;
1162 return(ret);
1163}
1164
1165/*
1166 * Macros for accessing the content. Those should be used only by the parser,
1167 * and not exported.
1168 *
1169 * Dirty macros, i.e. one often need to make assumption on the context to
1170 * use them
1171 *
1172 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1173 * To be used with extreme caution since operations consuming
1174 * characters may move the input buffer to a different location !
1175 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1176 * This should be used internally by the parser
1177 * only to compare to ASCII values otherwise it would break when
1178 * running with UTF-8 encoding.
1179 * RAW same as CUR but in the input buffer, bypass any token
1180 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1183 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001184 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser. It takes no argument.
Owen Taylor3473f882001-02-23 17:55:21 +00001187 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1188 *
1189 * NEXT Skip to the next character, this does the proper decoding
1190 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001191 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001192 * CUR_CHAR(l) returns the current unicode character (int), set l
1193 * to the number of xmlChars used for the encoding [0-5].
1194 * CUR_SCHAR same but operate on a string instead of the context
1195 * COPY_BUF copy the current unicode char to the target buffer, increment
1196 * the index
1197 * GROW, SHRINK handling of input buffers
1198 */
1199
/* Current byte of the current input; RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte at offset val from the current position. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current position pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the character count, the input pointer and the
 * column by val (val is evaluated three times), then hand a '%' at the
 * new position to xmlParserHandlePEReference() and pop the input if it
 * is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1212
Daniel Veillarda880b122003-04-21 21:36:41 +00001213#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001214 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1215 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001216 xmlSHRINK (ctxt);
1217
1218static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1219 xmlParserInputShrink(ctxt->input);
1220 if ((*ctxt->input->cur == 0) &&
1221 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1222 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001223 }
Owen Taylor3473f882001-02-23 17:55:21 +00001224
Daniel Veillarda880b122003-04-21 21:36:41 +00001225#define GROW if ((ctxt->progressive == 0) && \
1226 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001227 xmlGROW (ctxt);
1228
1229static void xmlGROW (xmlParserCtxtPtr ctxt) {
1230 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1231 if ((*ctxt->input->cur == 0) &&
1232 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1233 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001234}
Owen Taylor3473f882001-02-23 17:55:21 +00001235
/* Skip blanks at the current position; see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, updating the column and the
 * character count, and try to grow the input when a zero byte is reached.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1247
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A newline bumps the line counter and resets the column to 1, anything
 * else bumps the column; a '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context input; its length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same, reading from the string s instead of the context input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v, whose encoded length is l, to
 * buffer b at index i and advance i. Every parameter is parenthesized so
 * that expression arguments are taken as a whole (the cast in particular
 * applies to all of v). The expansion is still a bare if/else statement
 * without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
Owen Taylor3473f882001-02-23 17:55:21 +00001262
1263/**
1264 * xmlSkipBlankChars:
1265 * @ctxt: the XML parser context
1266 *
1267 * skip all blanks character found at that point in the input streams.
1268 * It pops up finished entities in the process if allowable at that point.
1269 *
1270 * Returns the number of space chars skipped
1271 */
1272
1273int
1274xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001275 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001276
1277 /*
1278 * It's Okay to use CUR/NEXT here since all the blanks are on
1279 * the ASCII range.
1280 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001281 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1282 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001283 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001284 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001285 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001286 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001287 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001288 if (*cur == '\n') {
1289 ctxt->input->line++; ctxt->input->col = 1;
1290 }
1291 cur++;
1292 res++;
1293 if (*cur == 0) {
1294 ctxt->input->cur = cur;
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 cur = ctxt->input->cur;
1297 }
1298 }
1299 ctxt->input->cur = cur;
1300 } else {
1301 int cur;
1302 do {
1303 cur = CUR;
1304 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1305 NEXT;
1306 cur = CUR;
1307 res++;
1308 }
1309 while ((cur == 0) && (ctxt->inputNr > 1) &&
1310 (ctxt->instate != XML_PARSER_COMMENT)) {
1311 xmlPopInput(ctxt);
1312 cur = CUR;
1313 }
1314 /*
1315 * Need to handle support of entities branching here
1316 */
1317 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1318 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1319 }
Owen Taylor3473f882001-02-23 17:55:21 +00001320 return(res);
1321}
1322
1323/************************************************************************
1324 * *
1325 * Commodity functions to handle entities *
1326 * *
1327 ************************************************************************/
1328
1329/**
1330 * xmlPopInput:
1331 * @ctxt: an XML parser context
1332 *
1333 * xmlPopInput: the current input pointed by ctxt->input came to an end
1334 * pop it and return the next char.
1335 *
1336 * Returns the current xmlChar in the parser context
1337 */
1338xmlChar
1339xmlPopInput(xmlParserCtxtPtr ctxt) {
1340 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1341 if (xmlParserDebugEntities)
1342 xmlGenericError(xmlGenericErrorContext,
1343 "Popping input %d\n", ctxt->inputNr);
1344 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001345 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001346 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1347 return(xmlPopInput(ctxt));
1348 return(CUR);
1349}
1350
1351/**
1352 * xmlPushInput:
1353 * @ctxt: an XML parser context
1354 * @input: an XML parser input fragment (entity, XML fragment ...).
1355 *
1356 * xmlPushInput: switch to a new input stream which is stacked on top
1357 * of the previous one(s).
1358 */
1359void
1360xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1361 if (input == NULL) return;
1362
1363 if (xmlParserDebugEntities) {
1364 if ((ctxt->input != NULL) && (ctxt->input->filename))
1365 xmlGenericError(xmlGenericErrorContext,
1366 "%s(%d): ", ctxt->input->filename,
1367 ctxt->input->line);
1368 xmlGenericError(xmlGenericErrorContext,
1369 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1370 }
1371 inputPush(ctxt, input);
1372 GROW;
1373}
1374
1375/**
1376 * xmlParseCharRef:
1377 * @ctxt: an XML parser context
1378 *
1379 * parse Reference declarations
1380 *
1381 * [66] CharRef ::= '&#' [0-9]+ ';' |
1382 * '&#x' [0-9a-fA-F]+ ';'
1383 *
1384 * [ WFC: Legal Character ]
1385 * Characters referred to using character references must match the
1386 * production for Char.
1387 *
1388 * Returns the value parsed (as an int), 0 in case of error
1389 */
1390int
1391xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001392 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001393 int count = 0;
1394
Owen Taylor3473f882001-02-23 17:55:21 +00001395 /*
1396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1397 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001398 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001399 (NXT(2) == 'x')) {
1400 SKIP(3);
1401 GROW;
1402 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001403 if (count++ > 20) {
1404 count = 0;
1405 GROW;
1406 }
1407 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001408 val = val * 16 + (CUR - '0');
1409 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1410 val = val * 16 + (CUR - 'a') + 10;
1411 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1412 val = val * 16 + (CUR - 'A') + 10;
1413 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001414 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001415 val = 0;
1416 break;
1417 }
1418 NEXT;
1419 count++;
1420 }
1421 if (RAW == ';') {
1422 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001423 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001424 ctxt->nbChars ++;
1425 ctxt->input->cur++;
1426 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001427 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001428 SKIP(2);
1429 GROW;
1430 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001431 if (count++ > 20) {
1432 count = 0;
1433 GROW;
1434 }
1435 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001436 val = val * 10 + (CUR - '0');
1437 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001438 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001439 val = 0;
1440 break;
1441 }
1442 NEXT;
1443 count++;
1444 }
1445 if (RAW == ';') {
1446 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001447 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001448 ctxt->nbChars ++;
1449 ctxt->input->cur++;
1450 }
1451 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001452 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001453 }
1454
1455 /*
1456 * [ WFC: Legal Character ]
1457 * Characters referred to using character references must match the
1458 * production for Char.
1459 */
William M. Brack871611b2003-10-18 04:53:14 +00001460 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001461 return(val);
1462 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001463 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1464 "xmlParseCharRef: invalid xmlChar value %d\n",
1465 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001466 }
1467 return(0);
1468}
1469
1470/**
1471 * xmlParseStringCharRef:
1472 * @ctxt: an XML parser context
1473 * @str: a pointer to an index in the string
1474 *
1475 * parse Reference declarations, variant parsing from a string rather
1476 * than an an input flow.
1477 *
1478 * [66] CharRef ::= '&#' [0-9]+ ';' |
1479 * '&#x' [0-9a-fA-F]+ ';'
1480 *
1481 * [ WFC: Legal Character ]
1482 * Characters referred to using character references must match the
1483 * production for Char.
1484 *
1485 * Returns the value parsed (as an int), 0 in case of error, str will be
1486 * updated to the current value of the index
1487 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001488static int
Owen Taylor3473f882001-02-23 17:55:21 +00001489xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1490 const xmlChar *ptr;
1491 xmlChar cur;
1492 int val = 0;
1493
1494 if ((str == NULL) || (*str == NULL)) return(0);
1495 ptr = *str;
1496 cur = *ptr;
1497 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1498 ptr += 3;
1499 cur = *ptr;
1500 while (cur != ';') { /* Non input consuming loop */
1501 if ((cur >= '0') && (cur <= '9'))
1502 val = val * 16 + (cur - '0');
1503 else if ((cur >= 'a') && (cur <= 'f'))
1504 val = val * 16 + (cur - 'a') + 10;
1505 else if ((cur >= 'A') && (cur <= 'F'))
1506 val = val * 16 + (cur - 'A') + 10;
1507 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001508 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001509 val = 0;
1510 break;
1511 }
1512 ptr++;
1513 cur = *ptr;
1514 }
1515 if (cur == ';')
1516 ptr++;
1517 } else if ((cur == '&') && (ptr[1] == '#')){
1518 ptr += 2;
1519 cur = *ptr;
1520 while (cur != ';') { /* Non input consuming loops */
1521 if ((cur >= '0') && (cur <= '9'))
1522 val = val * 10 + (cur - '0');
1523 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001524 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001525 val = 0;
1526 break;
1527 }
1528 ptr++;
1529 cur = *ptr;
1530 }
1531 if (cur == ';')
1532 ptr++;
1533 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001534 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001535 return(0);
1536 }
1537 *str = ptr;
1538
1539 /*
1540 * [ WFC: Legal Character ]
1541 * Characters referred to using character references must match the
1542 * production for Char.
1543 */
William M. Brack871611b2003-10-18 04:53:14 +00001544 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001545 return(val);
1546 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001547 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1548 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1549 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001550 }
1551 return(0);
1552}
1553
1554/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001555 * xmlNewBlanksWrapperInputStream:
1556 * @ctxt: an XML parser context
1557 * @entity: an Entity pointer
1558 *
1559 * Create a new input stream for wrapping
1560 * blanks around a PEReference
1561 *
1562 * Returns the new input stream or NULL
1563 */
1564
1565static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1566
Daniel Veillardf4862f02002-09-10 11:13:43 +00001567static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001568xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1569 xmlParserInputPtr input;
1570 xmlChar *buffer;
1571 size_t length;
1572 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001573 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1574 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001575 return(NULL);
1576 }
1577 if (xmlParserDebugEntities)
1578 xmlGenericError(xmlGenericErrorContext,
1579 "new blanks wrapper for entity: %s\n", entity->name);
1580 input = xmlNewInputStream(ctxt);
1581 if (input == NULL) {
1582 return(NULL);
1583 }
1584 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001585 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001586 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001587 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001588 return(NULL);
1589 }
1590 buffer [0] = ' ';
1591 buffer [1] = '%';
1592 buffer [length-3] = ';';
1593 buffer [length-2] = ' ';
1594 buffer [length-1] = 0;
1595 memcpy(buffer + 2, entity->name, length - 5);
1596 input->free = deallocblankswrapper;
1597 input->base = buffer;
1598 input->cur = buffer;
1599 input->length = length;
1600 input->end = &buffer[length];
1601 return(input);
1602}
1603
1604/**
Owen Taylor3473f882001-02-23 17:55:21 +00001605 * xmlParserHandlePEReference:
1606 * @ctxt: the parser context
1607 *
1608 * [69] PEReference ::= '%' Name ';'
1609 *
1610 * [ WFC: No Recursion ]
1611 * A parsed entity must not contain a recursive
1612 * reference to itself, either directly or indirectly.
1613 *
1614 * [ WFC: Entity Declared ]
1615 * In a document without any DTD, a document with only an internal DTD
1616 * subset which contains no parameter entity references, or a document
1617 * with "standalone='yes'", ... ... The declaration of a parameter
1618 * entity must precede any reference to it...
1619 *
1620 * [ VC: Entity Declared ]
1621 * In a document with an external subset or external parameter entities
1622 * with "standalone='no'", ... ... The declaration of a parameter entity
1623 * must precede any reference to it...
1624 *
1625 * [ WFC: In DTD ]
1626 * Parameter-entity references may only appear in the DTD.
1627 * NOTE: misleading but this is handled.
1628 *
1629 * A PEReference may have been detected in the current input stream
1630 * the handling is done accordingly to
1631 * http://www.w3.org/TR/REC-xml#entproc
1632 * i.e.
1633 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001634 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001635 */
1636void
1637xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001638 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001639 xmlEntityPtr entity = NULL;
1640 xmlParserInputPtr input;
1641
Owen Taylor3473f882001-02-23 17:55:21 +00001642 if (RAW != '%') return;
1643 switch(ctxt->instate) {
1644 case XML_PARSER_CDATA_SECTION:
1645 return;
1646 case XML_PARSER_COMMENT:
1647 return;
1648 case XML_PARSER_START_TAG:
1649 return;
1650 case XML_PARSER_END_TAG:
1651 return;
1652 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001653 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001654 return;
1655 case XML_PARSER_PROLOG:
1656 case XML_PARSER_START:
1657 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001658 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001659 return;
1660 case XML_PARSER_ENTITY_DECL:
1661 case XML_PARSER_CONTENT:
1662 case XML_PARSER_ATTRIBUTE_VALUE:
1663 case XML_PARSER_PI:
1664 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001665 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001666 /* we just ignore it there */
1667 return;
1668 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001669 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001670 return;
1671 case XML_PARSER_ENTITY_VALUE:
1672 /*
1673 * NOTE: in the case of entity values, we don't do the
1674 * substitution here since we need the literal
1675 * entity value to be able to save the internal
1676 * subset of the document.
1677 * This will be handled by xmlStringDecodeEntities
1678 */
1679 return;
1680 case XML_PARSER_DTD:
1681 /*
1682 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1683 * In the internal DTD subset, parameter-entity references
1684 * can occur only where markup declarations can occur, not
1685 * within markup declarations.
1686 * In that case this is handled in xmlParseMarkupDecl
1687 */
1688 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1689 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001690 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001691 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001692 break;
1693 case XML_PARSER_IGNORE:
1694 return;
1695 }
1696
1697 NEXT;
1698 name = xmlParseName(ctxt);
1699 if (xmlParserDebugEntities)
1700 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001701 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001702 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 } else {
1705 if (RAW == ';') {
1706 NEXT;
1707 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1708 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1709 if (entity == NULL) {
1710
1711 /*
1712 * [ WFC: Entity Declared ]
1713 * In a document without any DTD, a document with only an
1714 * internal DTD subset which contains no parameter entity
1715 * references, or a document with "standalone='yes'", ...
1716 * ... The declaration of a parameter entity must precede
1717 * any reference to it...
1718 */
1719 if ((ctxt->standalone == 1) ||
1720 ((ctxt->hasExternalSubset == 0) &&
1721 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001722 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001723 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001724 } else {
1725 /*
1726 * [ VC: Entity Declared ]
1727 * In a document with an external subset or external
1728 * parameter entities with "standalone='no'", ...
1729 * ... The declaration of a parameter entity must precede
1730 * any reference to it...
1731 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001732 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1733 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1734 "PEReference: %%%s; not found\n",
1735 name);
1736 } else
1737 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1738 "PEReference: %%%s; not found\n",
1739 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001740 ctxt->valid = 0;
1741 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001742 } else if (ctxt->input->free != deallocblankswrapper) {
1743 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1744 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001745 } else {
1746 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1747 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001748 xmlChar start[4];
1749 xmlCharEncoding enc;
1750
Owen Taylor3473f882001-02-23 17:55:21 +00001751 /*
1752 * handle the extra spaces added before and after
1753 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001754 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001755 */
1756 input = xmlNewEntityInputStream(ctxt, entity);
1757 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001758
1759 /*
1760 * Get the 4 first bytes and decode the charset
1761 * if enc != XML_CHAR_ENCODING_NONE
1762 * plug some encoding conversion routines.
1763 */
1764 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001765 if (entity->length >= 4) {
1766 start[0] = RAW;
1767 start[1] = NXT(1);
1768 start[2] = NXT(2);
1769 start[3] = NXT(3);
1770 enc = xmlDetectCharEncoding(start, 4);
1771 if (enc != XML_CHAR_ENCODING_NONE) {
1772 xmlSwitchEncoding(ctxt, enc);
1773 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001774 }
1775
Owen Taylor3473f882001-02-23 17:55:21 +00001776 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
William M. Brack76e95df2003-10-18 16:20:14 +00001777 (memcmp(CUR_PTR, "<?xml", 5) == 0) &&
1778 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001779 xmlParseTextDecl(ctxt);
1780 }
Owen Taylor3473f882001-02-23 17:55:21 +00001781 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001782 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1783 "PEReference: %s is not a parameter entity\n",
1784 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786 }
1787 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001788 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001789 }
Owen Taylor3473f882001-02-23 17:55:21 +00001790 }
1791}
1792
/*
 * Macro used to grow the current buffer: doubles buffer##_size and
 * reallocates @buffer to the new size.
 * If the reallocation fails the previous block is released, @buffer is
 * set to NULL and control jumps to the mem_error label, which every
 * function using this macro has to provide.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
        /* realloc failure leaves the old block allocated */		\
        xmlFree(buffer);						\
        buffer = NULL;							\
        goto mem_error;							\
    }									\
    buffer = growBuffer_tmp;						\
}
1802
1803/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001804 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001805 * @ctxt: the parser context
1806 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001807 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001808 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1809 * @end: an end marker xmlChar, 0 if none
1810 * @end2: an end marker xmlChar, 0 if none
1811 * @end3: an end marker xmlChar, 0 if none
1812 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001813 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001814 *
1815 * [67] Reference ::= EntityRef | CharRef
1816 *
1817 * [69] PEReference ::= '%' Name ';'
1818 *
1819 * Returns A newly allocated string with the substitution done. The caller
1820 * must deallocate it !
1821 */
1822xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001823xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1824 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001825 xmlChar *buffer = NULL;
1826 int buffer_size = 0;
1827
1828 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001829 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001830 xmlEntityPtr ent;
1831 int c,l;
1832 int nbchars = 0;
1833
Daniel Veillarde57ec792003-09-10 10:50:59 +00001834 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001835 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001836 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001837
1838 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001839 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001840 return(NULL);
1841 }
1842
1843 /*
1844 * allocate a translation buffer.
1845 */
1846 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001847 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001848 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001849
1850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001851 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001852 * we are operating on already parsed values.
1853 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854 if (str < last)
1855 c = CUR_SCHAR(str, l);
1856 else
1857 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001858 while ((c != 0) && (c != end) && /* non input consuming loop */
1859 (c != end2) && (c != end3)) {
1860
1861 if (c == 0) break;
1862 if ((c == '&') && (str[1] == '#')) {
1863 int val = xmlParseStringCharRef(ctxt, &str);
1864 if (val != 0) {
1865 COPY_BUF(0,buffer,nbchars,val);
1866 }
1867 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1868 if (xmlParserDebugEntities)
1869 xmlGenericError(xmlGenericErrorContext,
1870 "String decoding Entity Reference: %.30s\n",
1871 str);
1872 ent = xmlParseStringEntityRef(ctxt, &str);
1873 if ((ent != NULL) &&
1874 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1875 if (ent->content != NULL) {
1876 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1877 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001878 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1879 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001880 }
1881 } else if ((ent != NULL) && (ent->content != NULL)) {
1882 xmlChar *rep;
1883
1884 ctxt->depth++;
1885 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1886 0, 0, 0);
1887 ctxt->depth--;
1888 if (rep != NULL) {
1889 current = rep;
1890 while (*current != 0) { /* non input consuming loop */
1891 buffer[nbchars++] = *current++;
1892 if (nbchars >
1893 buffer_size - XML_PARSER_BUFFER_SIZE) {
1894 growBuffer(buffer);
1895 }
1896 }
1897 xmlFree(rep);
1898 }
1899 } else if (ent != NULL) {
1900 int i = xmlStrlen(ent->name);
1901 const xmlChar *cur = ent->name;
1902
1903 buffer[nbchars++] = '&';
1904 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1905 growBuffer(buffer);
1906 }
1907 for (;i > 0;i--)
1908 buffer[nbchars++] = *cur++;
1909 buffer[nbchars++] = ';';
1910 }
1911 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1912 if (xmlParserDebugEntities)
1913 xmlGenericError(xmlGenericErrorContext,
1914 "String decoding PE Reference: %.30s\n", str);
1915 ent = xmlParseStringPEReference(ctxt, &str);
1916 if (ent != NULL) {
1917 xmlChar *rep;
1918
1919 ctxt->depth++;
1920 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1921 0, 0, 0);
1922 ctxt->depth--;
1923 if (rep != NULL) {
1924 current = rep;
1925 while (*current != 0) { /* non input consuming loop */
1926 buffer[nbchars++] = *current++;
1927 if (nbchars >
1928 buffer_size - XML_PARSER_BUFFER_SIZE) {
1929 growBuffer(buffer);
1930 }
1931 }
1932 xmlFree(rep);
1933 }
1934 }
1935 } else {
1936 COPY_BUF(l,buffer,nbchars,c);
1937 str += l;
1938 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1939 growBuffer(buffer);
1940 }
1941 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001942 if (str < last)
1943 c = CUR_SCHAR(str, l);
1944 else
1945 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 }
1947 buffer[nbchars++] = 0;
1948 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001949
1950mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001951 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001952 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001953}
1954
Daniel Veillarde57ec792003-09-10 10:50:59 +00001955/**
1956 * xmlStringDecodeEntities:
1957 * @ctxt: the parser context
1958 * @str: the input string
1959 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1960 * @end: an end marker xmlChar, 0 if none
1961 * @end2: an end marker xmlChar, 0 if none
1962 * @end3: an end marker xmlChar, 0 if none
1963 *
1964 * Takes a entity string content and process to do the adequate substitutions.
1965 *
1966 * [67] Reference ::= EntityRef | CharRef
1967 *
1968 * [69] PEReference ::= '%' Name ';'
1969 *
1970 * Returns A newly allocated string with the substitution done. The caller
1971 * must deallocate it !
1972 */
1973xmlChar *
1974xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1975 xmlChar end, xmlChar end2, xmlChar end3) {
1976 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1977 end, end2, end3));
1978}
Owen Taylor3473f882001-02-23 17:55:21 +00001979
1980/************************************************************************
1981 * *
1982 * Commodity functions to handle xmlChars *
1983 * *
1984 ************************************************************************/
1985
1986/**
1987 * xmlStrndup:
1988 * @cur: the input xmlChar *
1989 * @len: the len of @cur
1990 *
1991 * a strndup for array of xmlChar's
1992 *
1993 * Returns a new xmlChar * or NULL
1994 */
1995xmlChar *
1996xmlStrndup(const xmlChar *cur, int len) {
1997 xmlChar *ret;
1998
1999 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002000 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002001 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002002 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003 return(NULL);
2004 }
2005 memcpy(ret, cur, len * sizeof(xmlChar));
2006 ret[len] = 0;
2007 return(ret);
2008}
2009
2010/**
2011 * xmlStrdup:
2012 * @cur: the input xmlChar *
2013 *
2014 * a strdup for array of xmlChar's. Since they are supposed to be
2015 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2016 * a termination mark of '0'.
2017 *
2018 * Returns a new xmlChar * or NULL
2019 */
2020xmlChar *
2021xmlStrdup(const xmlChar *cur) {
2022 const xmlChar *p = cur;
2023
2024 if (cur == NULL) return(NULL);
2025 while (*p != 0) p++; /* non input consuming */
2026 return(xmlStrndup(cur, p - cur));
2027}
2028
2029/**
2030 * xmlCharStrndup:
2031 * @cur: the input char *
2032 * @len: the len of @cur
2033 *
2034 * a strndup for char's to xmlChar's
2035 *
2036 * Returns a new xmlChar * or NULL
2037 */
2038
2039xmlChar *
2040xmlCharStrndup(const char *cur, int len) {
2041 int i;
2042 xmlChar *ret;
2043
2044 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002045 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002046 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002047 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002048 return(NULL);
2049 }
2050 for (i = 0;i < len;i++)
2051 ret[i] = (xmlChar) cur[i];
2052 ret[len] = 0;
2053 return(ret);
2054}
2055
2056/**
2057 * xmlCharStrdup:
2058 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002059 *
2060 * a strdup for char's to xmlChar's
2061 *
2062 * Returns a new xmlChar * or NULL
2063 */
2064
2065xmlChar *
2066xmlCharStrdup(const char *cur) {
2067 const char *p = cur;
2068
2069 if (cur == NULL) return(NULL);
2070 while (*p != '\0') p++; /* non input consuming */
2071 return(xmlCharStrndup(cur, p - cur));
2072}
2073
2074/**
2075 * xmlStrcmp:
2076 * @str1: the first xmlChar *
2077 * @str2: the second xmlChar *
2078 *
2079 * a strcmp for xmlChar's
2080 *
2081 * Returns the integer result of the comparison
2082 */
2083
2084int
2085xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2086 register int tmp;
2087
2088 if (str1 == str2) return(0);
2089 if (str1 == NULL) return(-1);
2090 if (str2 == NULL) return(1);
2091 do {
2092 tmp = *str1++ - *str2;
2093 if (tmp != 0) return(tmp);
2094 } while (*str2++ != 0);
2095 return 0;
2096}
2097
2098/**
2099 * xmlStrEqual:
2100 * @str1: the first xmlChar *
2101 * @str2: the second xmlChar *
2102 *
2103 * Check if both string are equal of have same content
2104 * Should be a bit more readable and faster than xmlStrEqual()
2105 *
2106 * Returns 1 if they are equal, 0 if they are different
2107 */
2108
2109int
2110xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2111 if (str1 == str2) return(1);
2112 if (str1 == NULL) return(0);
2113 if (str2 == NULL) return(0);
2114 do {
2115 if (*str1++ != *str2) return(0);
2116 } while (*str2++);
2117 return(1);
2118}
2119
2120/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002121 * xmlStrQEqual:
2122 * @pref: the prefix of the QName
2123 * @name: the localname of the QName
2124 * @str: the second xmlChar *
2125 *
2126 * Check if a QName is Equal to a given string
2127 *
2128 * Returns 1 if they are equal, 0 if they are different
2129 */
2130
2131int
2132xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2133 if (pref == NULL) return(xmlStrEqual(name, str));
2134 if (name == NULL) return(0);
2135 if (str == NULL) return(0);
2136
2137 do {
2138 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002139 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002140 if (*str++ != ':') return(0);
2141 do {
2142 if (*name++ != *str) return(0);
2143 } while (*str++);
2144 return(1);
2145}
2146
2147/**
Owen Taylor3473f882001-02-23 17:55:21 +00002148 * xmlStrncmp:
2149 * @str1: the first xmlChar *
2150 * @str2: the second xmlChar *
2151 * @len: the max comparison length
2152 *
2153 * a strncmp for xmlChar's
2154 *
2155 * Returns the integer result of the comparison
2156 */
2157
2158int
2159xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2160 register int tmp;
2161
2162 if (len <= 0) return(0);
2163 if (str1 == str2) return(0);
2164 if (str1 == NULL) return(-1);
2165 if (str2 == NULL) return(1);
2166 do {
2167 tmp = *str1++ - *str2;
2168 if (tmp != 0 || --len == 0) return(tmp);
2169 } while (*str2++ != 0);
2170 return 0;
2171}
2172
Daniel Veillardb44025c2001-10-11 22:55:55 +00002173static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002174 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2175 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2176 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2177 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2178 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2179 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2180 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2181 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2182 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2183 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2184 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2185 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2186 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2187 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2188 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2189 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2190 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2191 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2192 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2193 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2194 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2195 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2196 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2197 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2198 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2199 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2200 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2201 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2202 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2203 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2204 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2205 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2206};
2207
2208/**
2209 * xmlStrcasecmp:
2210 * @str1: the first xmlChar *
2211 * @str2: the second xmlChar *
2212 *
2213 * a strcasecmp for xmlChar's
2214 *
2215 * Returns the integer result of the comparison
2216 */
2217
2218int
2219xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2220 register int tmp;
2221
2222 if (str1 == str2) return(0);
2223 if (str1 == NULL) return(-1);
2224 if (str2 == NULL) return(1);
2225 do {
2226 tmp = casemap[*str1++] - casemap[*str2];
2227 if (tmp != 0) return(tmp);
2228 } while (*str2++ != 0);
2229 return 0;
2230}
2231
2232/**
2233 * xmlStrncasecmp:
2234 * @str1: the first xmlChar *
2235 * @str2: the second xmlChar *
2236 * @len: the max comparison length
2237 *
2238 * a strncasecmp for xmlChar's
2239 *
2240 * Returns the integer result of the comparison
2241 */
2242
2243int
2244xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2245 register int tmp;
2246
2247 if (len <= 0) return(0);
2248 if (str1 == str2) return(0);
2249 if (str1 == NULL) return(-1);
2250 if (str2 == NULL) return(1);
2251 do {
2252 tmp = casemap[*str1++] - casemap[*str2];
2253 if (tmp != 0 || --len == 0) return(tmp);
2254 } while (*str2++ != 0);
2255 return 0;
2256}
2257
2258/**
2259 * xmlStrchr:
2260 * @str: the xmlChar * array
2261 * @val: the xmlChar to search
2262 *
2263 * a strchr for xmlChar's
2264 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002265 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002266 */
2267
2268const xmlChar *
2269xmlStrchr(const xmlChar *str, xmlChar val) {
2270 if (str == NULL) return(NULL);
2271 while (*str != 0) { /* non input consuming */
2272 if (*str == val) return((xmlChar *) str);
2273 str++;
2274 }
2275 return(NULL);
2276}
2277
2278/**
2279 * xmlStrstr:
2280 * @str: the xmlChar * array (haystack)
2281 * @val: the xmlChar to search (needle)
2282 *
2283 * a strstr for xmlChar's
2284 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002285 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002286 */
2287
2288const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002289xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002290 int n;
2291
2292 if (str == NULL) return(NULL);
2293 if (val == NULL) return(NULL);
2294 n = xmlStrlen(val);
2295
2296 if (n == 0) return(str);
2297 while (*str != 0) { /* non input consuming */
2298 if (*str == *val) {
2299 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2300 }
2301 str++;
2302 }
2303 return(NULL);
2304}
2305
2306/**
2307 * xmlStrcasestr:
2308 * @str: the xmlChar * array (haystack)
2309 * @val: the xmlChar to search (needle)
2310 *
2311 * a case-ignoring strstr for xmlChar's
2312 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002313 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002314 */
2315
2316const xmlChar *
2317xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2318 int n;
2319
2320 if (str == NULL) return(NULL);
2321 if (val == NULL) return(NULL);
2322 n = xmlStrlen(val);
2323
2324 if (n == 0) return(str);
2325 while (*str != 0) { /* non input consuming */
2326 if (casemap[*str] == casemap[*val])
2327 if (!xmlStrncasecmp(str, val, n)) return(str);
2328 str++;
2329 }
2330 return(NULL);
2331}
2332
2333/**
2334 * xmlStrsub:
2335 * @str: the xmlChar * array (haystack)
2336 * @start: the index of the first char (zero based)
2337 * @len: the length of the substring
2338 *
2339 * Extract a substring of a given string
2340 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002341 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002342 */
2343
2344xmlChar *
2345xmlStrsub(const xmlChar *str, int start, int len) {
2346 int i;
2347
2348 if (str == NULL) return(NULL);
2349 if (start < 0) return(NULL);
2350 if (len < 0) return(NULL);
2351
2352 for (i = 0;i < start;i++) {
2353 if (*str == 0) return(NULL);
2354 str++;
2355 }
2356 if (*str == 0) return(NULL);
2357 return(xmlStrndup(str, len));
2358}
2359
2360/**
2361 * xmlStrlen:
2362 * @str: the xmlChar * array
2363 *
2364 * length of a xmlChar's string
2365 *
2366 * Returns the number of xmlChar contained in the ARRAY.
2367 */
2368
2369int
2370xmlStrlen(const xmlChar *str) {
2371 int len = 0;
2372
2373 if (str == NULL) return(0);
2374 while (*str != 0) { /* non input consuming */
2375 str++;
2376 len++;
2377 }
2378 return(len);
2379}
2380
2381/**
2382 * xmlStrncat:
2383 * @cur: the original xmlChar * array
2384 * @add: the xmlChar * array added
2385 * @len: the length of @add
2386 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002387 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002388 * first bytes of @add.
2389 *
2390 * Returns a new xmlChar *, the original @cur is reallocated if needed
2391 * and should not be freed
2392 */
2393
2394xmlChar *
2395xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2396 int size;
2397 xmlChar *ret;
2398
2399 if ((add == NULL) || (len == 0))
2400 return(cur);
2401 if (cur == NULL)
2402 return(xmlStrndup(add, len));
2403
2404 size = xmlStrlen(cur);
2405 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2406 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002407 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002408 return(cur);
2409 }
2410 memcpy(&ret[size], add, len * sizeof(xmlChar));
2411 ret[size + len] = 0;
2412 return(ret);
2413}
2414
2415/**
2416 * xmlStrcat:
2417 * @cur: the original xmlChar * array
2418 * @add: the xmlChar * array added
2419 *
2420 * a strcat for array of xmlChar's. Since they are supposed to be
2421 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2422 * a termination mark of '0'.
2423 *
2424 * Returns a new xmlChar * containing the concatenated string.
2425 */
2426xmlChar *
2427xmlStrcat(xmlChar *cur, const xmlChar *add) {
2428 const xmlChar *p = add;
2429
2430 if (add == NULL) return(cur);
2431 if (cur == NULL)
2432 return(xmlStrdup(add));
2433
2434 while (*p != 0) p++; /* non input consuming */
2435 return(xmlStrncat(cur, add, p - add));
2436}
2437
Aleksey Sanine7acf432003-10-02 20:05:27 +00002438/**
2439 * xmlStrPrintf:
2440 * @buf: the result buffer.
2441 * @len: the result buffer length.
2442 * @msg: the message with printf formatting.
2443 * @...: extra parameters for the message.
2444 *
2445 * Formats @msg and places result into @buf.
2446 *
2447 * Returns the number of characters written to @buf or -1 if an error occurs.
2448 */
2449int
2450xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2451 va_list args;
2452 int ret;
2453
2454 if((buf == NULL) || (msg == NULL)) {
2455 return(-1);
2456 }
2457
2458 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002459 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002460 va_end(args);
Daniel Veillardd96f6d32003-10-07 21:25:12 +00002461 buf[len - 1] = 0; /* be safe ! */
Aleksey Sanine7acf432003-10-02 20:05:27 +00002462
2463 return(ret);
2464}
2465
Owen Taylor3473f882001-02-23 17:55:21 +00002466/************************************************************************
2467 * *
2468 * Commodity functions, cleanup needed ? *
2469 * *
2470 ************************************************************************/
2471
2472/**
2473 * areBlanks:
2474 * @ctxt: an XML parser context
2475 * @str: a xmlChar *
2476 * @len: the size of @str
2477 *
2478 * Is this a sequence of blank chars that one can ignore ?
2479 *
2480 * Returns 1 if ignorable 0 otherwise.
2481 */
2482
2483static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2484 int i, ret;
2485 xmlNodePtr lastChild;
2486
Daniel Veillard05c13a22001-09-09 08:38:09 +00002487 /*
2488 * Don't spend time trying to differentiate them, the same callback is
2489 * used !
2490 */
2491 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002492 return(0);
2493
Owen Taylor3473f882001-02-23 17:55:21 +00002494 /*
2495 * Check for xml:space value.
2496 */
2497 if (*(ctxt->space) == 1)
2498 return(0);
2499
2500 /*
2501 * Check that the string is made of blanks
2502 */
2503 for (i = 0;i < len;i++)
William M. Brack76e95df2003-10-18 16:20:14 +00002504 if (!(IS_BLANK_CH(str[i]))) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002505
2506 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002507 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002508 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002509 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002510 if (ctxt->myDoc != NULL) {
2511 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2512 if (ret == 0) return(1);
2513 if (ret == 1) return(0);
2514 }
2515
2516 /*
2517 * Otherwise, heuristic :-\
2518 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002519 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002520 if ((ctxt->node->children == NULL) &&
2521 (RAW == '<') && (NXT(1) == '/')) return(0);
2522
2523 lastChild = xmlGetLastChild(ctxt->node);
2524 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002525 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2526 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002527 } else if (xmlNodeIsText(lastChild))
2528 return(0);
2529 else if ((ctxt->node->children != NULL) &&
2530 (xmlNodeIsText(ctxt->node->children)))
2531 return(0);
2532 return(1);
2533}
2534
Owen Taylor3473f882001-02-23 17:55:21 +00002535/************************************************************************
2536 * *
2537 * Extra stuff for namespace support *
2538 * Relates to http://www.w3.org/TR/WD-xml-names *
2539 * *
2540 ************************************************************************/
2541
2542/**
2543 * xmlSplitQName:
2544 * @ctxt: an XML parser context
2545 * @name: an XML parser context
2546 * @prefix: a xmlChar **
2547 *
2548 * parse an UTF8 encoded XML qualified name string
2549 *
2550 * [NS 5] QName ::= (Prefix ':')? LocalPart
2551 *
2552 * [NS 6] Prefix ::= NCName
2553 *
2554 * [NS 7] LocalPart ::= NCName
2555 *
2556 * Returns the local part, and prefix is updated
2557 * to get the Prefix if any.
2558 */
2559
2560xmlChar *
2561xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2562 xmlChar buf[XML_MAX_NAMELEN + 5];
2563 xmlChar *buffer = NULL;
2564 int len = 0;
2565 int max = XML_MAX_NAMELEN;
2566 xmlChar *ret = NULL;
2567 const xmlChar *cur = name;
2568 int c;
2569
2570 *prefix = NULL;
2571
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002572 if (cur == NULL) return(NULL);
2573
Owen Taylor3473f882001-02-23 17:55:21 +00002574#ifndef XML_XML_NAMESPACE
2575 /* xml: prefix is not really a namespace */
2576 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2577 (cur[2] == 'l') && (cur[3] == ':'))
2578 return(xmlStrdup(name));
2579#endif
2580
Daniel Veillard597bc482003-07-24 16:08:28 +00002581 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002582 if (cur[0] == ':')
2583 return(xmlStrdup(name));
2584
2585 c = *cur++;
2586 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2587 buf[len++] = c;
2588 c = *cur++;
2589 }
2590 if (len >= max) {
2591 /*
2592 * Okay someone managed to make a huge name, so he's ready to pay
2593 * for the processing speed.
2594 */
2595 max = len * 2;
2596
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002597 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002598 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002599 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002600 return(NULL);
2601 }
2602 memcpy(buffer, buf, len);
2603 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2604 if (len + 10 > max) {
2605 max *= 2;
2606 buffer = (xmlChar *) xmlRealloc(buffer,
2607 max * sizeof(xmlChar));
2608 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002609 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 return(NULL);
2611 }
2612 }
2613 buffer[len++] = c;
2614 c = *cur++;
2615 }
2616 buffer[len] = 0;
2617 }
2618
Daniel Veillard597bc482003-07-24 16:08:28 +00002619 /* nasty but well=formed
2620 if ((c == ':') && (*cur == 0)) {
2621 return(xmlStrdup(name));
2622 } */
2623
Owen Taylor3473f882001-02-23 17:55:21 +00002624 if (buffer == NULL)
2625 ret = xmlStrndup(buf, len);
2626 else {
2627 ret = buffer;
2628 buffer = NULL;
2629 max = XML_MAX_NAMELEN;
2630 }
2631
2632
2633 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002634 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002635 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002636 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002637 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002638 }
Owen Taylor3473f882001-02-23 17:55:21 +00002639 len = 0;
2640
Daniel Veillardbb284f42002-10-16 18:02:47 +00002641 /*
2642 * Check that the first character is proper to start
2643 * a new name
2644 */
2645 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2646 ((c >= 0x41) && (c <= 0x5A)) ||
2647 (c == '_') || (c == ':'))) {
2648 int l;
2649 int first = CUR_SCHAR(cur, l);
2650
2651 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002652 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002653 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002654 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002655 }
2656 }
2657 cur++;
2658
Owen Taylor3473f882001-02-23 17:55:21 +00002659 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2660 buf[len++] = c;
2661 c = *cur++;
2662 }
2663 if (len >= max) {
2664 /*
2665 * Okay someone managed to make a huge name, so he's ready to pay
2666 * for the processing speed.
2667 */
2668 max = len * 2;
2669
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002670 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002671 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002672 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002673 return(NULL);
2674 }
2675 memcpy(buffer, buf, len);
2676 while (c != 0) { /* tested bigname2.xml */
2677 if (len + 10 > max) {
2678 max *= 2;
2679 buffer = (xmlChar *) xmlRealloc(buffer,
2680 max * sizeof(xmlChar));
2681 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002682 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002683 return(NULL);
2684 }
2685 }
2686 buffer[len++] = c;
2687 c = *cur++;
2688 }
2689 buffer[len] = 0;
2690 }
2691
2692 if (buffer == NULL)
2693 ret = xmlStrndup(buf, len);
2694 else {
2695 ret = buffer;
2696 }
2697 }
2698
2699 return(ret);
2700}
2701
2702/************************************************************************
2703 * *
2704 * The parser itself *
2705 * Relates to http://www.w3.org/TR/REC-xml *
2706 * *
2707 ************************************************************************/
2708
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002709static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002710static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002711 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002712
Owen Taylor3473f882001-02-23 17:55:21 +00002713/**
2714 * xmlParseName:
2715 * @ctxt: an XML parser context
2716 *
2717 * parse an XML name.
2718 *
2719 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2720 * CombiningChar | Extender
2721 *
2722 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2723 *
2724 * [6] Names ::= Name (S Name)*
2725 *
2726 * Returns the Name parsed or NULL
2727 */
2728
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002729const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002730xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002731 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002732 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002733 int count = 0;
2734
2735 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002736
2737 /*
2738 * Accelerator for simple ASCII names
2739 */
2740 in = ctxt->input->cur;
2741 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2742 ((*in >= 0x41) && (*in <= 0x5A)) ||
2743 (*in == '_') || (*in == ':')) {
2744 in++;
2745 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2746 ((*in >= 0x41) && (*in <= 0x5A)) ||
2747 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002748 (*in == '_') || (*in == '-') ||
2749 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002750 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002751 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002752 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002753 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002754 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002755 ctxt->nbChars += count;
2756 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002757 if (ret == NULL)
2758 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002759 return(ret);
2760 }
2761 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002762 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002763}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
Daniel Veillard46de64e2002-05-29 08:21:33 +00002765/**
2766 * xmlParseNameAndCompare:
2767 * @ctxt: an XML parser context
2768 *
2769 * parse an XML name and compares for match
2770 * (specialized for endtag parsing)
2771 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002772 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2773 * and the name for mismatch
2774 */
2775
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002776static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002777xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2778 const xmlChar *cmp = other;
2779 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002780 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002781
2782 GROW;
2783
2784 in = ctxt->input->cur;
2785 while (*in != 0 && *in == *cmp) {
2786 ++in;
2787 ++cmp;
2788 }
William M. Brack76e95df2003-10-18 16:20:14 +00002789 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002790 /* success */
2791 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002792 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002793 }
2794 /* failure (or end of input buffer), check with full function */
2795 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002796 /* strings coming from the dictionnary direct compare possible */
2797 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002798 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002799 }
2800 return ret;
2801}
2802
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002803static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002804xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002805 int len = 0, l;
2806 int c;
2807 int count = 0;
2808
2809 /*
2810 * Handler for more complex cases
2811 */
2812 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002813 c = CUR_CHAR(l);
2814 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2815 (!IS_LETTER(c) && (c != '_') &&
2816 (c != ':'))) {
2817 return(NULL);
2818 }
2819
2820 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002821 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002822 (c == '.') || (c == '-') ||
2823 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002824 (IS_COMBINING(c)) ||
2825 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002826 if (count++ > 100) {
2827 count = 0;
2828 GROW;
2829 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002830 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002831 NEXTL(l);
2832 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002833 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002834 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002835}
2836
2837/**
2838 * xmlParseStringName:
2839 * @ctxt: an XML parser context
2840 * @str: a pointer to the string pointer (IN/OUT)
2841 *
2842 * parse an XML name.
2843 *
2844 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2845 * CombiningChar | Extender
2846 *
2847 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2848 *
2849 * [6] Names ::= Name (S Name)*
2850 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002851 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002852 * is updated to the current location in the string.
2853 */
2854
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002855static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002856xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2857 xmlChar buf[XML_MAX_NAMELEN + 5];
2858 const xmlChar *cur = *str;
2859 int len = 0, l;
2860 int c;
2861
2862 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002863 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002864 (c != ':')) {
2865 return(NULL);
2866 }
2867
William M. Brack871611b2003-10-18 04:53:14 +00002868 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002869 (c == '.') || (c == '-') ||
2870 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002871 (IS_COMBINING(c)) ||
2872 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002873 COPY_BUF(l,buf,len,c);
2874 cur += l;
2875 c = CUR_SCHAR(cur, l);
2876 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2877 /*
2878 * Okay someone managed to make a huge name, so he's ready to pay
2879 * for the processing speed.
2880 */
2881 xmlChar *buffer;
2882 int max = len * 2;
2883
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002884 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002885 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002886 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002887 return(NULL);
2888 }
2889 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002890 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002891 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002892 (c == '.') || (c == '-') ||
2893 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002894 (IS_COMBINING(c)) ||
2895 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002896 if (len + 10 > max) {
2897 max *= 2;
2898 buffer = (xmlChar *) xmlRealloc(buffer,
2899 max * sizeof(xmlChar));
2900 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002901 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002902 return(NULL);
2903 }
2904 }
2905 COPY_BUF(l,buffer,len,c);
2906 cur += l;
2907 c = CUR_SCHAR(cur, l);
2908 }
2909 buffer[len] = 0;
2910 *str = cur;
2911 return(buffer);
2912 }
2913 }
2914 *str = cur;
2915 return(xmlStrndup(buf, len));
2916}
2917
2918/**
2919 * xmlParseNmtoken:
2920 * @ctxt: an XML parser context
2921 *
2922 * parse an XML Nmtoken.
2923 *
2924 * [7] Nmtoken ::= (NameChar)+
2925 *
2926 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2927 *
2928 * Returns the Nmtoken parsed or NULL
2929 */
2930
2931xmlChar *
2932xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2933 xmlChar buf[XML_MAX_NAMELEN + 5];
2934 int len = 0, l;
2935 int c;
2936 int count = 0;
2937
2938 GROW;
2939 c = CUR_CHAR(l);
2940
William M. Brack871611b2003-10-18 04:53:14 +00002941 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002942 (c == '.') || (c == '-') ||
2943 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002944 (IS_COMBINING(c)) ||
2945 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002946 if (count++ > 100) {
2947 count = 0;
2948 GROW;
2949 }
2950 COPY_BUF(l,buf,len,c);
2951 NEXTL(l);
2952 c = CUR_CHAR(l);
2953 if (len >= XML_MAX_NAMELEN) {
2954 /*
2955 * Okay someone managed to make a huge token, so he's ready to pay
2956 * for the processing speed.
2957 */
2958 xmlChar *buffer;
2959 int max = len * 2;
2960
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002961 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002962 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002963 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002964 return(NULL);
2965 }
2966 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002967 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002968 (c == '.') || (c == '-') ||
2969 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002970 (IS_COMBINING(c)) ||
2971 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002972 if (count++ > 100) {
2973 count = 0;
2974 GROW;
2975 }
2976 if (len + 10 > max) {
2977 max *= 2;
2978 buffer = (xmlChar *) xmlRealloc(buffer,
2979 max * sizeof(xmlChar));
2980 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002981 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002982 return(NULL);
2983 }
2984 }
2985 COPY_BUF(l,buffer,len,c);
2986 NEXTL(l);
2987 c = CUR_CHAR(l);
2988 }
2989 buffer[len] = 0;
2990 return(buffer);
2991 }
2992 }
2993 if (len == 0)
2994 return(NULL);
2995 return(xmlStrndup(buf, len));
2996}
2997
2998/**
2999 * xmlParseEntityValue:
3000 * @ctxt: an XML parser context
3001 * @orig: if non-NULL store a copy of the original entity value
3002 *
3003 * parse a value for ENTITY declarations
3004 *
3005 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3006 * "'" ([^%&'] | PEReference | Reference)* "'"
3007 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003008 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003009 */
3010
3011xmlChar *
3012xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3013 xmlChar *buf = NULL;
3014 int len = 0;
3015 int size = XML_PARSER_BUFFER_SIZE;
3016 int c, l;
3017 xmlChar stop;
3018 xmlChar *ret = NULL;
3019 const xmlChar *cur = NULL;
3020 xmlParserInputPtr input;
3021
3022 if (RAW == '"') stop = '"';
3023 else if (RAW == '\'') stop = '\'';
3024 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003025 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003026 return(NULL);
3027 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003028 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003029 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003030 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003031 return(NULL);
3032 }
3033
3034 /*
3035 * The content of the entity definition is copied in a buffer.
3036 */
3037
3038 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3039 input = ctxt->input;
3040 GROW;
3041 NEXT;
3042 c = CUR_CHAR(l);
3043 /*
3044 * NOTE: 4.4.5 Included in Literal
3045 * When a parameter entity reference appears in a literal entity
3046 * value, ... a single or double quote character in the replacement
3047 * text is always treated as a normal data character and will not
3048 * terminate the literal.
3049 * In practice it means we stop the loop only when back at parsing
3050 * the initial entity and the quote is found
3051 */
William M. Brack871611b2003-10-18 04:53:14 +00003052 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003053 (ctxt->input != input))) {
3054 if (len + 5 >= size) {
3055 size *= 2;
3056 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3057 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003058 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003059 return(NULL);
3060 }
3061 }
3062 COPY_BUF(l,buf,len,c);
3063 NEXTL(l);
3064 /*
3065 * Pop-up of finished entities.
3066 */
3067 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3068 xmlPopInput(ctxt);
3069
3070 GROW;
3071 c = CUR_CHAR(l);
3072 if (c == 0) {
3073 GROW;
3074 c = CUR_CHAR(l);
3075 }
3076 }
3077 buf[len] = 0;
3078
3079 /*
3080 * Raise problem w.r.t. '&' and '%' being used in non-entities
3081 * reference constructs. Note Charref will be handled in
3082 * xmlStringDecodeEntities()
3083 */
3084 cur = buf;
3085 while (*cur != 0) { /* non input consuming */
3086 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3087 xmlChar *name;
3088 xmlChar tmp = *cur;
3089
3090 cur++;
3091 name = xmlParseStringName(ctxt, &cur);
3092 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003093 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003094 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003095 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003096 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003097 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3098 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003099 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 }
3101 if (name != NULL)
3102 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003103 if (*cur == 0)
3104 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003105 }
3106 cur++;
3107 }
3108
3109 /*
3110 * Then PEReference entities are substituted.
3111 */
3112 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003113 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003114 xmlFree(buf);
3115 } else {
3116 NEXT;
3117 /*
3118 * NOTE: 4.4.7 Bypassed
3119 * When a general entity reference appears in the EntityValue in
3120 * an entity declaration, it is bypassed and left as is.
3121 * so XML_SUBSTITUTE_REF is not set here.
3122 */
3123 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3124 0, 0, 0);
3125 if (orig != NULL)
3126 *orig = buf;
3127 else
3128 xmlFree(buf);
3129 }
3130
3131 return(ret);
3132}
3133
3134/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003135 * xmlParseAttValueComplex:
3136 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003137 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003138 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003139 *
3140 * parse a value for an attribute, this is the fallback function
3141 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003142 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003143 *
3144 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3145 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003146static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003147xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003148 xmlChar limit = 0;
3149 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003150 int len = 0;
3151 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003152 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003153 xmlChar *current = NULL;
3154 xmlEntityPtr ent;
3155
Owen Taylor3473f882001-02-23 17:55:21 +00003156 if (NXT(0) == '"') {
3157 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3158 limit = '"';
3159 NEXT;
3160 } else if (NXT(0) == '\'') {
3161 limit = '\'';
3162 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3163 NEXT;
3164 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003165 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003166 return(NULL);
3167 }
3168
3169 /*
3170 * allocate a translation buffer.
3171 */
3172 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003173 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003174 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003175
3176 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003177 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003178 */
3179 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003180 while ((NXT(0) != limit) && /* checked */
3181 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003182 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003183 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003184 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003185 if (NXT(1) == '#') {
3186 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003187
Owen Taylor3473f882001-02-23 17:55:21 +00003188 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003189 if (ctxt->replaceEntities) {
3190 if (len > buf_size - 10) {
3191 growBuffer(buf);
3192 }
3193 buf[len++] = '&';
3194 } else {
3195 /*
3196 * The reparsing will be done in xmlStringGetNodeList()
3197 * called by the attribute() function in SAX.c
3198 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003199 if (len > buf_size - 10) {
3200 growBuffer(buf);
3201 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003202 buf[len++] = '&';
3203 buf[len++] = '#';
3204 buf[len++] = '3';
3205 buf[len++] = '8';
3206 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003207 }
3208 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003209 if (len > buf_size - 10) {
3210 growBuffer(buf);
3211 }
Owen Taylor3473f882001-02-23 17:55:21 +00003212 len += xmlCopyChar(0, &buf[len], val);
3213 }
3214 } else {
3215 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003216 if ((ent != NULL) &&
3217 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3218 if (len > buf_size - 10) {
3219 growBuffer(buf);
3220 }
3221 if ((ctxt->replaceEntities == 0) &&
3222 (ent->content[0] == '&')) {
3223 buf[len++] = '&';
3224 buf[len++] = '#';
3225 buf[len++] = '3';
3226 buf[len++] = '8';
3227 buf[len++] = ';';
3228 } else {
3229 buf[len++] = ent->content[0];
3230 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003231 } else if ((ent != NULL) &&
3232 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003233 xmlChar *rep;
3234
3235 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3236 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003237 XML_SUBSTITUTE_REF,
3238 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003239 if (rep != NULL) {
3240 current = rep;
3241 while (*current != 0) { /* non input consuming */
3242 buf[len++] = *current++;
3243 if (len > buf_size - 10) {
3244 growBuffer(buf);
3245 }
3246 }
3247 xmlFree(rep);
3248 }
3249 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003250 if (len > buf_size - 10) {
3251 growBuffer(buf);
3252 }
Owen Taylor3473f882001-02-23 17:55:21 +00003253 if (ent->content != NULL)
3254 buf[len++] = ent->content[0];
3255 }
3256 } else if (ent != NULL) {
3257 int i = xmlStrlen(ent->name);
3258 const xmlChar *cur = ent->name;
3259
3260 /*
3261 * This may look absurd but is needed to detect
3262 * entities problems
3263 */
3264 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3265 (ent->content != NULL)) {
3266 xmlChar *rep;
3267 rep = xmlStringDecodeEntities(ctxt, ent->content,
3268 XML_SUBSTITUTE_REF, 0, 0, 0);
3269 if (rep != NULL)
3270 xmlFree(rep);
3271 }
3272
3273 /*
3274 * Just output the reference
3275 */
3276 buf[len++] = '&';
3277 if (len > buf_size - i - 10) {
3278 growBuffer(buf);
3279 }
3280 for (;i > 0;i--)
3281 buf[len++] = *cur++;
3282 buf[len++] = ';';
3283 }
3284 }
3285 } else {
3286 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003287 if ((len != 0) || (!normalize)) {
3288 if ((!normalize) || (!in_space)) {
3289 COPY_BUF(l,buf,len,0x20);
3290 if (len > buf_size - 10) {
3291 growBuffer(buf);
3292 }
3293 }
3294 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003295 }
3296 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003297 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003298 COPY_BUF(l,buf,len,c);
3299 if (len > buf_size - 10) {
3300 growBuffer(buf);
3301 }
3302 }
3303 NEXTL(l);
3304 }
3305 GROW;
3306 c = CUR_CHAR(l);
3307 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003308 if ((in_space) && (normalize)) {
3309 while (buf[len - 1] == 0x20) len--;
3310 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003311 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003312 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003313 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003314 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003315 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3316 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003317 } else
3318 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003319 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003320 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003321
3322mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003323 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003324 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003325}
3326
3327/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003328 * xmlParseAttValue:
3329 * @ctxt: an XML parser context
3330 *
3331 * parse a value for an attribute
3332 * Note: the parser won't do substitution of entities here, this
3333 * will be handled later in xmlStringGetNodeList
3334 *
3335 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3336 * "'" ([^<&'] | Reference)* "'"
3337 *
3338 * 3.3.3 Attribute-Value Normalization:
3339 * Before the value of an attribute is passed to the application or
3340 * checked for validity, the XML processor must normalize it as follows:
3341 * - a character reference is processed by appending the referenced
3342 * character to the attribute value
3343 * - an entity reference is processed by recursively processing the
3344 * replacement text of the entity
3345 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3346 * appending #x20 to the normalized value, except that only a single
3347 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3348 * parsed entity or the literal entity value of an internal parsed entity
3349 * - other characters are processed by appending them to the normalized value
3350 * If the declared value is not CDATA, then the XML processor must further
3351 * process the normalized attribute value by discarding any leading and
3352 * trailing space (#x20) characters, and by replacing sequences of space
3353 * (#x20) characters by a single space (#x20) character.
3354 * All attributes for which no declaration has been read should be treated
3355 * by a non-validating parser as if declared CDATA.
3356 *
3357 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3358 */
3359
3360
3361xmlChar *
3362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003363 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003364}
3365
3366/**
Owen Taylor3473f882001-02-23 17:55:21 +00003367 * xmlParseSystemLiteral:
3368 * @ctxt: an XML parser context
3369 *
3370 * parse an XML Literal
3371 *
3372 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3373 *
3374 * Returns the SystemLiteral parsed or NULL
3375 */
3376
3377xmlChar *
3378xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3379 xmlChar *buf = NULL;
3380 int len = 0;
3381 int size = XML_PARSER_BUFFER_SIZE;
3382 int cur, l;
3383 xmlChar stop;
3384 int state = ctxt->instate;
3385 int count = 0;
3386
3387 SHRINK;
3388 if (RAW == '"') {
3389 NEXT;
3390 stop = '"';
3391 } else if (RAW == '\'') {
3392 NEXT;
3393 stop = '\'';
3394 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003395 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003396 return(NULL);
3397 }
3398
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003399 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003400 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003401 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003402 return(NULL);
3403 }
3404 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3405 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003406 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003407 if (len + 5 >= size) {
3408 size *= 2;
3409 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3410 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003411 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003412 ctxt->instate = (xmlParserInputState) state;
3413 return(NULL);
3414 }
3415 }
3416 count++;
3417 if (count > 50) {
3418 GROW;
3419 count = 0;
3420 }
3421 COPY_BUF(l,buf,len,cur);
3422 NEXTL(l);
3423 cur = CUR_CHAR(l);
3424 if (cur == 0) {
3425 GROW;
3426 SHRINK;
3427 cur = CUR_CHAR(l);
3428 }
3429 }
3430 buf[len] = 0;
3431 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003432 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003433 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003434 } else {
3435 NEXT;
3436 }
3437 return(buf);
3438}
3439
3440/**
3441 * xmlParsePubidLiteral:
3442 * @ctxt: an XML parser context
3443 *
3444 * parse an XML public literal
3445 *
3446 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3447 *
3448 * Returns the PubidLiteral parsed or NULL.
3449 */
3450
3451xmlChar *
3452xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3453 xmlChar *buf = NULL;
3454 int len = 0;
3455 int size = XML_PARSER_BUFFER_SIZE;
3456 xmlChar cur;
3457 xmlChar stop;
3458 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003459 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003460
3461 SHRINK;
3462 if (RAW == '"') {
3463 NEXT;
3464 stop = '"';
3465 } else if (RAW == '\'') {
3466 NEXT;
3467 stop = '\'';
3468 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003469 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003470 return(NULL);
3471 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003472 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003473 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003474 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003475 return(NULL);
3476 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003477 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003478 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003479 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003480 if (len + 1 >= size) {
3481 size *= 2;
3482 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3483 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003485 return(NULL);
3486 }
3487 }
3488 buf[len++] = cur;
3489 count++;
3490 if (count > 50) {
3491 GROW;
3492 count = 0;
3493 }
3494 NEXT;
3495 cur = CUR;
3496 if (cur == 0) {
3497 GROW;
3498 SHRINK;
3499 cur = CUR;
3500 }
3501 }
3502 buf[len] = 0;
3503 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003504 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003505 } else {
3506 NEXT;
3507 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003508 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003509 return(buf);
3510}
3511
Daniel Veillard48b2f892001-02-25 16:11:03 +00003512void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003513/**
3514 * xmlParseCharData:
3515 * @ctxt: an XML parser context
3516 * @cdata: int indicating whether we are within a CDATA section
3517 *
3518 * parse a CharData section.
3519 * if we are within a CDATA section ']]>' marks an end of section.
3520 *
3521 * The right angle bracket (>) may be represented using the string "&gt;",
3522 * and must, for compatibility, be escaped using "&gt;" or a character
3523 * reference when it appears in the string "]]>" in content, when that
3524 * string is not marking the end of a CDATA section.
3525 *
3526 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3527 */
3528
3529void
3530xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003531 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003532 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003533 int line = ctxt->input->line;
3534 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003535
3536 SHRINK;
3537 GROW;
3538 /*
3539 * Accelerated common case where input don't need to be
3540 * modified before passing it to the handler.
3541 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003542 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003543 in = ctxt->input->cur;
3544 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003545get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003546 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3547 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003548 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003549 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003550 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003551 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003552 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003553 ctxt->input->line++;
3554 in++;
3555 }
3556 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003557 }
3558 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003559 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003560 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003561 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003562 return;
3563 }
3564 in++;
3565 goto get_more;
3566 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003567 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003568 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003569 if ((ctxt->sax->ignorableWhitespace !=
3570 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003571 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003572 const xmlChar *tmp = ctxt->input->cur;
3573 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003574
Daniel Veillarda7374592001-05-10 14:17:55 +00003575 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003576 ctxt->sax->ignorableWhitespace(ctxt->userData,
3577 tmp, nbchar);
3578 } else if (ctxt->sax->characters != NULL)
3579 ctxt->sax->characters(ctxt->userData,
3580 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003581 line = ctxt->input->line;
3582 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003583 } else {
3584 if (ctxt->sax->characters != NULL)
3585 ctxt->sax->characters(ctxt->userData,
3586 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003587 line = ctxt->input->line;
3588 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003589 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003590 }
3591 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003592 if (*in == 0xD) {
3593 in++;
3594 if (*in == 0xA) {
3595 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003596 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003597 ctxt->input->line++;
3598 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003599 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003600 in--;
3601 }
3602 if (*in == '<') {
3603 return;
3604 }
3605 if (*in == '&') {
3606 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003607 }
3608 SHRINK;
3609 GROW;
3610 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003611 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003612 nbchar = 0;
3613 }
Daniel Veillard50582112001-03-26 22:52:16 +00003614 ctxt->input->line = line;
3615 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003616 xmlParseCharDataComplex(ctxt, cdata);
3617}
3618
Daniel Veillard01c13b52002-12-10 15:19:08 +00003619/**
3620 * xmlParseCharDataComplex:
3621 * @ctxt: an XML parser context
3622 * @cdata: int indicating whether we are within a CDATA section
3623 *
3624 * parse a CharData section.this is the fallback function
3625 * of xmlParseCharData() when the parsing requires handling
3626 * of non-ASCII characters.
3627 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003628void
3629xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3631 int nbchar = 0;
3632 int cur, l;
3633 int count = 0;
3634
3635 SHRINK;
3636 GROW;
3637 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003638 while ((cur != '<') && /* checked */
3639 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003640 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003641 if ((cur == ']') && (NXT(1) == ']') &&
3642 (NXT(2) == '>')) {
3643 if (cdata) break;
3644 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003645 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003646 }
3647 }
3648 COPY_BUF(l,buf,nbchar,cur);
3649 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003650 buf[nbchar] = 0;
3651
Owen Taylor3473f882001-02-23 17:55:21 +00003652 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003653 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003654 */
3655 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3656 if (areBlanks(ctxt, buf, nbchar)) {
3657 if (ctxt->sax->ignorableWhitespace != NULL)
3658 ctxt->sax->ignorableWhitespace(ctxt->userData,
3659 buf, nbchar);
3660 } else {
3661 if (ctxt->sax->characters != NULL)
3662 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3663 }
3664 }
3665 nbchar = 0;
3666 }
3667 count++;
3668 if (count > 50) {
3669 GROW;
3670 count = 0;
3671 }
3672 NEXTL(l);
3673 cur = CUR_CHAR(l);
3674 }
3675 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003676 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003677 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003678 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003679 */
3680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3681 if (areBlanks(ctxt, buf, nbchar)) {
3682 if (ctxt->sax->ignorableWhitespace != NULL)
3683 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3684 } else {
3685 if (ctxt->sax->characters != NULL)
3686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3687 }
3688 }
3689 }
3690}
3691
3692/**
3693 * xmlParseExternalID:
3694 * @ctxt: an XML parser context
3695 * @publicID: a xmlChar** receiving PubidLiteral
3696 * @strict: indicate whether we should restrict parsing to only
3697 * production [75], see NOTE below
3698 *
3699 * Parse an External ID or a Public ID
3700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003701 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003702 * 'PUBLIC' S PubidLiteral S SystemLiteral
3703 *
3704 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3705 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3706 *
3707 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3708 *
3709 * Returns the function returns SystemLiteral and in the second
3710 * case publicID receives PubidLiteral, is strict is off
3711 * it is possible to return NULL and have publicID set.
3712 */
3713
3714xmlChar *
3715xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3716 xmlChar *URI = NULL;
3717
3718 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003719
3720 *publicID = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00003721 if (memcmp(CUR_PTR, "SYSTEM", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003722 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003723 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3725 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003726 }
3727 SKIP_BLANKS;
3728 URI = xmlParseSystemLiteral(ctxt);
3729 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003730 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00003732 } else if (memcmp(CUR_PTR, "PUBLIC", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003733 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003734 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003736 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003737 }
3738 SKIP_BLANKS;
3739 *publicID = xmlParsePubidLiteral(ctxt);
3740 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003741 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 if (strict) {
3744 /*
3745 * We don't handle [83] so "S SystemLiteral" is required.
3746 */
William M. Brack76e95df2003-10-18 16:20:14 +00003747 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003749 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003750 }
3751 } else {
3752 /*
3753 * We handle [83] so we return immediately, if
3754 * "S SystemLiteral" is not detected. From a purely parsing
3755 * point of view that's a nice mess.
3756 */
3757 const xmlChar *ptr;
3758 GROW;
3759
3760 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003761 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003762
William M. Brack76e95df2003-10-18 16:20:14 +00003763 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003764 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3765 }
3766 SKIP_BLANKS;
3767 URI = xmlParseSystemLiteral(ctxt);
3768 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003769 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003770 }
3771 }
3772 return(URI);
3773}
3774
3775/**
3776 * xmlParseComment:
3777 * @ctxt: an XML parser context
3778 *
3779 * Skip an XML (SGML) comment <!-- .... -->
3780 * The spec says that "For compatibility, the string "--" (double-hyphen)
3781 * must not occur within comments. "
3782 *
3783 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3784 */
3785void
3786xmlParseComment(xmlParserCtxtPtr ctxt) {
3787 xmlChar *buf = NULL;
3788 int len;
3789 int size = XML_PARSER_BUFFER_SIZE;
3790 int q, ql;
3791 int r, rl;
3792 int cur, l;
3793 xmlParserInputState state;
3794 xmlParserInputPtr input = ctxt->input;
3795 int count = 0;
3796
3797 /*
3798 * Check that there is a comment right here.
3799 */
3800 if ((RAW != '<') || (NXT(1) != '!') ||
3801 (NXT(2) != '-') || (NXT(3) != '-')) return;
3802
3803 state = ctxt->instate;
3804 ctxt->instate = XML_PARSER_COMMENT;
3805 SHRINK;
3806 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003807 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003808 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003810 ctxt->instate = state;
3811 return;
3812 }
3813 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003814 if (q == 0)
3815 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003816 NEXTL(ql);
3817 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003818 if (r == 0)
3819 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003820 NEXTL(rl);
3821 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003822 if (cur == 0)
3823 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003824 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003825 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003826 ((cur != '>') ||
3827 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003828 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003829 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003830 }
3831 if (len + 5 >= size) {
3832 size *= 2;
3833 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3834 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003835 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003836 ctxt->instate = state;
3837 return;
3838 }
3839 }
3840 COPY_BUF(ql,buf,len,q);
3841 q = r;
3842 ql = rl;
3843 r = cur;
3844 rl = l;
3845
3846 count++;
3847 if (count > 50) {
3848 GROW;
3849 count = 0;
3850 }
3851 NEXTL(l);
3852 cur = CUR_CHAR(l);
3853 if (cur == 0) {
3854 SHRINK;
3855 GROW;
3856 cur = CUR_CHAR(l);
3857 }
3858 }
3859 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003860 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003862 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 xmlFree(buf);
3864 } else {
3865 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003866 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3867 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
3869 NEXT;
3870 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3871 (!ctxt->disableSAX))
3872 ctxt->sax->comment(ctxt->userData, buf);
3873 xmlFree(buf);
3874 }
3875 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003876 return;
3877not_terminated:
3878 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3879 "Comment not terminated\n", NULL);
3880 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003881}
3882
3883/**
3884 * xmlParsePITarget:
3885 * @ctxt: an XML parser context
3886 *
3887 * parse the name of a PI
3888 *
3889 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3890 *
3891 * Returns the PITarget name or NULL
3892 */
3893
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003894const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003895xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003896 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003897
3898 name = xmlParseName(ctxt);
3899 if ((name != NULL) &&
3900 ((name[0] == 'x') || (name[0] == 'X')) &&
3901 ((name[1] == 'm') || (name[1] == 'M')) &&
3902 ((name[2] == 'l') || (name[2] == 'L'))) {
3903 int i;
3904 if ((name[0] == 'x') && (name[1] == 'm') &&
3905 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003906 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003907 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003908 return(name);
3909 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003910 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003911 return(name);
3912 }
3913 for (i = 0;;i++) {
3914 if (xmlW3CPIs[i] == NULL) break;
3915 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3916 return(name);
3917 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003918 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3919 "xmlParsePITarget: invalid name prefix 'xml'\n",
3920 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003921 }
3922 return(name);
3923}
3924
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A value where "catalog" is not followed by '=' is ignored silently;
 * every other syntax problem produces an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK_CH(*cur); cur++)
        ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cur += 7;
    for (; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != '=')
        return;
    cur++;
    for (; IS_BLANK_CH(*cur); cur++)
        ;

    /* the URL is enclosed in matching single or double quotes */
    quote = *cur;
    if (!((quote == '\'') || (quote == '"')))
        goto syntax_error;
    cur++;
    start = cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto syntax_error;
    URL = xmlStrndup(start, cur - start);
    cur++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3986
Owen Taylor3473f882001-02-23 17:55:21 +00003987/**
3988 * xmlParsePI:
3989 * @ctxt: an XML parser context
3990 *
3991 * parse an XML Processing Instruction.
3992 *
3993 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3994 *
3995 * The processing is transfered to SAX once parsed.
3996 */
3997
3998void
3999xmlParsePI(xmlParserCtxtPtr ctxt) {
4000 xmlChar *buf = NULL;
4001 int len = 0;
4002 int size = XML_PARSER_BUFFER_SIZE;
4003 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004004 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004005 xmlParserInputState state;
4006 int count = 0;
4007
4008 if ((RAW == '<') && (NXT(1) == '?')) {
4009 xmlParserInputPtr input = ctxt->input;
4010 state = ctxt->instate;
4011 ctxt->instate = XML_PARSER_PI;
4012 /*
4013 * this is a Processing Instruction.
4014 */
4015 SKIP(2);
4016 SHRINK;
4017
4018 /*
4019 * Parse the target name and check for special support like
4020 * namespace.
4021 */
4022 target = xmlParsePITarget(ctxt);
4023 if (target != NULL) {
4024 if ((RAW == '?') && (NXT(1) == '>')) {
4025 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004026 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4027 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004028 }
4029 SKIP(2);
4030
4031 /*
4032 * SAX: PI detected.
4033 */
4034 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4035 (ctxt->sax->processingInstruction != NULL))
4036 ctxt->sax->processingInstruction(ctxt->userData,
4037 target, NULL);
4038 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004039 return;
4040 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004041 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004042 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004043 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004044 ctxt->instate = state;
4045 return;
4046 }
4047 cur = CUR;
4048 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004049 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4050 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004051 }
4052 SKIP_BLANKS;
4053 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004054 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004055 ((cur != '?') || (NXT(1) != '>'))) {
4056 if (len + 5 >= size) {
4057 size *= 2;
4058 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4059 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004060 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004061 ctxt->instate = state;
4062 return;
4063 }
4064 }
4065 count++;
4066 if (count > 50) {
4067 GROW;
4068 count = 0;
4069 }
4070 COPY_BUF(l,buf,len,cur);
4071 NEXTL(l);
4072 cur = CUR_CHAR(l);
4073 if (cur == 0) {
4074 SHRINK;
4075 GROW;
4076 cur = CUR_CHAR(l);
4077 }
4078 }
4079 buf[len] = 0;
4080 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004081 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4082 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004083 } else {
4084 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4086 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004087 }
4088 SKIP(2);
4089
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004090#ifdef LIBXML_CATALOG_ENABLED
4091 if (((state == XML_PARSER_MISC) ||
4092 (state == XML_PARSER_START)) &&
4093 (xmlStrEqual(target, XML_CATALOG_PI))) {
4094 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4095 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4096 (allow == XML_CATA_ALLOW_ALL))
4097 xmlParseCatalogPI(ctxt, buf);
4098 }
4099#endif
4100
4101
Owen Taylor3473f882001-02-23 17:55:21 +00004102 /*
4103 * SAX: PI detected.
4104 */
4105 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4106 (ctxt->sax->processingInstruction != NULL))
4107 ctxt->sax->processingInstruction(ctxt->userData,
4108 target, buf);
4109 }
4110 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004111 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004112 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004113 }
4114 ctxt->instate = state;
4115 }
4116}
4117
4118/**
4119 * xmlParseNotationDecl:
4120 * @ctxt: an XML parser context
4121 *
4122 * parse a notation declaration
4123 *
4124 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4125 *
4126 * Hence there is actually 3 choices:
4127 * 'PUBLIC' S PubidLiteral
4128 * 'PUBLIC' S PubidLiteral S SystemLiteral
4129 * and 'SYSTEM' S SystemLiteral
4130 *
4131 * See the NOTE on xmlParseExternalID().
4132 */
4133
4134void
4135xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004136 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004137 xmlChar *Pubid;
4138 xmlChar *Systemid;
4139
Daniel Veillard8f597c32003-10-06 08:19:27 +00004140 if (memcmp(CUR_PTR, "<!NOTATION", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004141 xmlParserInputPtr input = ctxt->input;
4142 SHRINK;
4143 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004144 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004145 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4146 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004147 return;
4148 }
4149 SKIP_BLANKS;
4150
Daniel Veillard76d66f42001-05-16 21:05:17 +00004151 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004152 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004153 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004154 return;
4155 }
William M. Brack76e95df2003-10-18 16:20:14 +00004156 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004157 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004158 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004159 return;
4160 }
4161 SKIP_BLANKS;
4162
4163 /*
4164 * Parse the IDs.
4165 */
4166 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4167 SKIP_BLANKS;
4168
4169 if (RAW == '>') {
4170 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004171 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4172 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004173 }
4174 NEXT;
4175 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4176 (ctxt->sax->notationDecl != NULL))
4177 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4178 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004179 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004180 }
Owen Taylor3473f882001-02-23 17:55:21 +00004181 if (Systemid != NULL) xmlFree(Systemid);
4182 if (Pubid != NULL) xmlFree(Pubid);
4183 }
4184}
4185
4186/**
4187 * xmlParseEntityDecl:
4188 * @ctxt: an XML parser context
4189 *
4190 * parse <!ENTITY declarations
4191 *
4192 * [70] EntityDecl ::= GEDecl | PEDecl
4193 *
4194 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4195 *
4196 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4197 *
4198 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4199 *
4200 * [74] PEDef ::= EntityValue | ExternalID
4201 *
4202 * [76] NDataDecl ::= S 'NDATA' S Name
4203 *
4204 * [ VC: Notation Declared ]
4205 * The Name must match the declared name of a notation.
4206 */
4207
4208void
4209xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004210 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004211 xmlChar *value = NULL;
4212 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004213 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004214 int isParameter = 0;
4215 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004216 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004217
4218 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004219 if (memcmp(CUR_PTR, "<!ENTITY", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004220 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 SHRINK;
4222 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004223 skipped = SKIP_BLANKS;
4224 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004225 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4226 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004227 }
Owen Taylor3473f882001-02-23 17:55:21 +00004228
4229 if (RAW == '%') {
4230 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004231 skipped = SKIP_BLANKS;
4232 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004233 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4234 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004235 }
Owen Taylor3473f882001-02-23 17:55:21 +00004236 isParameter = 1;
4237 }
4238
Daniel Veillard76d66f42001-05-16 21:05:17 +00004239 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004240 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004241 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4242 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004243 return;
4244 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004245 skipped = SKIP_BLANKS;
4246 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004247 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4248 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
Owen Taylor3473f882001-02-23 17:55:21 +00004250
Daniel Veillardf5582f12002-06-11 10:08:16 +00004251 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004252 /*
4253 * handle the various case of definitions...
4254 */
4255 if (isParameter) {
4256 if ((RAW == '"') || (RAW == '\'')) {
4257 value = xmlParseEntityValue(ctxt, &orig);
4258 if (value) {
4259 if ((ctxt->sax != NULL) &&
4260 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4261 ctxt->sax->entityDecl(ctxt->userData, name,
4262 XML_INTERNAL_PARAMETER_ENTITY,
4263 NULL, NULL, value);
4264 }
4265 } else {
4266 URI = xmlParseExternalID(ctxt, &literal, 1);
4267 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004268 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004269 }
4270 if (URI) {
4271 xmlURIPtr uri;
4272
4273 uri = xmlParseURI((const char *) URI);
4274 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004275 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4276 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004277 /*
4278 * This really ought to be a well formedness error
4279 * but the XML Core WG decided otherwise c.f. issue
4280 * E26 of the XML erratas.
4281 */
Owen Taylor3473f882001-02-23 17:55:21 +00004282 } else {
4283 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004284 /*
4285 * Okay this is foolish to block those but not
4286 * invalid URIs.
4287 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004288 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 } else {
4290 if ((ctxt->sax != NULL) &&
4291 (!ctxt->disableSAX) &&
4292 (ctxt->sax->entityDecl != NULL))
4293 ctxt->sax->entityDecl(ctxt->userData, name,
4294 XML_EXTERNAL_PARAMETER_ENTITY,
4295 literal, URI, NULL);
4296 }
4297 xmlFreeURI(uri);
4298 }
4299 }
4300 }
4301 } else {
4302 if ((RAW == '"') || (RAW == '\'')) {
4303 value = xmlParseEntityValue(ctxt, &orig);
4304 if ((ctxt->sax != NULL) &&
4305 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4306 ctxt->sax->entityDecl(ctxt->userData, name,
4307 XML_INTERNAL_GENERAL_ENTITY,
4308 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004309 /*
4310 * For expat compatibility in SAX mode.
4311 */
4312 if ((ctxt->myDoc == NULL) ||
4313 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4314 if (ctxt->myDoc == NULL) {
4315 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4316 }
4317 if (ctxt->myDoc->intSubset == NULL)
4318 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4319 BAD_CAST "fake", NULL, NULL);
4320
Daniel Veillard1af9a412003-08-20 22:54:39 +00004321 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4322 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004323 }
Owen Taylor3473f882001-02-23 17:55:21 +00004324 } else {
4325 URI = xmlParseExternalID(ctxt, &literal, 1);
4326 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004327 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004328 }
4329 if (URI) {
4330 xmlURIPtr uri;
4331
4332 uri = xmlParseURI((const char *)URI);
4333 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004334 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4335 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004336 /*
4337 * This really ought to be a well formedness error
4338 * but the XML Core WG decided otherwise c.f. issue
4339 * E26 of the XML erratas.
4340 */
Owen Taylor3473f882001-02-23 17:55:21 +00004341 } else {
4342 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004343 /*
4344 * Okay this is foolish to block those but not
4345 * invalid URIs.
4346 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004347 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004348 }
4349 xmlFreeURI(uri);
4350 }
4351 }
William M. Brack76e95df2003-10-18 16:20:14 +00004352 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4354 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004355 }
4356 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004357 if (memcmp(CUR_PTR, "NDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004358 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004359 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4361 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004364 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004365 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4366 (ctxt->sax->unparsedEntityDecl != NULL))
4367 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4368 literal, URI, ndata);
4369 } else {
4370 if ((ctxt->sax != NULL) &&
4371 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4372 ctxt->sax->entityDecl(ctxt->userData, name,
4373 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4374 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004375 /*
4376 * For expat compatibility in SAX mode.
4377 * assuming the entity repalcement was asked for
4378 */
4379 if ((ctxt->replaceEntities != 0) &&
4380 ((ctxt->myDoc == NULL) ||
4381 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4382 if (ctxt->myDoc == NULL) {
4383 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4384 }
4385
4386 if (ctxt->myDoc->intSubset == NULL)
4387 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4388 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004389 xmlSAX2EntityDecl(ctxt, name,
4390 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4391 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004392 }
Owen Taylor3473f882001-02-23 17:55:21 +00004393 }
4394 }
4395 }
4396 SKIP_BLANKS;
4397 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004398 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004399 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004400 } else {
4401 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004402 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4403 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004404 }
4405 NEXT;
4406 }
4407 if (orig != NULL) {
4408 /*
4409 * Ugly mechanism to save the raw entity value.
4410 */
4411 xmlEntityPtr cur = NULL;
4412
4413 if (isParameter) {
4414 if ((ctxt->sax != NULL) &&
4415 (ctxt->sax->getParameterEntity != NULL))
4416 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4417 } else {
4418 if ((ctxt->sax != NULL) &&
4419 (ctxt->sax->getEntity != NULL))
4420 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004421 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004422 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004423 }
Owen Taylor3473f882001-02-23 17:55:21 +00004424 }
4425 if (cur != NULL) {
4426 if (cur->orig != NULL)
4427 xmlFree(orig);
4428 else
4429 cur->orig = orig;
4430 } else
4431 xmlFree(orig);
4432 }
Owen Taylor3473f882001-02-23 17:55:21 +00004433 if (value != NULL) xmlFree(value);
4434 if (URI != NULL) xmlFree(URI);
4435 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004436 }
4437}
4438
4439/**
4440 * xmlParseDefaultDecl:
4441 * @ctxt: an XML parser context
4442 * @value: Receive a possible fixed default value for the attribute
4443 *
4444 * Parse an attribute default declaration
4445 *
4446 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4447 *
4448 * [ VC: Required Attribute ]
4449 * if the default declaration is the keyword #REQUIRED, then the
4450 * attribute must be specified for all elements of the type in the
4451 * attribute-list declaration.
4452 *
4453 * [ VC: Attribute Default Legal ]
4454 * The declared default value must meet the lexical constraints of
4455 * the declared attribute type c.f. xmlValidateAttributeDecl()
4456 *
4457 * [ VC: Fixed Attribute Default ]
4458 * if an attribute has a default value declared with the #FIXED
4459 * keyword, instances of that attribute must match the default value.
4460 *
4461 * [ WFC: No < in Attribute Values ]
4462 * handled in xmlParseAttValue()
4463 *
4464 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4465 * or XML_ATTRIBUTE_FIXED.
4466 */
4467
4468int
4469xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4470 int val;
4471 xmlChar *ret;
4472
4473 *value = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004474 if (memcmp(CUR_PTR, "#REQUIRED", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004475 SKIP(9);
4476 return(XML_ATTRIBUTE_REQUIRED);
4477 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00004478 if (memcmp(CUR_PTR, "#IMPLIED", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004479 SKIP(8);
4480 return(XML_ATTRIBUTE_IMPLIED);
4481 }
4482 val = XML_ATTRIBUTE_NONE;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004483 if (memcmp(CUR_PTR, "#FIXED", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004484 SKIP(6);
4485 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004486 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4488 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004489 }
4490 SKIP_BLANKS;
4491 }
4492 ret = xmlParseAttValue(ctxt);
4493 ctxt->instate = XML_PARSER_DTD;
4494 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004495 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004496 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004497 } else
4498 *value = ret;
4499 return(val);
4500}
4501
4502/**
4503 * xmlParseNotationType:
4504 * @ctxt: an XML parser context
4505 *
4506 * parse an Notation attribute type.
4507 *
4508 * Note: the leading 'NOTATION' S part has already being parsed...
4509 *
4510 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4511 *
4512 * [ VC: Notation Attributes ]
4513 * Values of this type must match one of the notation names included
4514 * in the declaration; all notation names in the declaration must be declared.
4515 *
4516 * Returns: the notation attribute tree built while parsing
4517 */
4518
4519xmlEnumerationPtr
4520xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004521 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004522 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4523
4524 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004525 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004526 return(NULL);
4527 }
4528 SHRINK;
4529 do {
4530 NEXT;
4531 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004532 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004533 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004534 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4535 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004536 return(ret);
4537 }
4538 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004539 if (cur == NULL) return(ret);
4540 if (last == NULL) ret = last = cur;
4541 else {
4542 last->next = cur;
4543 last = cur;
4544 }
4545 SKIP_BLANKS;
4546 } while (RAW == '|');
4547 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004548 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004549 if ((last != NULL) && (last != ret))
4550 xmlFreeEnumeration(last);
4551 return(ret);
4552 }
4553 NEXT;
4554 return(ret);
4555}
4556
4557/**
4558 * xmlParseEnumerationType:
4559 * @ctxt: an XML parser context
4560 *
4561 * parse an Enumeration attribute type.
4562 *
4563 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4564 *
4565 * [ VC: Enumeration ]
4566 * Values of this type must match one of the Nmtoken tokens in
4567 * the declaration
4568 *
4569 * Returns: the enumeration attribute tree built while parsing
4570 */
4571
4572xmlEnumerationPtr
4573xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4574 xmlChar *name;
4575 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4576
4577 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004578 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004579 return(NULL);
4580 }
4581 SHRINK;
4582 do {
4583 NEXT;
4584 SKIP_BLANKS;
4585 name = xmlParseNmtoken(ctxt);
4586 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004587 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 return(ret);
4589 }
4590 cur = xmlCreateEnumeration(name);
4591 xmlFree(name);
4592 if (cur == NULL) return(ret);
4593 if (last == NULL) ret = last = cur;
4594 else {
4595 last->next = cur;
4596 last = cur;
4597 }
4598 SKIP_BLANKS;
4599 } while (RAW == '|');
4600 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004601 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004602 return(ret);
4603 }
4604 NEXT;
4605 return(ret);
4606}
4607
4608/**
4609 * xmlParseEnumeratedType:
4610 * @ctxt: an XML parser context
4611 * @tree: the enumeration tree built while parsing
4612 *
4613 * parse an Enumerated attribute type.
4614 *
4615 * [57] EnumeratedType ::= NotationType | Enumeration
4616 *
4617 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4618 *
4619 *
4620 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4621 */
4622
4623int
4624xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00004625 if (memcmp(CUR_PTR, "NOTATION", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004626 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004627 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4629 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004630 return(0);
4631 }
4632 SKIP_BLANKS;
4633 *tree = xmlParseNotationType(ctxt);
4634 if (*tree == NULL) return(0);
4635 return(XML_ATTRIBUTE_NOTATION);
4636 }
4637 *tree = xmlParseEnumerationType(ctxt);
4638 if (*tree == NULL) return(0);
4639 return(XML_ATTRIBUTE_ENUMERATION);
4640}
4641
4642/**
4643 * xmlParseAttributeType:
4644 * @ctxt: an XML parser context
4645 * @tree: the enumeration tree built while parsing
4646 *
4647 * parse the Attribute list def for an element
4648 *
4649 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4650 *
4651 * [55] StringType ::= 'CDATA'
4652 *
4653 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4654 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4655 *
4656 * Validity constraints for attribute values syntax are checked in
4657 * xmlValidateAttributeValue()
4658 *
4659 * [ VC: ID ]
4660 * Values of type ID must match the Name production. A name must not
4661 * appear more than once in an XML document as a value of this type;
4662 * i.e., ID values must uniquely identify the elements which bear them.
4663 *
4664 * [ VC: One ID per Element Type ]
4665 * No element type may have more than one ID attribute specified.
4666 *
4667 * [ VC: ID Attribute Default ]
4668 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4669 *
4670 * [ VC: IDREF ]
4671 * Values of type IDREF must match the Name production, and values
4672 * of type IDREFS must match Names; each IDREF Name must match the value
4673 * of an ID attribute on some element in the XML document; i.e. IDREF
4674 * values must match the value of some ID attribute.
4675 *
4676 * [ VC: Entity Name ]
4677 * Values of type ENTITY must match the Name production, values
4678 * of type ENTITIES must match Names; each Entity Name must match the
4679 * name of an unparsed entity declared in the DTD.
4680 *
4681 * [ VC: Name Token ]
4682 * Values of type NMTOKEN must match the Nmtoken production; values
4683 * of type NMTOKENS must match Nmtokens.
4684 *
4685 * Returns the attribute type
4686 */
4687int
4688xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4689 SHRINK;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004690 if (memcmp(CUR_PTR, "CDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004691 SKIP(5);
4692 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004693 } else if (memcmp(CUR_PTR, "IDREFS", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004694 SKIP(6);
4695 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004696 } else if (memcmp(CUR_PTR, "IDREF", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004697 SKIP(5);
4698 return(XML_ATTRIBUTE_IDREF);
4699 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4700 SKIP(2);
4701 return(XML_ATTRIBUTE_ID);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004702 } else if (memcmp(CUR_PTR, "ENTITY", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004703 SKIP(6);
4704 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004705 } else if (memcmp(CUR_PTR, "ENTITIES", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004706 SKIP(8);
4707 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004708 } else if (memcmp(CUR_PTR, "NMTOKENS", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004709 SKIP(8);
4710 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004711 } else if (memcmp(CUR_PTR, "NMTOKEN", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004712 SKIP(7);
4713 return(XML_ATTRIBUTE_NMTOKEN);
4714 }
4715 return(xmlParseEnumeratedType(ctxt, tree));
4716}
4717
4718/**
4719 * xmlParseAttributeListDecl:
4720 * @ctxt: an XML parser context
4721 *
4722 * : parse the Attribute list def for an element
4723 *
4724 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4725 *
4726 * [53] AttDef ::= S Name S AttType S DefaultDecl
4727 *
4728 */
4729void
4730xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004731 const xmlChar *elemName;
4732 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004733 xmlEnumerationPtr tree;
4734
Daniel Veillard8f597c32003-10-06 08:19:27 +00004735 if (memcmp(CUR_PTR, "<!ATTLIST", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004736 xmlParserInputPtr input = ctxt->input;
4737
4738 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004741 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004742 }
4743 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004744 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4747 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004748 return;
4749 }
4750 SKIP_BLANKS;
4751 GROW;
4752 while (RAW != '>') {
4753 const xmlChar *check = CUR_PTR;
4754 int type;
4755 int def;
4756 xmlChar *defaultValue = NULL;
4757
4758 GROW;
4759 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004760 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004761 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004762 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4763 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004764 break;
4765 }
4766 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004767 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004768 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004769 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004770 if (defaultValue != NULL)
4771 xmlFree(defaultValue);
4772 break;
4773 }
4774 SKIP_BLANKS;
4775
4776 type = xmlParseAttributeType(ctxt, &tree);
4777 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004778 if (defaultValue != NULL)
4779 xmlFree(defaultValue);
4780 break;
4781 }
4782
4783 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004784 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004785 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4786 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004787 if (defaultValue != NULL)
4788 xmlFree(defaultValue);
4789 if (tree != NULL)
4790 xmlFreeEnumeration(tree);
4791 break;
4792 }
4793 SKIP_BLANKS;
4794
4795 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4796 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004797 if (defaultValue != NULL)
4798 xmlFree(defaultValue);
4799 if (tree != NULL)
4800 xmlFreeEnumeration(tree);
4801 break;
4802 }
4803
4804 GROW;
4805 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004806 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004807 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004808 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004809 if (defaultValue != NULL)
4810 xmlFree(defaultValue);
4811 if (tree != NULL)
4812 xmlFreeEnumeration(tree);
4813 break;
4814 }
4815 SKIP_BLANKS;
4816 }
4817 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004818 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4819 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004820 if (defaultValue != NULL)
4821 xmlFree(defaultValue);
4822 if (tree != NULL)
4823 xmlFreeEnumeration(tree);
4824 break;
4825 }
4826 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4827 (ctxt->sax->attributeDecl != NULL))
4828 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4829 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004830 else if (tree != NULL)
4831 xmlFreeEnumeration(tree);
4832
4833 if ((ctxt->sax2) && (defaultValue != NULL) &&
4834 (def != XML_ATTRIBUTE_IMPLIED) &&
4835 (def != XML_ATTRIBUTE_REQUIRED)) {
4836 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4837 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004838 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4839 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4840 }
Owen Taylor3473f882001-02-23 17:55:21 +00004841 if (defaultValue != NULL)
4842 xmlFree(defaultValue);
4843 GROW;
4844 }
4845 if (RAW == '>') {
4846 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004847 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4848 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004849 }
4850 NEXT;
4851 }
Owen Taylor3473f882001-02-23 17:55:21 +00004852 }
4853}
4854
4855/**
4856 * xmlParseElementMixedContentDecl:
4857 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004858 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004859 *
4860 * parse the declaration for a Mixed Element content
4861 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4862 *
4863 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4864 * '(' S? '#PCDATA' S? ')'
4865 *
4866 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4867 *
4868 * [ VC: No Duplicate Types ]
4869 * The same name must not appear more than once in a single
4870 * mixed-content declaration.
4871 *
4872 * returns: the list of the xmlElementContentPtr describing the element choices
4873 */
4874xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004875xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004876 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004877 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004878
4879 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004880 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004881 SKIP(7);
4882 SKIP_BLANKS;
4883 SHRINK;
4884 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004885 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004886 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4887"Element content declaration doesn't start and stop in the same entity\n",
4888 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004889 }
Owen Taylor3473f882001-02-23 17:55:21 +00004890 NEXT;
4891 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4892 if (RAW == '*') {
4893 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4894 NEXT;
4895 }
4896 return(ret);
4897 }
4898 if ((RAW == '(') || (RAW == '|')) {
4899 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4900 if (ret == NULL) return(NULL);
4901 }
4902 while (RAW == '|') {
4903 NEXT;
4904 if (elem == NULL) {
4905 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4906 if (ret == NULL) return(NULL);
4907 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004908 if (cur != NULL)
4909 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004910 cur = ret;
4911 } else {
4912 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4913 if (n == NULL) return(NULL);
4914 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004915 if (n->c1 != NULL)
4916 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004917 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004918 if (n != NULL)
4919 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004920 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004921 }
4922 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004923 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004925 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004926 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004927 xmlFreeElementContent(cur);
4928 return(NULL);
4929 }
4930 SKIP_BLANKS;
4931 GROW;
4932 }
4933 if ((RAW == ')') && (NXT(1) == '*')) {
4934 if (elem != NULL) {
4935 cur->c2 = xmlNewElementContent(elem,
4936 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004937 if (cur->c2 != NULL)
4938 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004939 }
4940 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004941 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004942 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4943"Element content declaration doesn't start and stop in the same entity\n",
4944 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004945 }
Owen Taylor3473f882001-02-23 17:55:21 +00004946 SKIP(2);
4947 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004948 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004949 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004950 return(NULL);
4951 }
4952
4953 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004954 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 }
4956 return(ret);
4957}
4958
4959/**
4960 * xmlParseElementChildrenContentDecl:
4961 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004962 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004963 *
4964 * parse the declaration for a Mixed Element content
4965 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4966 *
4967 *
4968 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4969 *
4970 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4971 *
4972 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4973 *
4974 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4975 *
4976 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4977 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004978 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004979 * opening or closing parentheses in a choice, seq, or Mixed
4980 * construct is contained in the replacement text for a parameter
4981 * entity, both must be contained in the same replacement text. For
4982 * interoperability, if a parameter-entity reference appears in a
4983 * choice, seq, or Mixed construct, its replacement text should not
4984 * be empty, and neither the first nor last non-blank character of
4985 * the replacement text should be a connector (| or ,).
4986 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004987 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004988 * hierarchy.
4989 */
4990xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004991xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004992 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004993 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004994 xmlChar type = 0;
4995
4996 SKIP_BLANKS;
4997 GROW;
4998 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004999 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005000
Owen Taylor3473f882001-02-23 17:55:21 +00005001 /* Recurse on first child */
5002 NEXT;
5003 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005004 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005005 SKIP_BLANKS;
5006 GROW;
5007 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005008 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005009 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005010 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 return(NULL);
5012 }
5013 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005014 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005015 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005016 return(NULL);
5017 }
Owen Taylor3473f882001-02-23 17:55:21 +00005018 GROW;
5019 if (RAW == '?') {
5020 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5021 NEXT;
5022 } else if (RAW == '*') {
5023 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5024 NEXT;
5025 } else if (RAW == '+') {
5026 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5027 NEXT;
5028 } else {
5029 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5030 }
Owen Taylor3473f882001-02-23 17:55:21 +00005031 GROW;
5032 }
5033 SKIP_BLANKS;
5034 SHRINK;
5035 while (RAW != ')') {
5036 /*
5037 * Each loop we parse one separator and one element.
5038 */
5039 if (RAW == ',') {
5040 if (type == 0) type = CUR;
5041
5042 /*
5043 * Detect "Name | Name , Name" error
5044 */
5045 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005046 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005047 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005048 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005049 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005050 xmlFreeElementContent(last);
5051 if (ret != NULL)
5052 xmlFreeElementContent(ret);
5053 return(NULL);
5054 }
5055 NEXT;
5056
5057 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5058 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005059 if ((last != NULL) && (last != ret))
5060 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005061 xmlFreeElementContent(ret);
5062 return(NULL);
5063 }
5064 if (last == NULL) {
5065 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005066 if (ret != NULL)
5067 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005068 ret = cur = op;
5069 } else {
5070 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005071 if (op != NULL)
5072 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005073 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005074 if (last != NULL)
5075 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005076 cur =op;
5077 last = NULL;
5078 }
5079 } else if (RAW == '|') {
5080 if (type == 0) type = CUR;
5081
5082 /*
5083 * Detect "Name , Name | Name" error
5084 */
5085 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005086 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005087 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005088 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005089 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005090 xmlFreeElementContent(last);
5091 if (ret != NULL)
5092 xmlFreeElementContent(ret);
5093 return(NULL);
5094 }
5095 NEXT;
5096
5097 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5098 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005099 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005100 xmlFreeElementContent(last);
5101 if (ret != NULL)
5102 xmlFreeElementContent(ret);
5103 return(NULL);
5104 }
5105 if (last == NULL) {
5106 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005107 if (ret != NULL)
5108 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005109 ret = cur = op;
5110 } else {
5111 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005112 if (op != NULL)
5113 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005114 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005115 if (last != NULL)
5116 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005117 cur =op;
5118 last = NULL;
5119 }
5120 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005121 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005122 if (ret != NULL)
5123 xmlFreeElementContent(ret);
5124 return(NULL);
5125 }
5126 GROW;
5127 SKIP_BLANKS;
5128 GROW;
5129 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005130 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005131 /* Recurse on second child */
5132 NEXT;
5133 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005134 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005135 SKIP_BLANKS;
5136 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005137 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005138 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005139 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005140 if (ret != NULL)
5141 xmlFreeElementContent(ret);
5142 return(NULL);
5143 }
5144 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005145 if (RAW == '?') {
5146 last->ocur = XML_ELEMENT_CONTENT_OPT;
5147 NEXT;
5148 } else if (RAW == '*') {
5149 last->ocur = XML_ELEMENT_CONTENT_MULT;
5150 NEXT;
5151 } else if (RAW == '+') {
5152 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5153 NEXT;
5154 } else {
5155 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5156 }
5157 }
5158 SKIP_BLANKS;
5159 GROW;
5160 }
5161 if ((cur != NULL) && (last != NULL)) {
5162 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005163 if (last != NULL)
5164 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005165 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005166 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005167 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5168"Element content declaration doesn't start and stop in the same entity\n",
5169 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005170 }
Owen Taylor3473f882001-02-23 17:55:21 +00005171 NEXT;
5172 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005173 if (ret != NULL)
5174 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005175 NEXT;
5176 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005177 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005178 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005179 cur = ret;
5180 /*
5181 * Some normalization:
5182 * (a | b* | c?)* == (a | b | c)*
5183 */
5184 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5185 if ((cur->c1 != NULL) &&
5186 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5187 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5188 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5189 if ((cur->c2 != NULL) &&
5190 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5191 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5192 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5193 cur = cur->c2;
5194 }
5195 }
Owen Taylor3473f882001-02-23 17:55:21 +00005196 NEXT;
5197 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005198 if (ret != NULL) {
5199 int found = 0;
5200
Daniel Veillarde470df72001-04-18 21:41:07 +00005201 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005202 /*
5203 * Some normalization:
5204 * (a | b*)+ == (a | b)*
5205 * (a | b?)+ == (a | b)*
5206 */
5207 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5208 if ((cur->c1 != NULL) &&
5209 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5210 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5211 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5212 found = 1;
5213 }
5214 if ((cur->c2 != NULL) &&
5215 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5216 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5217 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5218 found = 1;
5219 }
5220 cur = cur->c2;
5221 }
5222 if (found)
5223 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5224 }
Owen Taylor3473f882001-02-23 17:55:21 +00005225 NEXT;
5226 }
5227 return(ret);
5228}
5229
5230/**
5231 * xmlParseElementContentDecl:
5232 * @ctxt: an XML parser context
5233 * @name: the name of the element being defined.
5234 * @result: the Element Content pointer will be stored here if any
5235 *
5236 * parse the declaration for an Element content either Mixed or Children,
5237 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5238 *
5239 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5240 *
5241 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5242 */
5243
5244int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005245xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005246 xmlElementContentPtr *result) {
5247
5248 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005249 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005250 int res;
5251
5252 *result = NULL;
5253
5254 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005255 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005256 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005257 return(-1);
5258 }
5259 NEXT;
5260 GROW;
5261 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005262 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005263 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005264 res = XML_ELEMENT_TYPE_MIXED;
5265 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005266 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005267 res = XML_ELEMENT_TYPE_ELEMENT;
5268 }
Owen Taylor3473f882001-02-23 17:55:21 +00005269 SKIP_BLANKS;
5270 *result = tree;
5271 return(res);
5272}
5273
5274/**
5275 * xmlParseElementDecl:
5276 * @ctxt: an XML parser context
5277 *
5278 * parse an Element declaration.
5279 *
5280 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5281 *
5282 * [ VC: Unique Element Type Declaration ]
5283 * No element type may be declared more than once
5284 *
5285 * Returns the type of the element, or -1 in case of error
5286 */
5287int
5288xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005289 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005290 int ret = -1;
5291 xmlElementContentPtr content = NULL;
5292
5293 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005294 if (memcmp(CUR_PTR, "<!ELEMENT", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005295 xmlParserInputPtr input = ctxt->input;
5296
5297 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005298 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5300 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005301 }
5302 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005303 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005304 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005305 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5306 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005307 return(-1);
5308 }
5309 while ((RAW == 0) && (ctxt->inputNr > 1))
5310 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005311 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005312 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5313 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005314 }
5315 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005316 if (memcmp(CUR_PTR, "EMPTY", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005317 SKIP(5);
5318 /*
5319 * Element must always be empty.
5320 */
5321 ret = XML_ELEMENT_TYPE_EMPTY;
5322 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5323 (NXT(2) == 'Y')) {
5324 SKIP(3);
5325 /*
5326 * Element is a generic container.
5327 */
5328 ret = XML_ELEMENT_TYPE_ANY;
5329 } else if (RAW == '(') {
5330 ret = xmlParseElementContentDecl(ctxt, name, &content);
5331 } else {
5332 /*
5333 * [ WFC: PEs in Internal Subset ] error handling.
5334 */
5335 if ((RAW == '%') && (ctxt->external == 0) &&
5336 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005337 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005338 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005339 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005340 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005341 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5342 }
Owen Taylor3473f882001-02-23 17:55:21 +00005343 return(-1);
5344 }
5345
5346 SKIP_BLANKS;
5347 /*
5348 * Pop-up of finished entities.
5349 */
5350 while ((RAW == 0) && (ctxt->inputNr > 1))
5351 xmlPopInput(ctxt);
5352 SKIP_BLANKS;
5353
5354 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005355 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005356 } else {
5357 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005358 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5359 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005360 }
5361
5362 NEXT;
5363 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5364 (ctxt->sax->elementDecl != NULL))
5365 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5366 content);
5367 }
5368 if (content != NULL) {
5369 xmlFreeElementContent(content);
5370 }
Owen Taylor3473f882001-02-23 17:55:21 +00005371 }
5372 return(ret);
5373}
5374
5375/**
Owen Taylor3473f882001-02-23 17:55:21 +00005376 * xmlParseConditionalSections
5377 * @ctxt: an XML parser context
5378 *
5379 * [61] conditionalSect ::= includeSect | ignoreSect
5380 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5381 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5382 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5383 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5384 */
5385
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005386static void
Owen Taylor3473f882001-02-23 17:55:21 +00005387xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5388 SKIP(3);
5389 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005390 if (memcmp(CUR_PTR, "INCLUDE", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005391 SKIP(7);
5392 SKIP_BLANKS;
5393 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005394 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 } else {
5396 NEXT;
5397 }
5398 if (xmlParserDebugEntities) {
5399 if ((ctxt->input != NULL) && (ctxt->input->filename))
5400 xmlGenericError(xmlGenericErrorContext,
5401 "%s(%d): ", ctxt->input->filename,
5402 ctxt->input->line);
5403 xmlGenericError(xmlGenericErrorContext,
5404 "Entering INCLUDE Conditional Section\n");
5405 }
5406
5407 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5408 (NXT(2) != '>'))) {
5409 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005410 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005411
5412 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5413 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005414 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005415 NEXT;
5416 } else if (RAW == '%') {
5417 xmlParsePEReference(ctxt);
5418 } else
5419 xmlParseMarkupDecl(ctxt);
5420
5421 /*
5422 * Pop-up of finished entities.
5423 */
5424 while ((RAW == 0) && (ctxt->inputNr > 1))
5425 xmlPopInput(ctxt);
5426
Daniel Veillardfdc91562002-07-01 21:52:03 +00005427 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005428 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005429 break;
5430 }
5431 }
5432 if (xmlParserDebugEntities) {
5433 if ((ctxt->input != NULL) && (ctxt->input->filename))
5434 xmlGenericError(xmlGenericErrorContext,
5435 "%s(%d): ", ctxt->input->filename,
5436 ctxt->input->line);
5437 xmlGenericError(xmlGenericErrorContext,
5438 "Leaving INCLUDE Conditional Section\n");
5439 }
5440
Daniel Veillard8f597c32003-10-06 08:19:27 +00005441 } else if (memcmp(CUR_PTR, "IGNORE", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005442 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005443 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005444 int depth = 0;
5445
5446 SKIP(6);
5447 SKIP_BLANKS;
5448 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005449 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005450 } else {
5451 NEXT;
5452 }
5453 if (xmlParserDebugEntities) {
5454 if ((ctxt->input != NULL) && (ctxt->input->filename))
5455 xmlGenericError(xmlGenericErrorContext,
5456 "%s(%d): ", ctxt->input->filename,
5457 ctxt->input->line);
5458 xmlGenericError(xmlGenericErrorContext,
5459 "Entering IGNORE Conditional Section\n");
5460 }
5461
5462 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005463 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005464 * But disable SAX event generating DTD building in the meantime
5465 */
5466 state = ctxt->disableSAX;
5467 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005469 ctxt->instate = XML_PARSER_IGNORE;
5470
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005471 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005472 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5473 depth++;
5474 SKIP(3);
5475 continue;
5476 }
5477 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5478 if (--depth >= 0) SKIP(3);
5479 continue;
5480 }
5481 NEXT;
5482 continue;
5483 }
5484
5485 ctxt->disableSAX = state;
5486 ctxt->instate = instate;
5487
5488 if (xmlParserDebugEntities) {
5489 if ((ctxt->input != NULL) && (ctxt->input->filename))
5490 xmlGenericError(xmlGenericErrorContext,
5491 "%s(%d): ", ctxt->input->filename,
5492 ctxt->input->line);
5493 xmlGenericError(xmlGenericErrorContext,
5494 "Leaving IGNORE Conditional Section\n");
5495 }
5496
5497 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005498 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005499 }
5500
5501 if (RAW == 0)
5502 SHRINK;
5503
5504 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005505 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005506 } else {
5507 SKIP(3);
5508 }
5509}
5510
5511/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005512 * xmlParseMarkupDecl:
5513 * @ctxt: an XML parser context
5514 *
5515 * parse Markup declarations
5516 *
5517 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5518 * NotationDecl | PI | Comment
5519 *
5520 * [ VC: Proper Declaration/PE Nesting ]
5521 * Parameter-entity replacement text must be properly nested with
5522 * markup declarations. That is to say, if either the first character
5523 * or the last character of a markup declaration (markupdecl above) is
5524 * contained in the replacement text for a parameter-entity reference,
5525 * both must be contained in the same replacement text.
5526 *
5527 * [ WFC: PEs in Internal Subset ]
5528 * In the internal DTD subset, parameter-entity references can occur
5529 * only where markup declarations can occur, not within markup declarations.
5530 * (This does not apply to references that occur in external parameter
5531 * entities or to the external subset.)
5532 */
5533void
5534xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5535 GROW;
5536 xmlParseElementDecl(ctxt);
5537 xmlParseAttributeListDecl(ctxt);
5538 xmlParseEntityDecl(ctxt);
5539 xmlParseNotationDecl(ctxt);
5540 xmlParsePI(ctxt);
5541 xmlParseComment(ctxt);
5542 /*
5543 * This is only for internal subset. On external entities,
5544 * the replacement is done before parsing stage
5545 */
5546 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5547 xmlParsePEReference(ctxt);
5548
5549 /*
5550 * Conditional sections are allowed from entities included
5551 * by PE References in the internal subset.
5552 */
5553 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5554 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5555 xmlParseConditionalSections(ctxt);
5556 }
5557 }
5558
5559 ctxt->instate = XML_PARSER_DTD;
5560}
5561
5562/**
5563 * xmlParseTextDecl:
5564 * @ctxt: an XML parser context
5565 *
5566 * parse an XML declaration header for external entities
5567 *
5568 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5569 *
5570 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5571 */
5572
5573void
5574xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5575 xmlChar *version;
5576
5577 /*
5578 * We know that '<?xml' is here.
5579 */
William M. Brack76e95df2003-10-18 16:20:14 +00005580 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005581 SKIP(5);
5582 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005583 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005584 return;
5585 }
5586
William M. Brack76e95df2003-10-18 16:20:14 +00005587 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005588 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005590 }
5591 SKIP_BLANKS;
5592
5593 /*
5594 * We may have the VersionInfo here.
5595 */
5596 version = xmlParseVersionInfo(ctxt);
5597 if (version == NULL)
5598 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005599 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005600 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005601 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5602 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005603 }
5604 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005605 ctxt->input->version = version;
5606
5607 /*
5608 * We must have the encoding declaration
5609 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005610 xmlParseEncodingDecl(ctxt);
5611 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5612 /*
5613 * The XML REC instructs us to stop parsing right here
5614 */
5615 return;
5616 }
5617
5618 SKIP_BLANKS;
5619 if ((RAW == '?') && (NXT(1) == '>')) {
5620 SKIP(2);
5621 } else if (RAW == '>') {
5622 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005623 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005624 NEXT;
5625 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005626 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005627 MOVETO_ENDTAG(CUR_PTR);
5628 NEXT;
5629 }
5630}
5631
5632/**
Owen Taylor3473f882001-02-23 17:55:21 +00005633 * xmlParseExternalSubset:
5634 * @ctxt: an XML parser context
5635 * @ExternalID: the external identifier
5636 * @SystemID: the system identifier (or URL)
5637 *
5638 * parse Markup declarations from an external subset
5639 *
5640 * [30] extSubset ::= textDecl? extSubsetDecl
5641 *
5642 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5643 */
5644void
5645xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5646 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005647 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005648 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005649 if (memcmp(CUR_PTR, "<?xml", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005650 xmlParseTextDecl(ctxt);
5651 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5652 /*
5653 * The XML REC instructs us to stop parsing right here
5654 */
5655 ctxt->instate = XML_PARSER_EOF;
5656 return;
5657 }
5658 }
5659 if (ctxt->myDoc == NULL) {
5660 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5661 }
5662 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5663 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5664
5665 ctxt->instate = XML_PARSER_DTD;
5666 ctxt->external = 1;
5667 while (((RAW == '<') && (NXT(1) == '?')) ||
5668 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005669 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005670 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005671 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005672
5673 GROW;
5674 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5675 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005676 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005677 NEXT;
5678 } else if (RAW == '%') {
5679 xmlParsePEReference(ctxt);
5680 } else
5681 xmlParseMarkupDecl(ctxt);
5682
5683 /*
5684 * Pop-up of finished entities.
5685 */
5686 while ((RAW == 0) && (ctxt->inputNr > 1))
5687 xmlPopInput(ctxt);
5688
Daniel Veillardfdc91562002-07-01 21:52:03 +00005689 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005690 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005691 break;
5692 }
5693 }
5694
5695 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005696 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005697 }
5698
5699}
5700
5701/**
5702 * xmlParseReference:
5703 * @ctxt: an XML parser context
5704 *
5705 * parse and handle entity references in content, depending on the SAX
5706 * interface, this may end-up in a call to character() if this is a
5707 * CharRef, a predefined entity, if there is no reference() callback.
5708 * or if the parser was asked to switch to that mode.
5709 *
5710 * [67] Reference ::= EntityRef | CharRef
5711 */
5712void
5713xmlParseReference(xmlParserCtxtPtr ctxt) {
5714 xmlEntityPtr ent;
5715 xmlChar *val;
5716 if (RAW != '&') return;
5717
5718 if (NXT(1) == '#') {
5719 int i = 0;
5720 xmlChar out[10];
5721 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005722 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005723
5724 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5725 /*
5726 * So we are using non-UTF-8 buffers
5727 * Check that the char fit on 8bits, if not
5728 * generate a CharRef.
5729 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005730 if (value <= 0xFF) {
5731 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005732 out[1] = 0;
5733 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5734 (!ctxt->disableSAX))
5735 ctxt->sax->characters(ctxt->userData, out, 1);
5736 } else {
5737 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005738 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005740 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005741 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5742 (!ctxt->disableSAX))
5743 ctxt->sax->reference(ctxt->userData, out);
5744 }
5745 } else {
5746 /*
5747 * Just encode the value in UTF-8
5748 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005749 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 out[i] = 0;
5751 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5752 (!ctxt->disableSAX))
5753 ctxt->sax->characters(ctxt->userData, out, i);
5754 }
5755 } else {
5756 ent = xmlParseEntityRef(ctxt);
5757 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005758 if (!ctxt->wellFormed)
5759 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005760 if ((ent->name != NULL) &&
5761 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5762 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005763 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005764
5765
5766 /*
5767 * The first reference to the entity trigger a parsing phase
5768 * where the ent->children is filled with the result from
5769 * the parsing.
5770 */
5771 if (ent->children == NULL) {
5772 xmlChar *value;
5773 value = ent->content;
5774
5775 /*
5776 * Check that this entity is well formed
5777 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005778 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005779 (value[1] == 0) && (value[0] == '<') &&
5780 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5781 /*
5782 * DONE: get definite answer on this !!!
5783 * Lots of entity decls are used to declare a single
5784 * char
5785 * <!ENTITY lt "<">
5786 * Which seems to be valid since
5787 * 2.4: The ampersand character (&) and the left angle
5788 * bracket (<) may appear in their literal form only
5789 * when used ... They are also legal within the literal
5790 * entity value of an internal entity declaration;i
5791 * see "4.3.2 Well-Formed Parsed Entities".
5792 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5793 * Looking at the OASIS test suite and James Clark
5794 * tests, this is broken. However the XML REC uses
5795 * it. Is the XML REC not well-formed ????
5796 * This is a hack to avoid this problem
5797 *
5798 * ANSWER: since lt gt amp .. are already defined,
5799 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005800 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005801 * is lousy but acceptable.
5802 */
5803 list = xmlNewDocText(ctxt->myDoc, value);
5804 if (list != NULL) {
5805 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5806 (ent->children == NULL)) {
5807 ent->children = list;
5808 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005809 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005810 list->parent = (xmlNodePtr) ent;
5811 } else {
5812 xmlFreeNodeList(list);
5813 }
5814 } else if (list != NULL) {
5815 xmlFreeNodeList(list);
5816 }
5817 } else {
5818 /*
5819 * 4.3.2: An internal general parsed entity is well-formed
5820 * if its replacement text matches the production labeled
5821 * content.
5822 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005823
5824 void *user_data;
5825 /*
5826 * This is a bit hackish but this seems the best
5827 * way to make sure both SAX and DOM entity support
5828 * behaves okay.
5829 */
5830 if (ctxt->userData == ctxt)
5831 user_data = NULL;
5832 else
5833 user_data = ctxt->userData;
5834
Owen Taylor3473f882001-02-23 17:55:21 +00005835 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5836 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005837 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5838 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 ctxt->depth--;
5840 } else if (ent->etype ==
5841 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5842 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005843 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005844 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005845 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 ctxt->depth--;
5847 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005848 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005849 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5850 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005851 }
5852 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005853 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005854 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005855 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005856 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5857 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005858 (ent->children == NULL)) {
5859 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005860 if (ctxt->replaceEntities) {
5861 /*
5862 * Prune it directly in the generated document
5863 * except for single text nodes.
5864 */
5865 if ((list->type == XML_TEXT_NODE) &&
5866 (list->next == NULL)) {
5867 list->parent = (xmlNodePtr) ent;
5868 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005869 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005870 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005871 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005872 while (list != NULL) {
5873 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005874 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005875 if (list->next == NULL)
5876 ent->last = list;
5877 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005878 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005879 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005880#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005881 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5882 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005883#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005884 }
5885 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005886 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005887 while (list != NULL) {
5888 list->parent = (xmlNodePtr) ent;
5889 if (list->next == NULL)
5890 ent->last = list;
5891 list = list->next;
5892 }
Owen Taylor3473f882001-02-23 17:55:21 +00005893 }
5894 } else {
5895 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005896 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005897 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005898 } else if ((ret != XML_ERR_OK) &&
5899 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005900 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005901 } else if (list != NULL) {
5902 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005903 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005904 }
5905 }
5906 }
5907 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5908 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5909 /*
5910 * Create a node.
5911 */
5912 ctxt->sax->reference(ctxt->userData, ent->name);
5913 return;
5914 } else if (ctxt->replaceEntities) {
5915 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5916 /*
5917 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005918 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005919 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005920 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005921 if ((list == NULL) && (ent->owner == 0)) {
5922 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005923 cur = ent->children;
5924 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005925 nw = xmlCopyNode(cur, 1);
5926 if (nw != NULL) {
5927 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005928 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005929 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005930 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005931 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005932 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005933 if (cur == ent->last)
5934 break;
5935 cur = cur->next;
5936 }
Daniel Veillard81273902003-09-30 00:43:48 +00005937#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005938 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005939 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005940#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005941 } else if (list == NULL) {
5942 xmlNodePtr nw = NULL, cur, next, last,
5943 firstChild = NULL;
5944 /*
5945 * Copy the entity child list and make it the new
5946 * entity child list. The goal is to make sure any
5947 * ID or REF referenced will be the one from the
5948 * document content and not the entity copy.
5949 */
5950 cur = ent->children;
5951 ent->children = NULL;
5952 last = ent->last;
5953 ent->last = NULL;
5954 while (cur != NULL) {
5955 next = cur->next;
5956 cur->next = NULL;
5957 cur->parent = NULL;
5958 nw = xmlCopyNode(cur, 1);
5959 if (nw != NULL) {
5960 nw->_private = cur->_private;
5961 if (firstChild == NULL){
5962 firstChild = cur;
5963 }
5964 xmlAddChild((xmlNodePtr) ent, nw);
5965 xmlAddChild(ctxt->node, cur);
5966 }
5967 if (cur == last)
5968 break;
5969 cur = next;
5970 }
5971 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005972#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005973 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5974 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005975#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005976 } else {
5977 /*
5978 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005979 * node with a possible previous text one which
5980 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005981 */
5982 if (ent->children->type == XML_TEXT_NODE)
5983 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5984 if ((ent->last != ent->children) &&
5985 (ent->last->type == XML_TEXT_NODE))
5986 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5987 xmlAddChildList(ctxt->node, ent->children);
5988 }
5989
Owen Taylor3473f882001-02-23 17:55:21 +00005990 /*
5991 * This is to avoid a nasty side effect, see
5992 * characters() in SAX.c
5993 */
5994 ctxt->nodemem = 0;
5995 ctxt->nodelen = 0;
5996 return;
5997 } else {
5998 /*
5999 * Probably running in SAX mode
6000 */
6001 xmlParserInputPtr input;
6002
6003 input = xmlNewEntityInputStream(ctxt, ent);
6004 xmlPushInput(ctxt, input);
6005 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006006 (memcmp(CUR_PTR, "<?xml", 5) == 0) &&
6007 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006008 xmlParseTextDecl(ctxt);
6009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6010 /*
6011 * The XML REC instructs us to stop parsing right here
6012 */
6013 ctxt->instate = XML_PARSER_EOF;
6014 return;
6015 }
6016 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006017 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6018 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006019 }
6020 }
6021 return;
6022 }
6023 }
6024 } else {
6025 val = ent->content;
6026 if (val == NULL) return;
6027 /*
6028 * inline the entity.
6029 */
6030 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6031 (!ctxt->disableSAX))
6032 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6033 }
6034 }
6035}
6036
6037/**
6038 * xmlParseEntityRef:
6039 * @ctxt: an XML parser context
6040 *
6041 * parse ENTITY references declarations
6042 *
6043 * [68] EntityRef ::= '&' Name ';'
6044 *
6045 * [ WFC: Entity Declared ]
6046 * In a document without any DTD, a document with only an internal DTD
6047 * subset which contains no parameter entity references, or a document
6048 * with "standalone='yes'", the Name given in the entity reference
6049 * must match that in an entity declaration, except that well-formed
6050 * documents need not declare any of the following entities: amp, lt,
6051 * gt, apos, quot. The declaration of a parameter entity must precede
6052 * any reference to it. Similarly, the declaration of a general entity
6053 * must precede any reference to it which appears in a default value in an
6054 * attribute-list declaration. Note that if entities are declared in the
6055 * external subset or in external parameter entities, a non-validating
6056 * processor is not obligated to read and process their declarations;
6057 * for such documents, the rule that an entity must be declared is a
6058 * well-formedness constraint only if standalone='yes'.
6059 *
6060 * [ WFC: Parsed Entity ]
6061 * An entity reference must not contain the name of an unparsed entity
6062 *
6063 * Returns the xmlEntityPtr if found, or NULL otherwise.
6064 */
6065xmlEntityPtr
6066xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006067 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006068 xmlEntityPtr ent = NULL;
6069
6070 GROW;
6071
6072 if (RAW == '&') {
6073 NEXT;
6074 name = xmlParseName(ctxt);
6075 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006076 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6077 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006078 } else {
6079 if (RAW == ';') {
6080 NEXT;
6081 /*
6082 * Ask first SAX for entity resolution, otherwise try the
6083 * predefined set.
6084 */
6085 if (ctxt->sax != NULL) {
6086 if (ctxt->sax->getEntity != NULL)
6087 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006088 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006089 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006090 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6091 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006092 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006093 }
Owen Taylor3473f882001-02-23 17:55:21 +00006094 }
6095 /*
6096 * [ WFC: Entity Declared ]
6097 * In a document without any DTD, a document with only an
6098 * internal DTD subset which contains no parameter entity
6099 * references, or a document with "standalone='yes'", the
6100 * Name given in the entity reference must match that in an
6101 * entity declaration, except that well-formed documents
6102 * need not declare any of the following entities: amp, lt,
6103 * gt, apos, quot.
6104 * The declaration of a parameter entity must precede any
6105 * reference to it.
6106 * Similarly, the declaration of a general entity must
6107 * precede any reference to it which appears in a default
6108 * value in an attribute-list declaration. Note that if
6109 * entities are declared in the external subset or in
6110 * external parameter entities, a non-validating processor
6111 * is not obligated to read and process their declarations;
6112 * for such documents, the rule that an entity must be
6113 * declared is a well-formedness constraint only if
6114 * standalone='yes'.
6115 */
6116 if (ent == NULL) {
6117 if ((ctxt->standalone == 1) ||
6118 ((ctxt->hasExternalSubset == 0) &&
6119 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006120 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006121 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006122 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006123 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006124 "Entity '%s' not defined\n", name);
6125 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006126 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006127 }
6128
6129 /*
6130 * [ WFC: Parsed Entity ]
6131 * An entity reference must not contain the name of an
6132 * unparsed entity
6133 */
6134 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006135 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006136 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006137 }
6138
6139 /*
6140 * [ WFC: No External Entity References ]
6141 * Attribute values cannot contain direct or indirect
6142 * entity references to external entities.
6143 */
6144 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6145 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006146 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6147 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006148 }
6149 /*
6150 * [ WFC: No < in Attribute Values ]
6151 * The replacement text of any entity referred to directly or
6152 * indirectly in an attribute value (other than "&lt;") must
6153 * not contain a <.
6154 */
6155 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6156 (ent != NULL) &&
6157 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6158 (ent->content != NULL) &&
6159 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006160 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006161 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006162 }
6163
6164 /*
6165 * Internal check, no parameter entities here ...
6166 */
6167 else {
6168 switch (ent->etype) {
6169 case XML_INTERNAL_PARAMETER_ENTITY:
6170 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006171 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6172 "Attempt to reference the parameter entity '%s'\n",
6173 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006174 break;
6175 default:
6176 break;
6177 }
6178 }
6179
6180 /*
6181 * [ WFC: No Recursion ]
6182 * A parsed entity must not contain a recursive reference
6183 * to itself, either directly or indirectly.
6184 * Done somewhere else
6185 */
6186
6187 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006188 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006189 }
Owen Taylor3473f882001-02-23 17:55:21 +00006190 }
6191 }
6192 return(ent);
6193}
6194
6195/**
6196 * xmlParseStringEntityRef:
6197 * @ctxt: an XML parser context
6198 * @str: a pointer to an index in the string
6199 *
6200 * parse ENTITY references declarations, but this version parses it from
6201 * a string value.
6202 *
6203 * [68] EntityRef ::= '&' Name ';'
6204 *
6205 * [ WFC: Entity Declared ]
6206 * In a document without any DTD, a document with only an internal DTD
6207 * subset which contains no parameter entity references, or a document
6208 * with "standalone='yes'", the Name given in the entity reference
6209 * must match that in an entity declaration, except that well-formed
6210 * documents need not declare any of the following entities: amp, lt,
6211 * gt, apos, quot. The declaration of a parameter entity must precede
6212 * any reference to it. Similarly, the declaration of a general entity
6213 * must precede any reference to it which appears in a default value in an
6214 * attribute-list declaration. Note that if entities are declared in the
6215 * external subset or in external parameter entities, a non-validating
6216 * processor is not obligated to read and process their declarations;
6217 * for such documents, the rule that an entity must be declared is a
6218 * well-formedness constraint only if standalone='yes'.
6219 *
6220 * [ WFC: Parsed Entity ]
6221 * An entity reference must not contain the name of an unparsed entity
6222 *
6223 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6224 * is updated to the current location in the string.
6225 */
6226xmlEntityPtr
6227xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6228 xmlChar *name;
6229 const xmlChar *ptr;
6230 xmlChar cur;
6231 xmlEntityPtr ent = NULL;
6232
6233 if ((str == NULL) || (*str == NULL))
6234 return(NULL);
6235 ptr = *str;
6236 cur = *ptr;
6237 if (cur == '&') {
6238 ptr++;
6239 cur = *ptr;
6240 name = xmlParseStringName(ctxt, &ptr);
6241 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006242 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6243 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006244 } else {
6245 if (*ptr == ';') {
6246 ptr++;
6247 /*
6248 * Ask first SAX for entity resolution, otherwise try the
6249 * predefined set.
6250 */
6251 if (ctxt->sax != NULL) {
6252 if (ctxt->sax->getEntity != NULL)
6253 ent = ctxt->sax->getEntity(ctxt->userData, name);
6254 if (ent == NULL)
6255 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006256 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006257 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006258 }
Owen Taylor3473f882001-02-23 17:55:21 +00006259 }
6260 /*
6261 * [ WFC: Entity Declared ]
6262 * In a document without any DTD, a document with only an
6263 * internal DTD subset which contains no parameter entity
6264 * references, or a document with "standalone='yes'", the
6265 * Name given in the entity reference must match that in an
6266 * entity declaration, except that well-formed documents
6267 * need not declare any of the following entities: amp, lt,
6268 * gt, apos, quot.
6269 * The declaration of a parameter entity must precede any
6270 * reference to it.
6271 * Similarly, the declaration of a general entity must
6272 * precede any reference to it which appears in a default
6273 * value in an attribute-list declaration. Note that if
6274 * entities are declared in the external subset or in
6275 * external parameter entities, a non-validating processor
6276 * is not obligated to read and process their declarations;
6277 * for such documents, the rule that an entity must be
6278 * declared is a well-formedness constraint only if
6279 * standalone='yes'.
6280 */
6281 if (ent == NULL) {
6282 if ((ctxt->standalone == 1) ||
6283 ((ctxt->hasExternalSubset == 0) &&
6284 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006285 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006286 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006287 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006288 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006289 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006290 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006291 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006292 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006293 }
6294
6295 /*
6296 * [ WFC: Parsed Entity ]
6297 * An entity reference must not contain the name of an
6298 * unparsed entity
6299 */
6300 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006301 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006302 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006303 }
6304
6305 /*
6306 * [ WFC: No External Entity References ]
6307 * Attribute values cannot contain direct or indirect
6308 * entity references to external entities.
6309 */
6310 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6311 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006312 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006313 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006314 }
6315 /*
6316 * [ WFC: No < in Attribute Values ]
6317 * The replacement text of any entity referred to directly or
6318 * indirectly in an attribute value (other than "&lt;") must
6319 * not contain a <.
6320 */
6321 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6322 (ent != NULL) &&
6323 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6324 (ent->content != NULL) &&
6325 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006326 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6327 "'<' in entity '%s' is not allowed in attributes values\n",
6328 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006329 }
6330
6331 /*
6332 * Internal check, no parameter entities here ...
6333 */
6334 else {
6335 switch (ent->etype) {
6336 case XML_INTERNAL_PARAMETER_ENTITY:
6337 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006338 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6339 "Attempt to reference the parameter entity '%s'\n",
6340 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006341 break;
6342 default:
6343 break;
6344 }
6345 }
6346
6347 /*
6348 * [ WFC: No Recursion ]
6349 * A parsed entity must not contain a recursive reference
6350 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006351 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006352 */
6353
6354 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006355 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006356 }
6357 xmlFree(name);
6358 }
6359 }
6360 *str = ptr;
6361 return(ent);
6362}
6363
6364/**
6365 * xmlParsePEReference:
6366 * @ctxt: an XML parser context
6367 *
6368 * parse PEReference declarations
6369 * The entity content is handled directly by pushing it's content as
6370 * a new input stream.
6371 *
6372 * [69] PEReference ::= '%' Name ';'
6373 *
6374 * [ WFC: No Recursion ]
6375 * A parsed entity must not contain a recursive
6376 * reference to itself, either directly or indirectly.
6377 *
6378 * [ WFC: Entity Declared ]
6379 * In a document without any DTD, a document with only an internal DTD
6380 * subset which contains no parameter entity references, or a document
6381 * with "standalone='yes'", ... ... The declaration of a parameter
6382 * entity must precede any reference to it...
6383 *
6384 * [ VC: Entity Declared ]
6385 * In a document with an external subset or external parameter entities
6386 * with "standalone='no'", ... ... The declaration of a parameter entity
6387 * must precede any reference to it...
6388 *
6389 * [ WFC: In DTD ]
6390 * Parameter-entity references may only appear in the DTD.
6391 * NOTE: misleading but this is handled.
6392 */
6393void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006394xmlParsePEReference(xmlParserCtxtPtr ctxt)
6395{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006396 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006397 xmlEntityPtr entity = NULL;
6398 xmlParserInputPtr input;
6399
6400 if (RAW == '%') {
6401 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006402 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006403 if (name == NULL) {
6404 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6405 "xmlParsePEReference: no name\n");
6406 } else {
6407 if (RAW == ';') {
6408 NEXT;
6409 if ((ctxt->sax != NULL) &&
6410 (ctxt->sax->getParameterEntity != NULL))
6411 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6412 name);
6413 if (entity == NULL) {
6414 /*
6415 * [ WFC: Entity Declared ]
6416 * In a document without any DTD, a document with only an
6417 * internal DTD subset which contains no parameter entity
6418 * references, or a document with "standalone='yes'", ...
6419 * ... The declaration of a parameter entity must precede
6420 * any reference to it...
6421 */
6422 if ((ctxt->standalone == 1) ||
6423 ((ctxt->hasExternalSubset == 0) &&
6424 (ctxt->hasPErefs == 0))) {
6425 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6426 "PEReference: %%%s; not found\n",
6427 name);
6428 } else {
6429 /*
6430 * [ VC: Entity Declared ]
6431 * In a document with an external subset or external
6432 * parameter entities with "standalone='no'", ...
6433 * ... The declaration of a parameter entity must
6434 * precede any reference to it...
6435 */
6436 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6437 "PEReference: %%%s; not found\n",
6438 name, NULL);
6439 ctxt->valid = 0;
6440 }
6441 } else {
6442 /*
6443 * Internal checking in case the entity quest barfed
6444 */
6445 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6446 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6447 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6448 "Internal: %%%s; is not a parameter entity\n",
6449 name, NULL);
6450 } else if (ctxt->input->free != deallocblankswrapper) {
6451 input =
6452 xmlNewBlanksWrapperInputStream(ctxt, entity);
6453 xmlPushInput(ctxt, input);
6454 } else {
6455 /*
6456 * TODO !!!
6457 * handle the extra spaces added before and after
6458 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6459 */
6460 input = xmlNewEntityInputStream(ctxt, entity);
6461 xmlPushInput(ctxt, input);
6462 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6463 (memcmp(CUR_PTR, "<?xml", 5) == 0) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006464 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006465 xmlParseTextDecl(ctxt);
6466 if (ctxt->errNo ==
6467 XML_ERR_UNSUPPORTED_ENCODING) {
6468 /*
6469 * The XML REC instructs us to stop parsing
6470 * right here
6471 */
6472 ctxt->instate = XML_PARSER_EOF;
6473 return;
6474 }
6475 }
6476 }
6477 }
6478 ctxt->hasPErefs = 1;
6479 } else {
6480 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6481 }
6482 }
Owen Taylor3473f882001-02-23 17:55:21 +00006483 }
6484}
6485
6486/**
6487 * xmlParseStringPEReference:
6488 * @ctxt: an XML parser context
6489 * @str: a pointer to an index in the string
6490 *
6491 * parse PEReference declarations
6492 *
6493 * [69] PEReference ::= '%' Name ';'
6494 *
6495 * [ WFC: No Recursion ]
6496 * A parsed entity must not contain a recursive
6497 * reference to itself, either directly or indirectly.
6498 *
6499 * [ WFC: Entity Declared ]
6500 * In a document without any DTD, a document with only an internal DTD
6501 * subset which contains no parameter entity references, or a document
6502 * with "standalone='yes'", ... ... The declaration of a parameter
6503 * entity must precede any reference to it...
6504 *
6505 * [ VC: Entity Declared ]
6506 * In a document with an external subset or external parameter entities
6507 * with "standalone='no'", ... ... The declaration of a parameter entity
6508 * must precede any reference to it...
6509 *
6510 * [ WFC: In DTD ]
6511 * Parameter-entity references may only appear in the DTD.
6512 * NOTE: misleading but this is handled.
6513 *
6514 * Returns the string of the entity content.
6515 * str is updated to the current value of the index
6516 */
6517xmlEntityPtr
6518xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6519 const xmlChar *ptr;
6520 xmlChar cur;
6521 xmlChar *name;
6522 xmlEntityPtr entity = NULL;
6523
6524 if ((str == NULL) || (*str == NULL)) return(NULL);
6525 ptr = *str;
6526 cur = *ptr;
6527 if (cur == '%') {
6528 ptr++;
6529 cur = *ptr;
6530 name = xmlParseStringName(ctxt, &ptr);
6531 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006532 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6533 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006534 } else {
6535 cur = *ptr;
6536 if (cur == ';') {
6537 ptr++;
6538 cur = *ptr;
6539 if ((ctxt->sax != NULL) &&
6540 (ctxt->sax->getParameterEntity != NULL))
6541 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6542 name);
6543 if (entity == NULL) {
6544 /*
6545 * [ WFC: Entity Declared ]
6546 * In a document without any DTD, a document with only an
6547 * internal DTD subset which contains no parameter entity
6548 * references, or a document with "standalone='yes'", ...
6549 * ... The declaration of a parameter entity must precede
6550 * any reference to it...
6551 */
6552 if ((ctxt->standalone == 1) ||
6553 ((ctxt->hasExternalSubset == 0) &&
6554 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006555 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006556 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006557 } else {
6558 /*
6559 * [ VC: Entity Declared ]
6560 * In a document with an external subset or external
6561 * parameter entities with "standalone='no'", ...
6562 * ... The declaration of a parameter entity must
6563 * precede any reference to it...
6564 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006565 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6566 "PEReference: %%%s; not found\n",
6567 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 ctxt->valid = 0;
6569 }
6570 } else {
6571 /*
6572 * Internal checking in case the entity quest barfed
6573 */
6574 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6575 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006576 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6577 "%%%s; is not a parameter entity\n",
6578 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006579 }
6580 }
6581 ctxt->hasPErefs = 1;
6582 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006583 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006584 }
6585 xmlFree(name);
6586 }
6587 }
6588 *str = ptr;
6589 return(entity);
6590}
6591
6592/**
6593 * xmlParseDocTypeDecl:
6594 * @ctxt: an XML parser context
6595 *
6596 * parse a DOCTYPE declaration
6597 *
6598 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6599 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6600 *
6601 * [ VC: Root Element Type ]
6602 * The Name in the document type declaration must match the element
6603 * type of the root element.
6604 */
6605
6606void
6607xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006608 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006609 xmlChar *ExternalID = NULL;
6610 xmlChar *URI = NULL;
6611
6612 /*
6613 * We know that '<!DOCTYPE' has been detected.
6614 */
6615 SKIP(9);
6616
6617 SKIP_BLANKS;
6618
6619 /*
6620 * Parse the DOCTYPE name.
6621 */
6622 name = xmlParseName(ctxt);
6623 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006624 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6625 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006626 }
6627 ctxt->intSubName = name;
6628
6629 SKIP_BLANKS;
6630
6631 /*
6632 * Check for SystemID and ExternalID
6633 */
6634 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6635
6636 if ((URI != NULL) || (ExternalID != NULL)) {
6637 ctxt->hasExternalSubset = 1;
6638 }
6639 ctxt->extSubURI = URI;
6640 ctxt->extSubSystem = ExternalID;
6641
6642 SKIP_BLANKS;
6643
6644 /*
6645 * Create and update the internal subset.
6646 */
6647 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6648 (!ctxt->disableSAX))
6649 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6650
6651 /*
6652 * Is there any internal subset declarations ?
6653 * they are handled separately in xmlParseInternalSubset()
6654 */
6655 if (RAW == '[')
6656 return;
6657
6658 /*
6659 * We should be at the end of the DOCTYPE declaration.
6660 */
6661 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006662 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
6664 NEXT;
6665}
6666
6667/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006668 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006669 * @ctxt: an XML parser context
6670 *
6671 * parse the internal subset declaration
6672 *
6673 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6674 */
6675
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006676static void
Owen Taylor3473f882001-02-23 17:55:21 +00006677xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6678 /*
6679 * Is there any DTD definition ?
6680 */
6681 if (RAW == '[') {
6682 ctxt->instate = XML_PARSER_DTD;
6683 NEXT;
6684 /*
6685 * Parse the succession of Markup declarations and
6686 * PEReferences.
6687 * Subsequence (markupdecl | PEReference | S)*
6688 */
6689 while (RAW != ']') {
6690 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006691 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006692
6693 SKIP_BLANKS;
6694 xmlParseMarkupDecl(ctxt);
6695 xmlParsePEReference(ctxt);
6696
6697 /*
6698 * Pop-up of finished entities.
6699 */
6700 while ((RAW == 0) && (ctxt->inputNr > 1))
6701 xmlPopInput(ctxt);
6702
6703 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006704 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006705 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006706 break;
6707 }
6708 }
6709 if (RAW == ']') {
6710 NEXT;
6711 SKIP_BLANKS;
6712 }
6713 }
6714
6715 /*
6716 * We should be at the end of the DOCTYPE declaration.
6717 */
6718 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006719 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006720 }
6721 NEXT;
6722}
6723
Daniel Veillard81273902003-09-30 00:43:48 +00006724#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006725/**
6726 * xmlParseAttribute:
6727 * @ctxt: an XML parser context
6728 * @value: a xmlChar ** used to store the value of the attribute
6729 *
6730 * parse an attribute
6731 *
6732 * [41] Attribute ::= Name Eq AttValue
6733 *
6734 * [ WFC: No External Entity References ]
6735 * Attribute values cannot contain direct or indirect entity references
6736 * to external entities.
6737 *
6738 * [ WFC: No < in Attribute Values ]
6739 * The replacement text of any entity referred to directly or indirectly in
6740 * an attribute value (other than "&lt;") must not contain a <.
6741 *
6742 * [ VC: Attribute Value Type ]
6743 * The attribute must have been declared; the value must be of the type
6744 * declared for it.
6745 *
6746 * [25] Eq ::= S? '=' S?
6747 *
6748 * With namespace:
6749 *
6750 * [NS 11] Attribute ::= QName Eq AttValue
6751 *
6752 * Also the case QName == xmlns:??? is handled independently as a namespace
6753 * definition.
6754 *
6755 * Returns the attribute name, and the value in *value.
6756 */
6757
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006758const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006759xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006760 const xmlChar *name;
6761 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006762
6763 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006764 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006765 name = xmlParseName(ctxt);
6766 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006767 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006768 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006769 return(NULL);
6770 }
6771
6772 /*
6773 * read the value
6774 */
6775 SKIP_BLANKS;
6776 if (RAW == '=') {
6777 NEXT;
6778 SKIP_BLANKS;
6779 val = xmlParseAttValue(ctxt);
6780 ctxt->instate = XML_PARSER_CONTENT;
6781 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006782 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006783 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006784 return(NULL);
6785 }
6786
6787 /*
6788 * Check that xml:lang conforms to the specification
6789 * No more registered as an error, just generate a warning now
6790 * since this was deprecated in XML second edition
6791 */
6792 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6793 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006794 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6795 "Malformed value for xml:lang : %s\n",
6796 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006797 }
6798 }
6799
6800 /*
6801 * Check that xml:space conforms to the specification
6802 */
6803 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6804 if (xmlStrEqual(val, BAD_CAST "default"))
6805 *(ctxt->space) = 0;
6806 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6807 *(ctxt->space) = 1;
6808 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006809 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006810"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006811 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006812 }
6813 }
6814
6815 *value = val;
6816 return(name);
6817}
6818
6819/**
6820 * xmlParseStartTag:
6821 * @ctxt: an XML parser context
6822 *
6823 * parse a start of tag either for rule element or
6824 * EmptyElement. In both case we don't parse the tag closing chars.
6825 *
6826 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6827 *
6828 * [ WFC: Unique Att Spec ]
6829 * No attribute name may appear more than once in the same start-tag or
6830 * empty-element tag.
6831 *
6832 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6833 *
6834 * [ WFC: Unique Att Spec ]
6835 * No attribute name may appear more than once in the same start-tag or
6836 * empty-element tag.
6837 *
6838 * With namespace:
6839 *
6840 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6841 *
6842 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6843 *
6844 * Returns the element name parsed
6845 */
6846
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006847const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006848xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006849 const xmlChar *name;
6850 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006851 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006852 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006853 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006854 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006855 int i;
6856
6857 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006858 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006859
6860 name = xmlParseName(ctxt);
6861 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006862 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006863 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006864 return(NULL);
6865 }
6866
6867 /*
6868 * Now parse the attributes, it ends up with the ending
6869 *
6870 * (S Attribute)* S?
6871 */
6872 SKIP_BLANKS;
6873 GROW;
6874
Daniel Veillard21a0f912001-02-25 19:54:14 +00006875 while ((RAW != '>') &&
6876 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006877 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006878 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006879 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006880
6881 attname = xmlParseAttribute(ctxt, &attvalue);
6882 if ((attname != NULL) && (attvalue != NULL)) {
6883 /*
6884 * [ WFC: Unique Att Spec ]
6885 * No attribute name may appear more than once in the same
6886 * start-tag or empty-element tag.
6887 */
6888 for (i = 0; i < nbatts;i += 2) {
6889 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006890 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006891 xmlFree(attvalue);
6892 goto failed;
6893 }
6894 }
Owen Taylor3473f882001-02-23 17:55:21 +00006895 /*
6896 * Add the pair to atts
6897 */
6898 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006899 maxatts = 22; /* allow for 10 attrs by default */
6900 atts = (const xmlChar **)
6901 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006902 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006903 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006904 if (attvalue != NULL)
6905 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006906 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006907 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006908 ctxt->atts = atts;
6909 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006910 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006911 const xmlChar **n;
6912
Owen Taylor3473f882001-02-23 17:55:21 +00006913 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006914 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006915 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006916 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006917 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006918 if (attvalue != NULL)
6919 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006920 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006921 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006922 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006923 ctxt->atts = atts;
6924 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006925 }
6926 atts[nbatts++] = attname;
6927 atts[nbatts++] = attvalue;
6928 atts[nbatts] = NULL;
6929 atts[nbatts + 1] = NULL;
6930 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006931 if (attvalue != NULL)
6932 xmlFree(attvalue);
6933 }
6934
6935failed:
6936
Daniel Veillard3772de32002-12-17 10:31:45 +00006937 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006938 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6939 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006940 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006941 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6942 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006943 }
6944 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006945 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6946 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006947 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6948 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006949 break;
6950 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006951 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006952 GROW;
6953 }
6954
6955 /*
6956 * SAX: Start of Element !
6957 */
6958 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006959 (!ctxt->disableSAX)) {
6960 if (nbatts > 0)
6961 ctxt->sax->startElement(ctxt->userData, name, atts);
6962 else
6963 ctxt->sax->startElement(ctxt->userData, name, NULL);
6964 }
Owen Taylor3473f882001-02-23 17:55:21 +00006965
6966 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006967 /* Free only the content strings */
6968 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006969 if (atts[i] != NULL)
6970 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006971 }
6972 return(name);
6973}
6974
6975/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006976 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006977 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006978 * @line: line of the start tag
6979 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006980 *
6981 * parse an end of tag
6982 *
6983 * [42] ETag ::= '</' Name S? '>'
6984 *
6985 * With namespace
6986 *
6987 * [NS 9] ETag ::= '</' QName S? '>'
6988 */
6989
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006990static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006991xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006992 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006993
6994 GROW;
6995 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006996 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006997 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006998 return;
6999 }
7000 SKIP(2);
7001
Daniel Veillard46de64e2002-05-29 08:21:33 +00007002 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007003
7004 /*
7005 * We should definitely be at the ending "S? '>'" part
7006 */
7007 GROW;
7008 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007009 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007010 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007011 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007012 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007013
7014 /*
7015 * [ WFC: Element Type Match ]
7016 * The Name in an element's end-tag must match the element type in the
7017 * start-tag.
7018 *
7019 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007020 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007021 if (name == NULL) name = BAD_CAST "unparseable";
7022 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007023 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007024 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007025 }
7026
7027 /*
7028 * SAX: End of Tag
7029 */
7030 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7031 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007032 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007033
Daniel Veillarde57ec792003-09-10 10:50:59 +00007034 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007035 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007036 return;
7037}
7038
7039/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007040 * xmlParseEndTag:
7041 * @ctxt: an XML parser context
7042 *
7043 * parse an end of tag
7044 *
7045 * [42] ETag ::= '</' Name S? '>'
7046 *
7047 * With namespace
7048 *
7049 * [NS 9] ETag ::= '</' QName S? '>'
7050 */
7051
7052void
7053xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007054 xmlParseEndTag1(ctxt, 0);
7055}
Daniel Veillard81273902003-09-30 00:43:48 +00007056#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007057
7058/************************************************************************
7059 * *
7060 * SAX 2 specific operations *
7061 * *
7062 ************************************************************************/
7063
7064static const xmlChar *
7065xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7066 int len = 0, l;
7067 int c;
7068 int count = 0;
7069
7070 /*
7071 * Handler for more complex cases
7072 */
7073 GROW;
7074 c = CUR_CHAR(l);
7075 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007076 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007077 return(NULL);
7078 }
7079
7080 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007081 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007082 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007083 (IS_COMBINING(c)) ||
7084 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007085 if (count++ > 100) {
7086 count = 0;
7087 GROW;
7088 }
7089 len += l;
7090 NEXTL(l);
7091 c = CUR_CHAR(l);
7092 }
7093 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7094}
7095
7096/*
7097 * xmlGetNamespace:
7098 * @ctxt: an XML parser context
7099 * @prefix: the prefix to lookup
7100 *
7101 * Lookup the namespace name for the @prefix (which ca be NULL)
7102 * The prefix must come from the @ctxt->dict dictionnary
7103 *
7104 * Returns the namespace name or NULL if not bound
7105 */
7106static const xmlChar *
7107xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7108 int i;
7109
Daniel Veillarde57ec792003-09-10 10:50:59 +00007110 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007111 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007112 if (ctxt->nsTab[i] == prefix) {
7113 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7114 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007115 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007116 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007117 return(NULL);
7118}
7119
7120/**
7121 * xmlParseNCName:
7122 * @ctxt: an XML parser context
7123 *
7124 * parse an XML name.
7125 *
7126 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7127 * CombiningChar | Extender
7128 *
7129 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7130 *
7131 * Returns the Name parsed or NULL
7132 */
7133
7134static const xmlChar *
7135xmlParseNCName(xmlParserCtxtPtr ctxt) {
7136 const xmlChar *in;
7137 const xmlChar *ret;
7138 int count = 0;
7139
7140 /*
7141 * Accelerator for simple ASCII names
7142 */
7143 in = ctxt->input->cur;
7144 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7145 ((*in >= 0x41) && (*in <= 0x5A)) ||
7146 (*in == '_')) {
7147 in++;
7148 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7149 ((*in >= 0x41) && (*in <= 0x5A)) ||
7150 ((*in >= 0x30) && (*in <= 0x39)) ||
7151 (*in == '_') || (*in == '-') ||
7152 (*in == '.'))
7153 in++;
7154 if ((*in > 0) && (*in < 0x80)) {
7155 count = in - ctxt->input->cur;
7156 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7157 ctxt->input->cur = in;
7158 ctxt->nbChars += count;
7159 ctxt->input->col += count;
7160 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007161 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007162 }
7163 return(ret);
7164 }
7165 }
7166 return(xmlParseNCNameComplex(ctxt));
7167}
7168
7169/**
7170 * xmlParseQName:
7171 * @ctxt: an XML parser context
7172 * @prefix: pointer to store the prefix part
7173 *
7174 * parse an XML Namespace QName
7175 *
7176 * [6] QName ::= (Prefix ':')? LocalPart
7177 * [7] Prefix ::= NCName
7178 * [8] LocalPart ::= NCName
7179 *
7180 * Returns the Name parsed or NULL
7181 */
7182
7183static const xmlChar *
7184xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7185 const xmlChar *l, *p;
7186
7187 GROW;
7188
7189 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007190 if (l == NULL) {
7191 if (CUR == ':') {
7192 l = xmlParseName(ctxt);
7193 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007194 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7195 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007196 *prefix = NULL;
7197 return(l);
7198 }
7199 }
7200 return(NULL);
7201 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007202 if (CUR == ':') {
7203 NEXT;
7204 p = l;
7205 l = xmlParseNCName(ctxt);
7206 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007207 xmlChar *tmp;
7208
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007209 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7210 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007211 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7212 p = xmlDictLookup(ctxt->dict, tmp, -1);
7213 if (tmp != NULL) xmlFree(tmp);
7214 *prefix = NULL;
7215 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007216 }
7217 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007218 xmlChar *tmp;
7219
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007220 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7221 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007222 NEXT;
7223 tmp = (xmlChar *) xmlParseName(ctxt);
7224 if (tmp != NULL) {
7225 tmp = xmlBuildQName(tmp, l, NULL, 0);
7226 l = xmlDictLookup(ctxt->dict, tmp, -1);
7227 if (tmp != NULL) xmlFree(tmp);
7228 *prefix = p;
7229 return(l);
7230 }
7231 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7232 l = xmlDictLookup(ctxt->dict, tmp, -1);
7233 if (tmp != NULL) xmlFree(tmp);
7234 *prefix = p;
7235 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007236 }
7237 *prefix = p;
7238 } else
7239 *prefix = NULL;
7240 return(l);
7241}
7242
7243/**
7244 * xmlParseQNameAndCompare:
7245 * @ctxt: an XML parser context
7246 * @name: the localname
7247 * @prefix: the prefix, if any.
7248 *
7249 * parse an XML name and compares for match
7250 * (specialized for endtag parsing)
7251 *
7252 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7253 * and the name for mismatch
7254 */
7255
7256static const xmlChar *
7257xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7258 xmlChar const *prefix) {
7259 const xmlChar *cmp = name;
7260 const xmlChar *in;
7261 const xmlChar *ret;
7262 const xmlChar *prefix2;
7263
7264 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7265
7266 GROW;
7267 in = ctxt->input->cur;
7268
7269 cmp = prefix;
7270 while (*in != 0 && *in == *cmp) {
7271 ++in;
7272 ++cmp;
7273 }
7274 if ((*cmp == 0) && (*in == ':')) {
7275 in++;
7276 cmp = name;
7277 while (*in != 0 && *in == *cmp) {
7278 ++in;
7279 ++cmp;
7280 }
William M. Brack76e95df2003-10-18 16:20:14 +00007281 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007282 /* success */
7283 ctxt->input->cur = in;
7284 return((const xmlChar*) 1);
7285 }
7286 }
7287 /*
7288 * all strings coms from the dictionary, equality can be done directly
7289 */
7290 ret = xmlParseQName (ctxt, &prefix2);
7291 if ((ret == name) && (prefix == prefix2))
7292 return((const xmlChar*) 1);
7293 return ret;
7294}
7295
7296/**
7297 * xmlParseAttValueInternal:
7298 * @ctxt: an XML parser context
7299 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007300 * @alloc: whether the attribute was reallocated as a new string
7301 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007302 *
7303 * parse a value for an attribute.
7304 * NOTE: if no normalization is needed, the routine will return pointers
7305 * directly from the data buffer.
7306 *
7307 * 3.3.3 Attribute-Value Normalization:
7308 * Before the value of an attribute is passed to the application or
7309 * checked for validity, the XML processor must normalize it as follows:
7310 * - a character reference is processed by appending the referenced
7311 * character to the attribute value
7312 * - an entity reference is processed by recursively processing the
7313 * replacement text of the entity
7314 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7315 * appending #x20 to the normalized value, except that only a single
7316 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7317 * parsed entity or the literal entity value of an internal parsed entity
7318 * - other characters are processed by appending them to the normalized value
7319 * If the declared value is not CDATA, then the XML processor must further
7320 * process the normalized attribute value by discarding any leading and
7321 * trailing space (#x20) characters, and by replacing sequences of space
7322 * (#x20) characters by a single space (#x20) character.
7323 * All attributes for which no declaration has been read should be treated
7324 * by a non-validating parser as if declared CDATA.
7325 *
7326 * Returns the AttValue parsed or NULL. The value has to be freed by the
7327 * caller if it was copied, this can be detected by val[*len] == 0.
7328 */
7329
7330static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007331xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7332 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007333{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007335 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007336 xmlChar *ret = NULL;
7337
7338 GROW;
7339 in = (xmlChar *) CUR_PTR;
7340 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007341 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007342 return (NULL);
7343 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007344 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007345
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007346 /*
7347 * try to handle in this routine the most common case where no
7348 * allocation of a new string is required and where content is
7349 * pure ASCII.
7350 */
7351 limit = *in++;
7352 end = ctxt->input->end;
7353 start = in;
7354 if (in >= end) {
7355 const xmlChar *oldbase = ctxt->input->base;
7356 GROW;
7357 if (oldbase != ctxt->input->base) {
7358 long delta = ctxt->input->base - oldbase;
7359 start = start + delta;
7360 in = in + delta;
7361 }
7362 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007363 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007364 if (normalize) {
7365 /*
7366 * Skip any leading spaces
7367 */
7368 while ((in < end) && (*in != limit) &&
7369 ((*in == 0x20) || (*in == 0x9) ||
7370 (*in == 0xA) || (*in == 0xD))) {
7371 in++;
7372 start = in;
7373 if (in >= end) {
7374 const xmlChar *oldbase = ctxt->input->base;
7375 GROW;
7376 if (oldbase != ctxt->input->base) {
7377 long delta = ctxt->input->base - oldbase;
7378 start = start + delta;
7379 in = in + delta;
7380 }
7381 end = ctxt->input->end;
7382 }
7383 }
7384 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7385 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7386 if ((*in++ == 0x20) && (*in == 0x20)) break;
7387 if (in >= end) {
7388 const xmlChar *oldbase = ctxt->input->base;
7389 GROW;
7390 if (oldbase != ctxt->input->base) {
7391 long delta = ctxt->input->base - oldbase;
7392 start = start + delta;
7393 in = in + delta;
7394 }
7395 end = ctxt->input->end;
7396 }
7397 }
7398 last = in;
7399 /*
7400 * skip the trailing blanks
7401 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007402 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007403 while ((in < end) && (*in != limit) &&
7404 ((*in == 0x20) || (*in == 0x9) ||
7405 (*in == 0xA) || (*in == 0xD))) {
7406 in++;
7407 if (in >= end) {
7408 const xmlChar *oldbase = ctxt->input->base;
7409 GROW;
7410 if (oldbase != ctxt->input->base) {
7411 long delta = ctxt->input->base - oldbase;
7412 start = start + delta;
7413 in = in + delta;
7414 last = last + delta;
7415 }
7416 end = ctxt->input->end;
7417 }
7418 }
7419 if (*in != limit) goto need_complex;
7420 } else {
7421 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7422 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7423 in++;
7424 if (in >= end) {
7425 const xmlChar *oldbase = ctxt->input->base;
7426 GROW;
7427 if (oldbase != ctxt->input->base) {
7428 long delta = ctxt->input->base - oldbase;
7429 start = start + delta;
7430 in = in + delta;
7431 }
7432 end = ctxt->input->end;
7433 }
7434 }
7435 last = in;
7436 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007437 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007438 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007439 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007440 *len = last - start;
7441 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007442 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007443 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007444 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007445 }
7446 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007447 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007448 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007449need_complex:
7450 if (alloc) *alloc = 1;
7451 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007452}
7453
7454/**
7455 * xmlParseAttribute2:
7456 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007457 * @pref: the element prefix
7458 * @elem: the element name
7459 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007460 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007461 * @len: an int * to save the length of the attribute
7462 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007463 *
7464 * parse an attribute in the new SAX2 framework.
7465 *
7466 * Returns the attribute name, and the value in *value, .
7467 */
7468
7469static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007470xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7471 const xmlChar *pref, const xmlChar *elem,
7472 const xmlChar **prefix, xmlChar **value,
7473 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007474 const xmlChar *name;
7475 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007476 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477
7478 *value = NULL;
7479 GROW;
7480 name = xmlParseQName(ctxt, prefix);
7481 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007482 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7483 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007484 return(NULL);
7485 }
7486
7487 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007488 * get the type if needed
7489 */
7490 if (ctxt->attsSpecial != NULL) {
7491 int type;
7492
7493 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7494 pref, elem, *prefix, name);
7495 if (type != 0) normalize = 1;
7496 }
7497
7498 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007499 * read the value
7500 */
7501 SKIP_BLANKS;
7502 if (RAW == '=') {
7503 NEXT;
7504 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007505 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506 ctxt->instate = XML_PARSER_CONTENT;
7507 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007508 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007509 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007510 return(NULL);
7511 }
7512
7513 /*
7514 * Check that xml:lang conforms to the specification
7515 * No more registered as an error, just generate a warning now
7516 * since this was deprecated in XML second edition
7517 */
7518 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7519 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007520 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7521 "Malformed value for xml:lang : %s\n",
7522 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007523 }
7524 }
7525
7526 /*
7527 * Check that xml:space conforms to the specification
7528 */
7529 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7530 if (xmlStrEqual(val, BAD_CAST "default"))
7531 *(ctxt->space) = 0;
7532 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7533 *(ctxt->space) = 1;
7534 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007535 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007536"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7537 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007538 }
7539 }
7540
7541 *value = val;
7542 return(name);
7543}
7544
7545/**
7546 * xmlParseStartTag2:
7547 * @ctxt: an XML parser context
7548 *
7549 * parse a start of tag either for rule element or
7550 * EmptyElement. In both case we don't parse the tag closing chars.
7551 * This routine is called when running SAX2 parsing
7552 *
7553 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7554 *
7555 * [ WFC: Unique Att Spec ]
7556 * No attribute name may appear more than once in the same start-tag or
7557 * empty-element tag.
7558 *
7559 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7560 *
7561 * [ WFC: Unique Att Spec ]
7562 * No attribute name may appear more than once in the same start-tag or
7563 * empty-element tag.
7564 *
7565 * With namespace:
7566 *
7567 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7568 *
7569 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7570 *
7571 * Returns the element name parsed
7572 */
7573
7574static const xmlChar *
7575xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7576 const xmlChar **URI) {
7577 const xmlChar *localname;
7578 const xmlChar *prefix;
7579 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007580 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007581 const xmlChar *nsname;
7582 xmlChar *attvalue;
7583 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007584 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007585 int nratts, nbatts, nbdef;
7586 int i, j, nbNs, attval;
7587 const xmlChar *base;
7588 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007589
7590 if (RAW != '<') return(NULL);
7591 NEXT1;
7592
7593 /*
7594 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7595 * point since the attribute values may be stored as pointers to
7596 * the buffer and calling SHRINK would destroy them !
7597 * The Shrinking is only possible once the full set of attribute
7598 * callbacks have been done.
7599 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007600reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007601 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007602 base = ctxt->input->base;
7603 cur = ctxt->input->cur - ctxt->input->base;
7604 nbatts = 0;
7605 nratts = 0;
7606 nbdef = 0;
7607 nbNs = 0;
7608 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609
7610 localname = xmlParseQName(ctxt, &prefix);
7611 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007612 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7613 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007614 return(NULL);
7615 }
7616
7617 /*
7618 * Now parse the attributes, it ends up with the ending
7619 *
7620 * (S Attribute)* S?
7621 */
7622 SKIP_BLANKS;
7623 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007624 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007625
7626 while ((RAW != '>') &&
7627 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007628 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007629 const xmlChar *q = CUR_PTR;
7630 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007631 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007632
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007633 attname = xmlParseAttribute2(ctxt, prefix, localname,
7634 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007635 if ((attname != NULL) && (attvalue != NULL)) {
7636 if (len < 0) len = xmlStrlen(attvalue);
7637 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007638 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7639 xmlURIPtr uri;
7640
7641 if (*URL != 0) {
7642 uri = xmlParseURI((const char *) URL);
7643 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007644 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7645 "xmlns: %s not a valid URI\n",
7646 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007647 } else {
7648 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007649 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7650 "xmlns: URI %s is not absolute\n",
7651 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007652 }
7653 xmlFreeURI(uri);
7654 }
7655 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007656 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007657 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007658 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007659 for (j = 1;j <= nbNs;j++)
7660 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7661 break;
7662 if (j <= nbNs)
7663 xmlErrAttributeDup(ctxt, NULL, attname);
7664 else
7665 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007666 if (alloc != 0) xmlFree(attvalue);
7667 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007668 continue;
7669 }
7670 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007671 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7672 xmlURIPtr uri;
7673
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007674 if (attname == ctxt->str_xml) {
7675 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007676 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7677 "xml namespace prefix mapped to wrong URI\n",
7678 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007679 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007680 /*
7681 * Do not keep a namespace definition node
7682 */
7683 if (alloc != 0) xmlFree(attvalue);
7684 SKIP_BLANKS;
7685 continue;
7686 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 uri = xmlParseURI((const char *) URL);
7688 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007689 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7690 "xmlns:%s: '%s' is not a valid URI\n",
7691 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007692 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007693 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007694 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7695 "xmlns:%s: URI %s is not absolute\n",
7696 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007697 }
7698 xmlFreeURI(uri);
7699 }
7700
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007702 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007703 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007704 for (j = 1;j <= nbNs;j++)
7705 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7706 break;
7707 if (j <= nbNs)
7708 xmlErrAttributeDup(ctxt, aprefix, attname);
7709 else
7710 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007711 if (alloc != 0) xmlFree(attvalue);
7712 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007713 continue;
7714 }
7715
7716 /*
7717 * Add the pair to atts
7718 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007719 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7720 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721 if (attvalue[len] == 0)
7722 xmlFree(attvalue);
7723 goto failed;
7724 }
7725 maxatts = ctxt->maxatts;
7726 atts = ctxt->atts;
7727 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007728 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729 atts[nbatts++] = attname;
7730 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007731 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007732 atts[nbatts++] = attvalue;
7733 attvalue += len;
7734 atts[nbatts++] = attvalue;
7735 /*
7736 * tag if some deallocation is needed
7737 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007738 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007739 } else {
7740 if ((attvalue != NULL) && (attvalue[len] == 0))
7741 xmlFree(attvalue);
7742 }
7743
7744failed:
7745
7746 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007747 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007748 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7749 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007750 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007751 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7752 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007753 }
7754 SKIP_BLANKS;
7755 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7756 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007757 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007758 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007759 break;
7760 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007761 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007762 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007763 }
7764
Daniel Veillard0fb18932003-09-07 09:14:37 +00007765 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007766 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007767 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007768 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007769 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7770 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007771 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007772 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007773 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007775 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007776 /*
7777 * [ WFC: Unique Att Spec ]
7778 * No attribute name may appear more than once in the same
7779 * start-tag or empty-element tag.
7780 * As extended by the Namespace in XML REC.
7781 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007782 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007783 if (atts[i] == atts[j]) {
7784 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007785 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007786 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007788 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007789 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007790 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007791 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007792 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793 }
7794 }
7795 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007796 }
7797
7798 /*
7799 * The attributes defaulting
7800 */
7801 if (ctxt->attsDefault != NULL) {
7802 xmlDefAttrsPtr defaults;
7803
7804 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7805 if (defaults != NULL) {
7806 for (i = 0;i < defaults->nbAttrs;i++) {
7807 attname = defaults->values[4 * i];
7808 aprefix = defaults->values[4 * i + 1];
7809
7810 /*
7811 * special work for namespaces defaulted defs
7812 */
7813 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7814 /*
7815 * check that it's not a defined namespace
7816 */
7817 for (j = 1;j <= nbNs;j++)
7818 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7819 break;
7820 if (j <= nbNs) continue;
7821
7822 nsname = xmlGetNamespace(ctxt, NULL);
7823 if (nsname != defaults->values[4 * i + 2]) {
7824 if (nsPush(ctxt, NULL,
7825 defaults->values[4 * i + 2]) > 0)
7826 nbNs++;
7827 }
7828 } else if (aprefix == ctxt->str_xmlns) {
7829 /*
7830 * check that it's not a defined namespace
7831 */
7832 for (j = 1;j <= nbNs;j++)
7833 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7834 break;
7835 if (j <= nbNs) continue;
7836
7837 nsname = xmlGetNamespace(ctxt, attname);
7838 if (nsname != defaults->values[2]) {
7839 if (nsPush(ctxt, attname,
7840 defaults->values[4 * i + 2]) > 0)
7841 nbNs++;
7842 }
7843 } else {
7844 /*
7845 * check that it's not a defined attribute
7846 */
7847 for (j = 0;j < nbatts;j+=5) {
7848 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7849 break;
7850 }
7851 if (j < nbatts) continue;
7852
7853 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7854 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007855 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007856 }
7857 maxatts = ctxt->maxatts;
7858 atts = ctxt->atts;
7859 }
7860 atts[nbatts++] = attname;
7861 atts[nbatts++] = aprefix;
7862 if (aprefix == NULL)
7863 atts[nbatts++] = NULL;
7864 else
7865 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7866 atts[nbatts++] = defaults->values[4 * i + 2];
7867 atts[nbatts++] = defaults->values[4 * i + 3];
7868 nbdef++;
7869 }
7870 }
7871 }
7872 }
7873
7874 nsname = xmlGetNamespace(ctxt, prefix);
7875 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007876 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7877 "Namespace prefix %s on %s is not defined\n",
7878 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007879 }
7880 *pref = prefix;
7881 *URI = nsname;
7882
7883 /*
7884 * SAX: Start of Element !
7885 */
7886 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7887 (!ctxt->disableSAX)) {
7888 if (nbNs > 0)
7889 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7890 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7891 nbatts / 5, nbdef, atts);
7892 else
7893 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7894 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7895 }
7896
7897 /*
7898 * Free up attribute allocated strings if needed
7899 */
7900 if (attval != 0) {
7901 for (i = 3,j = 0; j < nratts;i += 5,j++)
7902 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7903 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007904 }
7905
7906 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007907
7908base_changed:
7909 /*
7910 * the attribute strings are valid iif the base didn't changed
7911 */
7912 if (attval != 0) {
7913 for (i = 3,j = 0; j < nratts;i += 5,j++)
7914 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7915 xmlFree((xmlChar *) atts[i]);
7916 }
7917 ctxt->input->cur = ctxt->input->base + cur;
7918 if (ctxt->wellFormed == 1) {
7919 goto reparse;
7920 }
7921 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007922}
7923
7924/**
7925 * xmlParseEndTag2:
7926 * @ctxt: an XML parser context
7927 * @line: line of the start tag
7928 * @nsNr: number of namespaces on the start tag
7929 *
7930 * parse an end of tag
7931 *
7932 * [42] ETag ::= '</' Name S? '>'
7933 *
7934 * With namespace
7935 *
7936 * [NS 9] ETag ::= '</' QName S? '>'
7937 */
7938
7939static void
7940xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7941 const xmlChar *URI, int line, int nsNr) {
7942 const xmlChar *name;
7943
7944 GROW;
7945 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007946 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007947 return;
7948 }
7949 SKIP(2);
7950
7951 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7952
7953 /*
7954 * We should definitely be at the ending "S? '>'" part
7955 */
7956 GROW;
7957 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007958 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007959 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007960 } else
7961 NEXT1;
7962
7963 /*
7964 * [ WFC: Element Type Match ]
7965 * The Name in an element's end-tag must match the element type in the
7966 * start-tag.
7967 *
7968 */
7969 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007970 if (name == NULL) name = BAD_CAST "unparseable";
7971 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007972 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007973 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007974 }
7975
7976 /*
7977 * SAX: End of Tag
7978 */
7979 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7980 (!ctxt->disableSAX))
7981 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7982
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983 spacePop(ctxt);
7984 if (nsNr != 0)
7985 nsPop(ctxt, nsNr);
7986 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007987}
7988
7989/**
Owen Taylor3473f882001-02-23 17:55:21 +00007990 * xmlParseCDSect:
7991 * @ctxt: an XML parser context
7992 *
7993 * Parse escaped pure raw content.
7994 *
7995 * [18] CDSect ::= CDStart CData CDEnd
7996 *
7997 * [19] CDStart ::= '<![CDATA['
7998 *
7999 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8000 *
8001 * [21] CDEnd ::= ']]>'
8002 */
8003void
8004xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8005 xmlChar *buf = NULL;
8006 int len = 0;
8007 int size = XML_PARSER_BUFFER_SIZE;
8008 int r, rl;
8009 int s, sl;
8010 int cur, l;
8011 int count = 0;
8012
Daniel Veillard8f597c32003-10-06 08:19:27 +00008013 /* Check 2.6.0 was NXT(0) not RAW */
8014 if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008015 SKIP(9);
8016 } else
8017 return;
8018
8019 ctxt->instate = XML_PARSER_CDATA_SECTION;
8020 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008021 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008022 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008023 ctxt->instate = XML_PARSER_CONTENT;
8024 return;
8025 }
8026 NEXTL(rl);
8027 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008028 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008029 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008030 ctxt->instate = XML_PARSER_CONTENT;
8031 return;
8032 }
8033 NEXTL(sl);
8034 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008035 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008036 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008037 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008038 return;
8039 }
William M. Brack871611b2003-10-18 04:53:14 +00008040 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008041 ((r != ']') || (s != ']') || (cur != '>'))) {
8042 if (len + 5 >= size) {
8043 size *= 2;
8044 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8045 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008046 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008047 return;
8048 }
8049 }
8050 COPY_BUF(rl,buf,len,r);
8051 r = s;
8052 rl = sl;
8053 s = cur;
8054 sl = l;
8055 count++;
8056 if (count > 50) {
8057 GROW;
8058 count = 0;
8059 }
8060 NEXTL(l);
8061 cur = CUR_CHAR(l);
8062 }
8063 buf[len] = 0;
8064 ctxt->instate = XML_PARSER_CONTENT;
8065 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008066 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008067 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008068 xmlFree(buf);
8069 return;
8070 }
8071 NEXTL(l);
8072
8073 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008074 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008075 */
8076 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8077 if (ctxt->sax->cdataBlock != NULL)
8078 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008079 else if (ctxt->sax->characters != NULL)
8080 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008081 }
8082 xmlFree(buf);
8083}
8084
8085/**
8086 * xmlParseContent:
8087 * @ctxt: an XML parser context
8088 *
8089 * Parse a content:
8090 *
8091 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8092 */
8093
8094void
8095xmlParseContent(xmlParserCtxtPtr ctxt) {
8096 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008097 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008098 ((RAW != '<') || (NXT(1) != '/'))) {
8099 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008100 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008101 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008102
8103 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008104 * First case : a Processing Instruction.
8105 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008106 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008107 xmlParsePI(ctxt);
8108 }
8109
8110 /*
8111 * Second case : a CDSection
8112 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008113 /* 2.6.0 test was *cur not RAW */
8114 else if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008115 xmlParseCDSect(ctxt);
8116 }
8117
8118 /*
8119 * Third case : a comment
8120 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008121 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008122 (NXT(2) == '-') && (NXT(3) == '-')) {
8123 xmlParseComment(ctxt);
8124 ctxt->instate = XML_PARSER_CONTENT;
8125 }
8126
8127 /*
8128 * Fourth case : a sub-element.
8129 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008130 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008131 xmlParseElement(ctxt);
8132 }
8133
8134 /*
8135 * Fifth case : a reference. If if has not been resolved,
8136 * parsing returns it's Name, create the node
8137 */
8138
Daniel Veillard21a0f912001-02-25 19:54:14 +00008139 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008140 xmlParseReference(ctxt);
8141 }
8142
8143 /*
8144 * Last case, text. Note that References are handled directly.
8145 */
8146 else {
8147 xmlParseCharData(ctxt, 0);
8148 }
8149
8150 GROW;
8151 /*
8152 * Pop-up of finished entities.
8153 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008154 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008155 xmlPopInput(ctxt);
8156 SHRINK;
8157
Daniel Veillardfdc91562002-07-01 21:52:03 +00008158 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008159 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8160 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008161 ctxt->instate = XML_PARSER_EOF;
8162 break;
8163 }
8164 }
8165}
8166
8167/**
8168 * xmlParseElement:
8169 * @ctxt: an XML parser context
8170 *
8171 * parse an XML element, this is highly recursive
8172 *
8173 * [39] element ::= EmptyElemTag | STag content ETag
8174 *
8175 * [ WFC: Element Type Match ]
8176 * The Name in an element's end-tag must match the element type in the
8177 * start-tag.
8178 *
Owen Taylor3473f882001-02-23 17:55:21 +00008179 */
8180
8181void
8182xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008183 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008184 const xmlChar *prefix;
8185 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008186 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008187 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008188 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008189 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008190
8191 /* Capture start position */
8192 if (ctxt->record_info) {
8193 node_info.begin_pos = ctxt->input->consumed +
8194 (CUR_PTR - ctxt->input->base);
8195 node_info.begin_line = ctxt->input->line;
8196 }
8197
8198 if (ctxt->spaceNr == 0)
8199 spacePush(ctxt, -1);
8200 else
8201 spacePush(ctxt, *ctxt->space);
8202
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008203 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008204#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008206#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008208#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008209 else
8210 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008211#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008212 if (name == NULL) {
8213 spacePop(ctxt);
8214 return;
8215 }
8216 namePush(ctxt, name);
8217 ret = ctxt->node;
8218
Daniel Veillard4432df22003-09-28 18:58:27 +00008219#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008220 /*
8221 * [ VC: Root Element Type ]
8222 * The Name in the document type declaration must match the element
8223 * type of the root element.
8224 */
8225 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8226 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8227 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008228#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008229
8230 /*
8231 * Check for an Empty Element.
8232 */
8233 if ((RAW == '/') && (NXT(1) == '>')) {
8234 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008235 if (ctxt->sax2) {
8236 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8237 (!ctxt->disableSAX))
8238 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008239#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008240 } else {
8241 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8242 (!ctxt->disableSAX))
8243 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008244#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008245 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246 namePop(ctxt);
8247 spacePop(ctxt);
8248 if (nsNr != ctxt->nsNr)
8249 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008250 if ( ret != NULL && ctxt->record_info ) {
8251 node_info.end_pos = ctxt->input->consumed +
8252 (CUR_PTR - ctxt->input->base);
8253 node_info.end_line = ctxt->input->line;
8254 node_info.node = ret;
8255 xmlParserAddNodeInfo(ctxt, &node_info);
8256 }
8257 return;
8258 }
8259 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008260 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008261 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008262 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8263 "Couldn't find end of Start Tag %s line %d\n",
8264 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008265
8266 /*
8267 * end of parsing of this node.
8268 */
8269 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008270 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008271 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008272 if (nsNr != ctxt->nsNr)
8273 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008274
8275 /*
8276 * Capture end position and add node
8277 */
8278 if ( ret != NULL && ctxt->record_info ) {
8279 node_info.end_pos = ctxt->input->consumed +
8280 (CUR_PTR - ctxt->input->base);
8281 node_info.end_line = ctxt->input->line;
8282 node_info.node = ret;
8283 xmlParserAddNodeInfo(ctxt, &node_info);
8284 }
8285 return;
8286 }
8287
8288 /*
8289 * Parse the content of the element:
8290 */
8291 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008292 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008293 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008294 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008295 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008296
8297 /*
8298 * end of parsing of this node.
8299 */
8300 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008301 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008302 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008303 if (nsNr != ctxt->nsNr)
8304 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008305 return;
8306 }
8307
8308 /*
8309 * parse the end of tag: '</' should be here.
8310 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008311 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008312 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008313 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008314 }
8315#ifdef LIBXML_SAX1_ENABLED
8316 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008317 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008318#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008319
8320 /*
8321 * Capture end position and add node
8322 */
8323 if ( ret != NULL && ctxt->record_info ) {
8324 node_info.end_pos = ctxt->input->consumed +
8325 (CUR_PTR - ctxt->input->base);
8326 node_info.end_line = ctxt->input->line;
8327 node_info.node = ret;
8328 xmlParserAddNodeInfo(ctxt, &node_info);
8329 }
8330}
8331
8332/**
8333 * xmlParseVersionNum:
8334 * @ctxt: an XML parser context
8335 *
8336 * parse the XML version value.
8337 *
8338 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8339 *
8340 * Returns the string giving the XML version number, or NULL
8341 */
8342xmlChar *
8343xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8344 xmlChar *buf = NULL;
8345 int len = 0;
8346 int size = 10;
8347 xmlChar cur;
8348
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008349 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008350 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008351 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008352 return(NULL);
8353 }
8354 cur = CUR;
8355 while (((cur >= 'a') && (cur <= 'z')) ||
8356 ((cur >= 'A') && (cur <= 'Z')) ||
8357 ((cur >= '0') && (cur <= '9')) ||
8358 (cur == '_') || (cur == '.') ||
8359 (cur == ':') || (cur == '-')) {
8360 if (len + 1 >= size) {
8361 size *= 2;
8362 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8363 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008364 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008365 return(NULL);
8366 }
8367 }
8368 buf[len++] = cur;
8369 NEXT;
8370 cur=CUR;
8371 }
8372 buf[len] = 0;
8373 return(buf);
8374}
8375
8376/**
8377 * xmlParseVersionInfo:
8378 * @ctxt: an XML parser context
8379 *
8380 * parse the XML version.
8381 *
8382 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8383 *
8384 * [25] Eq ::= S? '=' S?
8385 *
8386 * Returns the version string, e.g. "1.0"
8387 */
8388
8389xmlChar *
8390xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8391 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008392
Daniel Veillard8f597c32003-10-06 08:19:27 +00008393 if (memcmp(CUR_PTR, "version", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008394 SKIP(7);
8395 SKIP_BLANKS;
8396 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008397 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008398 return(NULL);
8399 }
8400 NEXT;
8401 SKIP_BLANKS;
8402 if (RAW == '"') {
8403 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008404 version = xmlParseVersionNum(ctxt);
8405 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008406 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008407 } else
8408 NEXT;
8409 } else if (RAW == '\''){
8410 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008411 version = xmlParseVersionNum(ctxt);
8412 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008413 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008414 } else
8415 NEXT;
8416 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008417 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008418 }
8419 }
8420 return(version);
8421}
8422
8423/**
8424 * xmlParseEncName:
8425 * @ctxt: an XML parser context
8426 *
8427 * parse the XML encoding name
8428 *
8429 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8430 *
8431 * Returns the encoding name value or NULL
8432 */
8433xmlChar *
8434xmlParseEncName(xmlParserCtxtPtr ctxt) {
8435 xmlChar *buf = NULL;
8436 int len = 0;
8437 int size = 10;
8438 xmlChar cur;
8439
8440 cur = CUR;
8441 if (((cur >= 'a') && (cur <= 'z')) ||
8442 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008443 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008444 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008445 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008446 return(NULL);
8447 }
8448
8449 buf[len++] = cur;
8450 NEXT;
8451 cur = CUR;
8452 while (((cur >= 'a') && (cur <= 'z')) ||
8453 ((cur >= 'A') && (cur <= 'Z')) ||
8454 ((cur >= '0') && (cur <= '9')) ||
8455 (cur == '.') || (cur == '_') ||
8456 (cur == '-')) {
8457 if (len + 1 >= size) {
8458 size *= 2;
8459 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8460 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008461 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008462 return(NULL);
8463 }
8464 }
8465 buf[len++] = cur;
8466 NEXT;
8467 cur = CUR;
8468 if (cur == 0) {
8469 SHRINK;
8470 GROW;
8471 cur = CUR;
8472 }
8473 }
8474 buf[len] = 0;
8475 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008476 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008477 }
8478 return(buf);
8479}
8480
8481/**
8482 * xmlParseEncodingDecl:
8483 * @ctxt: an XML parser context
8484 *
8485 * parse the XML encoding declaration
8486 *
8487 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8488 *
8489 * this setups the conversion filters.
8490 *
8491 * Returns the encoding value or NULL
8492 */
8493
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008494const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008495xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8496 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008497
8498 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008499 if (memcmp(CUR_PTR, "encoding", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008500 SKIP(8);
8501 SKIP_BLANKS;
8502 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008503 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008504 return(NULL);
8505 }
8506 NEXT;
8507 SKIP_BLANKS;
8508 if (RAW == '"') {
8509 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 encoding = xmlParseEncName(ctxt);
8511 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008512 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008513 } else
8514 NEXT;
8515 } else if (RAW == '\''){
8516 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008517 encoding = xmlParseEncName(ctxt);
8518 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008519 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008520 } else
8521 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008522 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008524 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008525 /*
8526 * UTF-16 encoding stwich has already taken place at this stage,
8527 * more over the little-endian/big-endian selection is already done
8528 */
8529 if ((encoding != NULL) &&
8530 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8531 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008532 if (ctxt->encoding != NULL)
8533 xmlFree((xmlChar *) ctxt->encoding);
8534 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008535 }
8536 /*
8537 * UTF-8 encoding is handled natively
8538 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008539 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008540 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8541 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008542 if (ctxt->encoding != NULL)
8543 xmlFree((xmlChar *) ctxt->encoding);
8544 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008545 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008546 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008547 xmlCharEncodingHandlerPtr handler;
8548
8549 if (ctxt->input->encoding != NULL)
8550 xmlFree((xmlChar *) ctxt->input->encoding);
8551 ctxt->input->encoding = encoding;
8552
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008553 handler = xmlFindCharEncodingHandler((const char *) encoding);
8554 if (handler != NULL) {
8555 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008556 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008557 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008558 "Unsupported encoding %s\n", encoding);
8559 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008560 }
8561 }
8562 }
8563 return(encoding);
8564}
8565
8566/**
8567 * xmlParseSDDecl:
8568 * @ctxt: an XML parser context
8569 *
8570 * parse the XML standalone declaration
8571 *
8572 * [32] SDDecl ::= S 'standalone' Eq
8573 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8574 *
8575 * [ VC: Standalone Document Declaration ]
8576 * TODO The standalone document declaration must have the value "no"
8577 * if any external markup declarations contain declarations of:
8578 * - attributes with default values, if elements to which these
8579 * attributes apply appear in the document without specifications
8580 * of values for these attributes, or
8581 * - entities (other than amp, lt, gt, apos, quot), if references
8582 * to those entities appear in the document, or
8583 * - attributes with values subject to normalization, where the
8584 * attribute appears in the document with a value which will change
8585 * as a result of normalization, or
8586 * - element types with element content, if white space occurs directly
8587 * within any instance of those types.
8588 *
8589 * Returns 1 if standalone, 0 otherwise
8590 */
8591
8592int
8593xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8594 int standalone = -1;
8595
8596 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008597 if (memcmp(CUR_PTR, "standalone", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008598 SKIP(10);
8599 SKIP_BLANKS;
8600 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008601 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008602 return(standalone);
8603 }
8604 NEXT;
8605 SKIP_BLANKS;
8606 if (RAW == '\''){
8607 NEXT;
8608 if ((RAW == 'n') && (NXT(1) == 'o')) {
8609 standalone = 0;
8610 SKIP(2);
8611 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8612 (NXT(2) == 's')) {
8613 standalone = 1;
8614 SKIP(3);
8615 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008616 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008617 }
8618 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008619 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008620 } else
8621 NEXT;
8622 } else if (RAW == '"'){
8623 NEXT;
8624 if ((RAW == 'n') && (NXT(1) == 'o')) {
8625 standalone = 0;
8626 SKIP(2);
8627 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8628 (NXT(2) == 's')) {
8629 standalone = 1;
8630 SKIP(3);
8631 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008632 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008633 }
8634 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008635 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008636 } else
8637 NEXT;
8638 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008639 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008640 }
8641 }
8642 return(standalone);
8643}
8644
8645/**
8646 * xmlParseXMLDecl:
8647 * @ctxt: an XML parser context
8648 *
8649 * parse an XML declaration header
8650 *
8651 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8652 */
8653
8654void
8655xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8656 xmlChar *version;
8657
8658 /*
8659 * We know that '<?xml' is here.
8660 */
8661 SKIP(5);
8662
William M. Brack76e95df2003-10-18 16:20:14 +00008663 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008664 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8665 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008666 }
8667 SKIP_BLANKS;
8668
8669 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008670 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008671 */
8672 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008673 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008674 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008675 } else {
8676 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8677 /*
8678 * TODO: Blueberry should be detected here
8679 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008680 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8681 "Unsupported version '%s'\n",
8682 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008683 }
8684 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008685 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008686 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008687 }
Owen Taylor3473f882001-02-23 17:55:21 +00008688
8689 /*
8690 * We may have the encoding declaration
8691 */
William M. Brack76e95df2003-10-18 16:20:14 +00008692 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008693 if ((RAW == '?') && (NXT(1) == '>')) {
8694 SKIP(2);
8695 return;
8696 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008698 }
8699 xmlParseEncodingDecl(ctxt);
8700 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8701 /*
8702 * The XML REC instructs us to stop parsing right here
8703 */
8704 return;
8705 }
8706
8707 /*
8708 * We may have the standalone status.
8709 */
William M. Brack76e95df2003-10-18 16:20:14 +00008710 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008711 if ((RAW == '?') && (NXT(1) == '>')) {
8712 SKIP(2);
8713 return;
8714 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008715 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008716 }
8717 SKIP_BLANKS;
8718 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8719
8720 SKIP_BLANKS;
8721 if ((RAW == '?') && (NXT(1) == '>')) {
8722 SKIP(2);
8723 } else if (RAW == '>') {
8724 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008725 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008726 NEXT;
8727 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008728 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008729 MOVETO_ENDTAG(CUR_PTR);
8730 NEXT;
8731 }
8732}
8733
8734/**
8735 * xmlParseMisc:
8736 * @ctxt: an XML parser context
8737 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008738 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008739 *
8740 * [27] Misc ::= Comment | PI | S
8741 */
8742
8743void
8744xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008745 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillard8f597c32003-10-06 08:19:27 +00008746 (memcmp(CUR_PTR, "<!--", 4) == 0) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008747 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008748 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008749 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008750 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008751 NEXT;
8752 } else
8753 xmlParseComment(ctxt);
8754 }
8755}
8756
8757/**
8758 * xmlParseDocument:
8759 * @ctxt: an XML parser context
8760 *
8761 * parse an XML document (and build a tree if using the standard SAX
8762 * interface).
8763 *
8764 * [1] document ::= prolog element Misc*
8765 *
8766 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8767 *
8768 * Returns 0, -1 in case of error. the parser context is augmented
8769 * as a result of the parsing.
8770 */
8771
8772int
8773xmlParseDocument(xmlParserCtxtPtr ctxt) {
8774 xmlChar start[4];
8775 xmlCharEncoding enc;
8776
8777 xmlInitParser();
8778
8779 GROW;
8780
8781 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 * SAX: detecting the level.
8783 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008784 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008785
8786 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008787 * SAX: beginning of the document processing.
8788 */
8789 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8790 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8791
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008792 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8793 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008794 /*
8795 * Get the 4 first bytes and decode the charset
8796 * if enc != XML_CHAR_ENCODING_NONE
8797 * plug some encoding conversion routines.
8798 */
8799 start[0] = RAW;
8800 start[1] = NXT(1);
8801 start[2] = NXT(2);
8802 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008803 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008804 if (enc != XML_CHAR_ENCODING_NONE) {
8805 xmlSwitchEncoding(ctxt, enc);
8806 }
Owen Taylor3473f882001-02-23 17:55:21 +00008807 }
8808
8809
8810 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008811 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008812 }
8813
8814 /*
8815 * Check for the XMLDecl in the Prolog.
8816 */
8817 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00008818 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008819
8820 /*
8821 * Note that we will switch encoding on the fly.
8822 */
8823 xmlParseXMLDecl(ctxt);
8824 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8825 /*
8826 * The XML REC instructs us to stop parsing right here
8827 */
8828 return(-1);
8829 }
8830 ctxt->standalone = ctxt->input->standalone;
8831 SKIP_BLANKS;
8832 } else {
8833 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8834 }
8835 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8836 ctxt->sax->startDocument(ctxt->userData);
8837
8838 /*
8839 * The Misc part of the Prolog
8840 */
8841 GROW;
8842 xmlParseMisc(ctxt);
8843
8844 /*
8845 * Then possibly doc type declaration(s) and more Misc
8846 * (doctypedecl Misc*)?
8847 */
8848 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008849 if (memcmp(CUR_PTR, "<!DOCTYPE", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008850
8851 ctxt->inSubset = 1;
8852 xmlParseDocTypeDecl(ctxt);
8853 if (RAW == '[') {
8854 ctxt->instate = XML_PARSER_DTD;
8855 xmlParseInternalSubset(ctxt);
8856 }
8857
8858 /*
8859 * Create and update the external subset.
8860 */
8861 ctxt->inSubset = 2;
8862 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8863 (!ctxt->disableSAX))
8864 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8865 ctxt->extSubSystem, ctxt->extSubURI);
8866 ctxt->inSubset = 0;
8867
8868
8869 ctxt->instate = XML_PARSER_PROLOG;
8870 xmlParseMisc(ctxt);
8871 }
8872
8873 /*
8874 * Time to start parsing the tree itself
8875 */
8876 GROW;
8877 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008878 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8879 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008880 } else {
8881 ctxt->instate = XML_PARSER_CONTENT;
8882 xmlParseElement(ctxt);
8883 ctxt->instate = XML_PARSER_EPILOG;
8884
8885
8886 /*
8887 * The Misc part at the end
8888 */
8889 xmlParseMisc(ctxt);
8890
Daniel Veillard561b7f82002-03-20 21:55:57 +00008891 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008892 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008893 }
8894 ctxt->instate = XML_PARSER_EOF;
8895 }
8896
8897 /*
8898 * SAX: end of the document processing.
8899 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008900 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008901 ctxt->sax->endDocument(ctxt->userData);
8902
Daniel Veillard5997aca2002-03-18 18:36:20 +00008903 /*
8904 * Remove locally kept entity definitions if the tree was not built
8905 */
8906 if ((ctxt->myDoc != NULL) &&
8907 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8908 xmlFreeDoc(ctxt->myDoc);
8909 ctxt->myDoc = NULL;
8910 }
8911
Daniel Veillardc7612992002-02-17 22:47:37 +00008912 if (! ctxt->wellFormed) {
8913 ctxt->valid = 0;
8914 return(-1);
8915 }
Owen Taylor3473f882001-02-23 17:55:21 +00008916 return(0);
8917}
8918
8919/**
8920 * xmlParseExtParsedEnt:
8921 * @ctxt: an XML parser context
8922 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008923 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008924 * An external general parsed entity is well-formed if it matches the
8925 * production labeled extParsedEnt.
8926 *
8927 * [78] extParsedEnt ::= TextDecl? content
8928 *
8929 * Returns 0, -1 in case of error. the parser context is augmented
8930 * as a result of the parsing.
8931 */
8932
8933int
8934xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8935 xmlChar start[4];
8936 xmlCharEncoding enc;
8937
8938 xmlDefaultSAXHandlerInit();
8939
Daniel Veillard309f81d2003-09-23 09:02:53 +00008940 xmlDetectSAX2(ctxt);
8941
Owen Taylor3473f882001-02-23 17:55:21 +00008942 GROW;
8943
8944 /*
8945 * SAX: beginning of the document processing.
8946 */
8947 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8948 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8949
8950 /*
8951 * Get the 4 first bytes and decode the charset
8952 * if enc != XML_CHAR_ENCODING_NONE
8953 * plug some encoding conversion routines.
8954 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008955 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8956 start[0] = RAW;
8957 start[1] = NXT(1);
8958 start[2] = NXT(2);
8959 start[3] = NXT(3);
8960 enc = xmlDetectCharEncoding(start, 4);
8961 if (enc != XML_CHAR_ENCODING_NONE) {
8962 xmlSwitchEncoding(ctxt, enc);
8963 }
Owen Taylor3473f882001-02-23 17:55:21 +00008964 }
8965
8966
8967 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008968 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008969 }
8970
8971 /*
8972 * Check for the XMLDecl in the Prolog.
8973 */
8974 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00008975 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008976
8977 /*
8978 * Note that we will switch encoding on the fly.
8979 */
8980 xmlParseXMLDecl(ctxt);
8981 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8982 /*
8983 * The XML REC instructs us to stop parsing right here
8984 */
8985 return(-1);
8986 }
8987 SKIP_BLANKS;
8988 } else {
8989 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8990 }
8991 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8992 ctxt->sax->startDocument(ctxt->userData);
8993
8994 /*
8995 * Doing validity checking on chunk doesn't make sense
8996 */
8997 ctxt->instate = XML_PARSER_CONTENT;
8998 ctxt->validate = 0;
8999 ctxt->loadsubset = 0;
9000 ctxt->depth = 0;
9001
9002 xmlParseContent(ctxt);
9003
9004 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009005 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009006 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009007 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009008 }
9009
9010 /*
9011 * SAX: end of the document processing.
9012 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009013 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009014 ctxt->sax->endDocument(ctxt->userData);
9015
9016 if (! ctxt->wellFormed) return(-1);
9017 return(0);
9018}
9019
Daniel Veillard73b013f2003-09-30 12:36:01 +00009020#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009021/************************************************************************
9022 * *
9023 * Progressive parsing interfaces *
9024 * *
9025 ************************************************************************/
9026
9027/**
9028 * xmlParseLookupSequence:
9029 * @ctxt: an XML parser context
9030 * @first: the first char to lookup
9031 * @next: the next char to lookup or zero
9032 * @third: the next char to lookup or zero
9033 *
9034 * Try to find if a sequence (first, next, third) or just (first next) or
9035 * (first) is available in the input stream.
9036 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9037 * to avoid rescanning sequences of bytes, it DOES change the state of the
9038 * parser, do not use liberally.
9039 *
9040 * Returns the index to the current parsing point if the full sequence
9041 * is available, -1 otherwise.
9042 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009043static int
Owen Taylor3473f882001-02-23 17:55:21 +00009044xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9045 xmlChar next, xmlChar third) {
9046 int base, len;
9047 xmlParserInputPtr in;
9048 const xmlChar *buf;
9049
9050 in = ctxt->input;
9051 if (in == NULL) return(-1);
9052 base = in->cur - in->base;
9053 if (base < 0) return(-1);
9054 if (ctxt->checkIndex > base)
9055 base = ctxt->checkIndex;
9056 if (in->buf == NULL) {
9057 buf = in->base;
9058 len = in->length;
9059 } else {
9060 buf = in->buf->buffer->content;
9061 len = in->buf->buffer->use;
9062 }
9063 /* take into account the sequence length */
9064 if (third) len -= 2;
9065 else if (next) len --;
9066 for (;base < len;base++) {
9067 if (buf[base] == first) {
9068 if (third != 0) {
9069 if ((buf[base + 1] != next) ||
9070 (buf[base + 2] != third)) continue;
9071 } else if (next != 0) {
9072 if (buf[base + 1] != next) continue;
9073 }
9074 ctxt->checkIndex = 0;
9075#ifdef DEBUG_PUSH
9076 if (next == 0)
9077 xmlGenericError(xmlGenericErrorContext,
9078 "PP: lookup '%c' found at %d\n",
9079 first, base);
9080 else if (third == 0)
9081 xmlGenericError(xmlGenericErrorContext,
9082 "PP: lookup '%c%c' found at %d\n",
9083 first, next, base);
9084 else
9085 xmlGenericError(xmlGenericErrorContext,
9086 "PP: lookup '%c%c%c' found at %d\n",
9087 first, next, third, base);
9088#endif
9089 return(base - (in->cur - in->base));
9090 }
9091 }
9092 ctxt->checkIndex = base;
9093#ifdef DEBUG_PUSH
9094 if (next == 0)
9095 xmlGenericError(xmlGenericErrorContext,
9096 "PP: lookup '%c' failed\n", first);
9097 else if (third == 0)
9098 xmlGenericError(xmlGenericErrorContext,
9099 "PP: lookup '%c%c' failed\n", first, next);
9100 else
9101 xmlGenericError(xmlGenericErrorContext,
9102 "PP: lookup '%c%c%c' failed\n", first, next, third);
9103#endif
9104 return(-1);
9105}
9106
9107/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009108 * xmlParseGetLasts:
9109 * @ctxt: an XML parser context
9110 * @lastlt: pointer to store the last '<' from the input
9111 * @lastgt: pointer to store the last '>' from the input
9112 *
9113 * Lookup the last < and > in the current chunk
9114 */
9115static void
9116xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9117 const xmlChar **lastgt) {
9118 const xmlChar *tmp;
9119
9120 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9121 xmlGenericError(xmlGenericErrorContext,
9122 "Internal error: xmlParseGetLasts\n");
9123 return;
9124 }
9125 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9126 tmp = ctxt->input->end;
9127 tmp--;
9128 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9129 (*tmp != '>')) tmp--;
9130 if (tmp < ctxt->input->base) {
9131 *lastlt = NULL;
9132 *lastgt = NULL;
9133 } else if (*tmp == '<') {
9134 *lastlt = tmp;
9135 tmp--;
9136 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9137 if (tmp < ctxt->input->base)
9138 *lastgt = NULL;
9139 else
9140 *lastgt = tmp;
9141 } else {
9142 *lastgt = tmp;
9143 tmp--;
9144 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9145 if (tmp < ctxt->input->base)
9146 *lastlt = NULL;
9147 else
9148 *lastlt = tmp;
9149 }
9150
9151 } else {
9152 *lastlt = NULL;
9153 *lastgt = NULL;
9154 }
9155}
9156/**
Owen Taylor3473f882001-02-23 17:55:21 +00009157 * xmlParseTryOrFinish:
9158 * @ctxt: an XML parser context
9159 * @terminate: last chunk indicator
9160 *
9161 * Try to progress on parsing
9162 *
9163 * Returns zero if no parsing was possible
9164 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009165static int
Owen Taylor3473f882001-02-23 17:55:21 +00009166xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9167 int ret = 0;
9168 int avail;
9169 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009170 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009171
9172#ifdef DEBUG_PUSH
9173 switch (ctxt->instate) {
9174 case XML_PARSER_EOF:
9175 xmlGenericError(xmlGenericErrorContext,
9176 "PP: try EOF\n"); break;
9177 case XML_PARSER_START:
9178 xmlGenericError(xmlGenericErrorContext,
9179 "PP: try START\n"); break;
9180 case XML_PARSER_MISC:
9181 xmlGenericError(xmlGenericErrorContext,
9182 "PP: try MISC\n");break;
9183 case XML_PARSER_COMMENT:
9184 xmlGenericError(xmlGenericErrorContext,
9185 "PP: try COMMENT\n");break;
9186 case XML_PARSER_PROLOG:
9187 xmlGenericError(xmlGenericErrorContext,
9188 "PP: try PROLOG\n");break;
9189 case XML_PARSER_START_TAG:
9190 xmlGenericError(xmlGenericErrorContext,
9191 "PP: try START_TAG\n");break;
9192 case XML_PARSER_CONTENT:
9193 xmlGenericError(xmlGenericErrorContext,
9194 "PP: try CONTENT\n");break;
9195 case XML_PARSER_CDATA_SECTION:
9196 xmlGenericError(xmlGenericErrorContext,
9197 "PP: try CDATA_SECTION\n");break;
9198 case XML_PARSER_END_TAG:
9199 xmlGenericError(xmlGenericErrorContext,
9200 "PP: try END_TAG\n");break;
9201 case XML_PARSER_ENTITY_DECL:
9202 xmlGenericError(xmlGenericErrorContext,
9203 "PP: try ENTITY_DECL\n");break;
9204 case XML_PARSER_ENTITY_VALUE:
9205 xmlGenericError(xmlGenericErrorContext,
9206 "PP: try ENTITY_VALUE\n");break;
9207 case XML_PARSER_ATTRIBUTE_VALUE:
9208 xmlGenericError(xmlGenericErrorContext,
9209 "PP: try ATTRIBUTE_VALUE\n");break;
9210 case XML_PARSER_DTD:
9211 xmlGenericError(xmlGenericErrorContext,
9212 "PP: try DTD\n");break;
9213 case XML_PARSER_EPILOG:
9214 xmlGenericError(xmlGenericErrorContext,
9215 "PP: try EPILOG\n");break;
9216 case XML_PARSER_PI:
9217 xmlGenericError(xmlGenericErrorContext,
9218 "PP: try PI\n");break;
9219 case XML_PARSER_IGNORE:
9220 xmlGenericError(xmlGenericErrorContext,
9221 "PP: try IGNORE\n");break;
9222 }
9223#endif
9224
Daniel Veillarda880b122003-04-21 21:36:41 +00009225 if (ctxt->input->cur - ctxt->input->base > 4096) {
9226 xmlSHRINK(ctxt);
9227 ctxt->checkIndex = 0;
9228 }
9229 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009230
Daniel Veillarda880b122003-04-21 21:36:41 +00009231 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009232 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9233 return(0);
9234
9235
Owen Taylor3473f882001-02-23 17:55:21 +00009236 /*
9237 * Pop-up of finished entities.
9238 */
9239 while ((RAW == 0) && (ctxt->inputNr > 1))
9240 xmlPopInput(ctxt);
9241
9242 if (ctxt->input ==NULL) break;
9243 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009244 avail = ctxt->input->length -
9245 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009246 else {
9247 /*
9248 * If we are operating on converted input, try to flush
9249 * remainng chars to avoid them stalling in the non-converted
9250 * buffer.
9251 */
9252 if ((ctxt->input->buf->raw != NULL) &&
9253 (ctxt->input->buf->raw->use > 0)) {
9254 int base = ctxt->input->base -
9255 ctxt->input->buf->buffer->content;
9256 int current = ctxt->input->cur - ctxt->input->base;
9257
9258 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9259 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9260 ctxt->input->cur = ctxt->input->base + current;
9261 ctxt->input->end =
9262 &ctxt->input->buf->buffer->content[
9263 ctxt->input->buf->buffer->use];
9264 }
9265 avail = ctxt->input->buf->buffer->use -
9266 (ctxt->input->cur - ctxt->input->base);
9267 }
Owen Taylor3473f882001-02-23 17:55:21 +00009268 if (avail < 1)
9269 goto done;
9270 switch (ctxt->instate) {
9271 case XML_PARSER_EOF:
9272 /*
9273 * Document parsing is done !
9274 */
9275 goto done;
9276 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009277 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9278 xmlChar start[4];
9279 xmlCharEncoding enc;
9280
9281 /*
9282 * Very first chars read from the document flow.
9283 */
9284 if (avail < 4)
9285 goto done;
9286
9287 /*
9288 * Get the 4 first bytes and decode the charset
9289 * if enc != XML_CHAR_ENCODING_NONE
9290 * plug some encoding conversion routines.
9291 */
9292 start[0] = RAW;
9293 start[1] = NXT(1);
9294 start[2] = NXT(2);
9295 start[3] = NXT(3);
9296 enc = xmlDetectCharEncoding(start, 4);
9297 if (enc != XML_CHAR_ENCODING_NONE) {
9298 xmlSwitchEncoding(ctxt, enc);
9299 }
9300 break;
9301 }
Owen Taylor3473f882001-02-23 17:55:21 +00009302
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009303 if (avail < 2)
9304 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009305 cur = ctxt->input->cur[0];
9306 next = ctxt->input->cur[1];
9307 if (cur == 0) {
9308 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9309 ctxt->sax->setDocumentLocator(ctxt->userData,
9310 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009311 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009312 ctxt->instate = XML_PARSER_EOF;
9313#ifdef DEBUG_PUSH
9314 xmlGenericError(xmlGenericErrorContext,
9315 "PP: entering EOF\n");
9316#endif
9317 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9318 ctxt->sax->endDocument(ctxt->userData);
9319 goto done;
9320 }
9321 if ((cur == '<') && (next == '?')) {
9322 /* PI or XML decl */
9323 if (avail < 5) return(ret);
9324 if ((!terminate) &&
9325 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9326 return(ret);
9327 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9328 ctxt->sax->setDocumentLocator(ctxt->userData,
9329 &xmlDefaultSAXLocator);
9330 if ((ctxt->input->cur[2] == 'x') &&
9331 (ctxt->input->cur[3] == 'm') &&
9332 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009333 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009334 ret += 5;
9335#ifdef DEBUG_PUSH
9336 xmlGenericError(xmlGenericErrorContext,
9337 "PP: Parsing XML Decl\n");
9338#endif
9339 xmlParseXMLDecl(ctxt);
9340 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9341 /*
9342 * The XML REC instructs us to stop parsing right
9343 * here
9344 */
9345 ctxt->instate = XML_PARSER_EOF;
9346 return(0);
9347 }
9348 ctxt->standalone = ctxt->input->standalone;
9349 if ((ctxt->encoding == NULL) &&
9350 (ctxt->input->encoding != NULL))
9351 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9352 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9353 (!ctxt->disableSAX))
9354 ctxt->sax->startDocument(ctxt->userData);
9355 ctxt->instate = XML_PARSER_MISC;
9356#ifdef DEBUG_PUSH
9357 xmlGenericError(xmlGenericErrorContext,
9358 "PP: entering MISC\n");
9359#endif
9360 } else {
9361 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9362 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9363 (!ctxt->disableSAX))
9364 ctxt->sax->startDocument(ctxt->userData);
9365 ctxt->instate = XML_PARSER_MISC;
9366#ifdef DEBUG_PUSH
9367 xmlGenericError(xmlGenericErrorContext,
9368 "PP: entering MISC\n");
9369#endif
9370 }
9371 } else {
9372 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9373 ctxt->sax->setDocumentLocator(ctxt->userData,
9374 &xmlDefaultSAXLocator);
9375 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9376 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9377 (!ctxt->disableSAX))
9378 ctxt->sax->startDocument(ctxt->userData);
9379 ctxt->instate = XML_PARSER_MISC;
9380#ifdef DEBUG_PUSH
9381 xmlGenericError(xmlGenericErrorContext,
9382 "PP: entering MISC\n");
9383#endif
9384 }
9385 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009386 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009387 const xmlChar *name;
9388 const xmlChar *prefix;
9389 const xmlChar *URI;
9390 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009391
9392 if ((avail < 2) && (ctxt->inputNr == 1))
9393 goto done;
9394 cur = ctxt->input->cur[0];
9395 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009396 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009397 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009398 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9399 ctxt->sax->endDocument(ctxt->userData);
9400 goto done;
9401 }
9402 if (!terminate) {
9403 if (ctxt->progressive) {
9404 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9405 goto done;
9406 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9407 goto done;
9408 }
9409 }
9410 if (ctxt->spaceNr == 0)
9411 spacePush(ctxt, -1);
9412 else
9413 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009414#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009415 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009416#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009417 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009418#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009419 else
9420 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009421#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009422 if (name == NULL) {
9423 spacePop(ctxt);
9424 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009425 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9426 ctxt->sax->endDocument(ctxt->userData);
9427 goto done;
9428 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009429#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009430 /*
9431 * [ VC: Root Element Type ]
9432 * The Name in the document type declaration must match
9433 * the element type of the root element.
9434 */
9435 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9436 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9437 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009438#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009439
9440 /*
9441 * Check for an Empty Element.
9442 */
9443 if ((RAW == '/') && (NXT(1) == '>')) {
9444 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009445
9446 if (ctxt->sax2) {
9447 if ((ctxt->sax != NULL) &&
9448 (ctxt->sax->endElementNs != NULL) &&
9449 (!ctxt->disableSAX))
9450 ctxt->sax->endElementNs(ctxt->userData, name,
9451 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009452#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009453 } else {
9454 if ((ctxt->sax != NULL) &&
9455 (ctxt->sax->endElement != NULL) &&
9456 (!ctxt->disableSAX))
9457 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009458#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009459 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009460 spacePop(ctxt);
9461 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009462 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009463 } else {
9464 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009465 }
9466 break;
9467 }
9468 if (RAW == '>') {
9469 NEXT;
9470 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009471 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009472 "Couldn't find end of Start Tag %s\n",
9473 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009474 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009475 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009476 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009477 if (ctxt->sax2)
9478 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009479#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009480 else
9481 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009482#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009483
Daniel Veillarda880b122003-04-21 21:36:41 +00009484 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009485 break;
9486 }
9487 case XML_PARSER_CONTENT: {
9488 const xmlChar *test;
9489 unsigned int cons;
9490 if ((avail < 2) && (ctxt->inputNr == 1))
9491 goto done;
9492 cur = ctxt->input->cur[0];
9493 next = ctxt->input->cur[1];
9494
9495 test = CUR_PTR;
9496 cons = ctxt->input->consumed;
9497 if ((cur == '<') && (next == '/')) {
9498 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009499 break;
9500 } else if ((cur == '<') && (next == '?')) {
9501 if ((!terminate) &&
9502 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9503 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009504 xmlParsePI(ctxt);
9505 } else if ((cur == '<') && (next != '!')) {
9506 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009507 break;
9508 } else if ((cur == '<') && (next == '!') &&
9509 (ctxt->input->cur[2] == '-') &&
9510 (ctxt->input->cur[3] == '-')) {
9511 if ((!terminate) &&
9512 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9513 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009514 xmlParseComment(ctxt);
9515 ctxt->instate = XML_PARSER_CONTENT;
9516 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9517 (ctxt->input->cur[2] == '[') &&
9518 (ctxt->input->cur[3] == 'C') &&
9519 (ctxt->input->cur[4] == 'D') &&
9520 (ctxt->input->cur[5] == 'A') &&
9521 (ctxt->input->cur[6] == 'T') &&
9522 (ctxt->input->cur[7] == 'A') &&
9523 (ctxt->input->cur[8] == '[')) {
9524 SKIP(9);
9525 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009526 break;
9527 } else if ((cur == '<') && (next == '!') &&
9528 (avail < 9)) {
9529 goto done;
9530 } else if (cur == '&') {
9531 if ((!terminate) &&
9532 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9533 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009534 xmlParseReference(ctxt);
9535 } else {
9536 /* TODO Avoid the extra copy, handle directly !!! */
9537 /*
9538 * Goal of the following test is:
9539 * - minimize calls to the SAX 'character' callback
9540 * when they are mergeable
9541 * - handle an problem for isBlank when we only parse
9542 * a sequence of blank chars and the next one is
9543 * not available to check against '<' presence.
9544 * - tries to homogenize the differences in SAX
9545 * callbacks between the push and pull versions
9546 * of the parser.
9547 */
9548 if ((ctxt->inputNr == 1) &&
9549 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9550 if (!terminate) {
9551 if (ctxt->progressive) {
9552 if ((lastlt == NULL) ||
9553 (ctxt->input->cur > lastlt))
9554 goto done;
9555 } else if (xmlParseLookupSequence(ctxt,
9556 '<', 0, 0) < 0) {
9557 goto done;
9558 }
9559 }
9560 }
9561 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009562 xmlParseCharData(ctxt, 0);
9563 }
9564 /*
9565 * Pop-up of finished entities.
9566 */
9567 while ((RAW == 0) && (ctxt->inputNr > 1))
9568 xmlPopInput(ctxt);
9569 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009570 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9571 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009572 ctxt->instate = XML_PARSER_EOF;
9573 break;
9574 }
9575 break;
9576 }
9577 case XML_PARSER_END_TAG:
9578 if (avail < 2)
9579 goto done;
9580 if (!terminate) {
9581 if (ctxt->progressive) {
9582 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9583 goto done;
9584 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9585 goto done;
9586 }
9587 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009588 if (ctxt->sax2) {
9589 xmlParseEndTag2(ctxt,
9590 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9591 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9592 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9593 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009594 }
9595#ifdef LIBXML_SAX1_ENABLED
9596 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009597 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009598#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009599 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009600 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009601 } else {
9602 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009603 }
9604 break;
9605 case XML_PARSER_CDATA_SECTION: {
9606 /*
9607 * The Push mode need to have the SAX callback for
9608 * cdataBlock merge back contiguous callbacks.
9609 */
9610 int base;
9611
9612 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9613 if (base < 0) {
9614 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9615 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9616 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009617 ctxt->sax->cdataBlock(ctxt->userData,
9618 ctxt->input->cur,
9619 XML_PARSER_BIG_BUFFER_SIZE);
9620 else if (ctxt->sax->characters != NULL)
9621 ctxt->sax->characters(ctxt->userData,
9622 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009623 XML_PARSER_BIG_BUFFER_SIZE);
9624 }
9625 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9626 ctxt->checkIndex = 0;
9627 }
9628 goto done;
9629 } else {
9630 if ((ctxt->sax != NULL) && (base > 0) &&
9631 (!ctxt->disableSAX)) {
9632 if (ctxt->sax->cdataBlock != NULL)
9633 ctxt->sax->cdataBlock(ctxt->userData,
9634 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009635 else if (ctxt->sax->characters != NULL)
9636 ctxt->sax->characters(ctxt->userData,
9637 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009638 }
9639 SKIP(base + 3);
9640 ctxt->checkIndex = 0;
9641 ctxt->instate = XML_PARSER_CONTENT;
9642#ifdef DEBUG_PUSH
9643 xmlGenericError(xmlGenericErrorContext,
9644 "PP: entering CONTENT\n");
9645#endif
9646 }
9647 break;
9648 }
Owen Taylor3473f882001-02-23 17:55:21 +00009649 case XML_PARSER_MISC:
9650 SKIP_BLANKS;
9651 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009652 avail = ctxt->input->length -
9653 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009654 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009655 avail = ctxt->input->buf->buffer->use -
9656 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009657 if (avail < 2)
9658 goto done;
9659 cur = ctxt->input->cur[0];
9660 next = ctxt->input->cur[1];
9661 if ((cur == '<') && (next == '?')) {
9662 if ((!terminate) &&
9663 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9664 goto done;
9665#ifdef DEBUG_PUSH
9666 xmlGenericError(xmlGenericErrorContext,
9667 "PP: Parsing PI\n");
9668#endif
9669 xmlParsePI(ctxt);
9670 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009671 (ctxt->input->cur[2] == '-') &&
9672 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009673 if ((!terminate) &&
9674 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9675 goto done;
9676#ifdef DEBUG_PUSH
9677 xmlGenericError(xmlGenericErrorContext,
9678 "PP: Parsing Comment\n");
9679#endif
9680 xmlParseComment(ctxt);
9681 ctxt->instate = XML_PARSER_MISC;
9682 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009683 (ctxt->input->cur[2] == 'D') &&
9684 (ctxt->input->cur[3] == 'O') &&
9685 (ctxt->input->cur[4] == 'C') &&
9686 (ctxt->input->cur[5] == 'T') &&
9687 (ctxt->input->cur[6] == 'Y') &&
9688 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009689 (ctxt->input->cur[8] == 'E')) {
9690 if ((!terminate) &&
9691 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9692 goto done;
9693#ifdef DEBUG_PUSH
9694 xmlGenericError(xmlGenericErrorContext,
9695 "PP: Parsing internal subset\n");
9696#endif
9697 ctxt->inSubset = 1;
9698 xmlParseDocTypeDecl(ctxt);
9699 if (RAW == '[') {
9700 ctxt->instate = XML_PARSER_DTD;
9701#ifdef DEBUG_PUSH
9702 xmlGenericError(xmlGenericErrorContext,
9703 "PP: entering DTD\n");
9704#endif
9705 } else {
9706 /*
9707 * Create and update the external subset.
9708 */
9709 ctxt->inSubset = 2;
9710 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9711 (ctxt->sax->externalSubset != NULL))
9712 ctxt->sax->externalSubset(ctxt->userData,
9713 ctxt->intSubName, ctxt->extSubSystem,
9714 ctxt->extSubURI);
9715 ctxt->inSubset = 0;
9716 ctxt->instate = XML_PARSER_PROLOG;
9717#ifdef DEBUG_PUSH
9718 xmlGenericError(xmlGenericErrorContext,
9719 "PP: entering PROLOG\n");
9720#endif
9721 }
9722 } else if ((cur == '<') && (next == '!') &&
9723 (avail < 9)) {
9724 goto done;
9725 } else {
9726 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009727 ctxt->progressive = 1;
9728 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009729#ifdef DEBUG_PUSH
9730 xmlGenericError(xmlGenericErrorContext,
9731 "PP: entering START_TAG\n");
9732#endif
9733 }
9734 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009735 case XML_PARSER_PROLOG:
9736 SKIP_BLANKS;
9737 if (ctxt->input->buf == NULL)
9738 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9739 else
9740 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9741 if (avail < 2)
9742 goto done;
9743 cur = ctxt->input->cur[0];
9744 next = ctxt->input->cur[1];
9745 if ((cur == '<') && (next == '?')) {
9746 if ((!terminate) &&
9747 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9748 goto done;
9749#ifdef DEBUG_PUSH
9750 xmlGenericError(xmlGenericErrorContext,
9751 "PP: Parsing PI\n");
9752#endif
9753 xmlParsePI(ctxt);
9754 } else if ((cur == '<') && (next == '!') &&
9755 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9756 if ((!terminate) &&
9757 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9758 goto done;
9759#ifdef DEBUG_PUSH
9760 xmlGenericError(xmlGenericErrorContext,
9761 "PP: Parsing Comment\n");
9762#endif
9763 xmlParseComment(ctxt);
9764 ctxt->instate = XML_PARSER_PROLOG;
9765 } else if ((cur == '<') && (next == '!') &&
9766 (avail < 4)) {
9767 goto done;
9768 } else {
9769 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009770 ctxt->progressive = 1;
9771 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009772#ifdef DEBUG_PUSH
9773 xmlGenericError(xmlGenericErrorContext,
9774 "PP: entering START_TAG\n");
9775#endif
9776 }
9777 break;
9778 case XML_PARSER_EPILOG:
9779 SKIP_BLANKS;
9780 if (ctxt->input->buf == NULL)
9781 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9782 else
9783 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9784 if (avail < 2)
9785 goto done;
9786 cur = ctxt->input->cur[0];
9787 next = ctxt->input->cur[1];
9788 if ((cur == '<') && (next == '?')) {
9789 if ((!terminate) &&
9790 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9791 goto done;
9792#ifdef DEBUG_PUSH
9793 xmlGenericError(xmlGenericErrorContext,
9794 "PP: Parsing PI\n");
9795#endif
9796 xmlParsePI(ctxt);
9797 ctxt->instate = XML_PARSER_EPILOG;
9798 } else if ((cur == '<') && (next == '!') &&
9799 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9800 if ((!terminate) &&
9801 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9802 goto done;
9803#ifdef DEBUG_PUSH
9804 xmlGenericError(xmlGenericErrorContext,
9805 "PP: Parsing Comment\n");
9806#endif
9807 xmlParseComment(ctxt);
9808 ctxt->instate = XML_PARSER_EPILOG;
9809 } else if ((cur == '<') && (next == '!') &&
9810 (avail < 4)) {
9811 goto done;
9812 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009813 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009814 ctxt->instate = XML_PARSER_EOF;
9815#ifdef DEBUG_PUSH
9816 xmlGenericError(xmlGenericErrorContext,
9817 "PP: entering EOF\n");
9818#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009819 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009820 ctxt->sax->endDocument(ctxt->userData);
9821 goto done;
9822 }
9823 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009824 case XML_PARSER_DTD: {
9825 /*
9826 * Sorry but progressive parsing of the internal subset
9827 * is not expected to be supported. We first check that
9828 * the full content of the internal subset is available and
9829 * the parsing is launched only at that point.
9830 * Internal subset ends up with "']' S? '>'" in an unescaped
9831 * section and not in a ']]>' sequence which are conditional
9832 * sections (whoever argued to keep that crap in XML deserve
9833 * a place in hell !).
9834 */
9835 int base, i;
9836 xmlChar *buf;
9837 xmlChar quote = 0;
9838
9839 base = ctxt->input->cur - ctxt->input->base;
9840 if (base < 0) return(0);
9841 if (ctxt->checkIndex > base)
9842 base = ctxt->checkIndex;
9843 buf = ctxt->input->buf->buffer->content;
9844 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9845 base++) {
9846 if (quote != 0) {
9847 if (buf[base] == quote)
9848 quote = 0;
9849 continue;
9850 }
9851 if (buf[base] == '"') {
9852 quote = '"';
9853 continue;
9854 }
9855 if (buf[base] == '\'') {
9856 quote = '\'';
9857 continue;
9858 }
9859 if (buf[base] == ']') {
9860 if ((unsigned int) base +1 >=
9861 ctxt->input->buf->buffer->use)
9862 break;
9863 if (buf[base + 1] == ']') {
9864 /* conditional crap, skip both ']' ! */
9865 base++;
9866 continue;
9867 }
9868 for (i = 0;
9869 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9870 i++) {
9871 if (buf[base + i] == '>')
9872 goto found_end_int_subset;
9873 }
9874 break;
9875 }
9876 }
9877 /*
9878 * We didn't found the end of the Internal subset
9879 */
9880 if (quote == 0)
9881 ctxt->checkIndex = base;
9882#ifdef DEBUG_PUSH
9883 if (next == 0)
9884 xmlGenericError(xmlGenericErrorContext,
9885 "PP: lookup of int subset end filed\n");
9886#endif
9887 goto done;
9888
9889found_end_int_subset:
9890 xmlParseInternalSubset(ctxt);
9891 ctxt->inSubset = 2;
9892 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9893 (ctxt->sax->externalSubset != NULL))
9894 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9895 ctxt->extSubSystem, ctxt->extSubURI);
9896 ctxt->inSubset = 0;
9897 ctxt->instate = XML_PARSER_PROLOG;
9898 ctxt->checkIndex = 0;
9899#ifdef DEBUG_PUSH
9900 xmlGenericError(xmlGenericErrorContext,
9901 "PP: entering PROLOG\n");
9902#endif
9903 break;
9904 }
9905 case XML_PARSER_COMMENT:
9906 xmlGenericError(xmlGenericErrorContext,
9907 "PP: internal error, state == COMMENT\n");
9908 ctxt->instate = XML_PARSER_CONTENT;
9909#ifdef DEBUG_PUSH
9910 xmlGenericError(xmlGenericErrorContext,
9911 "PP: entering CONTENT\n");
9912#endif
9913 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009914 case XML_PARSER_IGNORE:
9915 xmlGenericError(xmlGenericErrorContext,
9916 "PP: internal error, state == IGNORE");
9917 ctxt->instate = XML_PARSER_DTD;
9918#ifdef DEBUG_PUSH
9919 xmlGenericError(xmlGenericErrorContext,
9920 "PP: entering DTD\n");
9921#endif
9922 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009923 case XML_PARSER_PI:
9924 xmlGenericError(xmlGenericErrorContext,
9925 "PP: internal error, state == PI\n");
9926 ctxt->instate = XML_PARSER_CONTENT;
9927#ifdef DEBUG_PUSH
9928 xmlGenericError(xmlGenericErrorContext,
9929 "PP: entering CONTENT\n");
9930#endif
9931 break;
9932 case XML_PARSER_ENTITY_DECL:
9933 xmlGenericError(xmlGenericErrorContext,
9934 "PP: internal error, state == ENTITY_DECL\n");
9935 ctxt->instate = XML_PARSER_DTD;
9936#ifdef DEBUG_PUSH
9937 xmlGenericError(xmlGenericErrorContext,
9938 "PP: entering DTD\n");
9939#endif
9940 break;
9941 case XML_PARSER_ENTITY_VALUE:
9942 xmlGenericError(xmlGenericErrorContext,
9943 "PP: internal error, state == ENTITY_VALUE\n");
9944 ctxt->instate = XML_PARSER_CONTENT;
9945#ifdef DEBUG_PUSH
9946 xmlGenericError(xmlGenericErrorContext,
9947 "PP: entering DTD\n");
9948#endif
9949 break;
9950 case XML_PARSER_ATTRIBUTE_VALUE:
9951 xmlGenericError(xmlGenericErrorContext,
9952 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9953 ctxt->instate = XML_PARSER_START_TAG;
9954#ifdef DEBUG_PUSH
9955 xmlGenericError(xmlGenericErrorContext,
9956 "PP: entering START_TAG\n");
9957#endif
9958 break;
9959 case XML_PARSER_SYSTEM_LITERAL:
9960 xmlGenericError(xmlGenericErrorContext,
9961 "PP: internal error, state == SYSTEM_LITERAL\n");
9962 ctxt->instate = XML_PARSER_START_TAG;
9963#ifdef DEBUG_PUSH
9964 xmlGenericError(xmlGenericErrorContext,
9965 "PP: entering START_TAG\n");
9966#endif
9967 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009968 case XML_PARSER_PUBLIC_LITERAL:
9969 xmlGenericError(xmlGenericErrorContext,
9970 "PP: internal error, state == PUBLIC_LITERAL\n");
9971 ctxt->instate = XML_PARSER_START_TAG;
9972#ifdef DEBUG_PUSH
9973 xmlGenericError(xmlGenericErrorContext,
9974 "PP: entering START_TAG\n");
9975#endif
9976 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009977 }
9978 }
9979done:
9980#ifdef DEBUG_PUSH
9981 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9982#endif
9983 return(ret);
9984}
9985
9986/**
Owen Taylor3473f882001-02-23 17:55:21 +00009987 * xmlParseChunk:
9988 * @ctxt: an XML parser context
9989 * @chunk: an char array
9990 * @size: the size in byte of the chunk
9991 * @terminate: last chunk indicator
9992 *
9993 * Parse a Chunk of memory
9994 *
9995 * Returns zero if no error, the xmlParserErrors otherwise.
9996 */
9997int
9998xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9999 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010000 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10001 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010002 if (ctxt->instate == XML_PARSER_START)
10003 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010004 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10005 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10006 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10007 int cur = ctxt->input->cur - ctxt->input->base;
10008
10009 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10010 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10011 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010012 ctxt->input->end =
10013 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010014#ifdef DEBUG_PUSH
10015 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10016#endif
10017
Owen Taylor3473f882001-02-23 17:55:21 +000010018 } else if (ctxt->instate != XML_PARSER_EOF) {
10019 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10020 xmlParserInputBufferPtr in = ctxt->input->buf;
10021 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10022 (in->raw != NULL)) {
10023 int nbchars;
10024
10025 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10026 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010027 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010028 xmlGenericError(xmlGenericErrorContext,
10029 "xmlParseChunk: encoder error\n");
10030 return(XML_ERR_INVALID_ENCODING);
10031 }
10032 }
10033 }
10034 }
10035 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010036 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10037 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010038 if (terminate) {
10039 /*
10040 * Check for termination
10041 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010042 int avail = 0;
10043 if (ctxt->input->buf == NULL)
10044 avail = ctxt->input->length -
10045 (ctxt->input->cur - ctxt->input->base);
10046 else
10047 avail = ctxt->input->buf->buffer->use -
10048 (ctxt->input->cur - ctxt->input->base);
10049
Owen Taylor3473f882001-02-23 17:55:21 +000010050 if ((ctxt->instate != XML_PARSER_EOF) &&
10051 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010052 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010053 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010054 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010055 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010056 }
Owen Taylor3473f882001-02-23 17:55:21 +000010057 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010058 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010059 ctxt->sax->endDocument(ctxt->userData);
10060 }
10061 ctxt->instate = XML_PARSER_EOF;
10062 }
10063 return((xmlParserErrors) ctxt->errNo);
10064}
10065
10066/************************************************************************
10067 * *
10068 * I/O front end functions to the parser *
10069 * *
10070 ************************************************************************/
10071
10072/**
10073 * xmlStopParser:
10074 * @ctxt: an XML parser context
10075 *
10076 * Blocks further parser processing
10077 */
10078void
10079xmlStopParser(xmlParserCtxtPtr ctxt) {
10080 ctxt->instate = XML_PARSER_EOF;
10081 if (ctxt->input != NULL)
10082 ctxt->input->cur = BAD_CAST"";
10083}
10084
10085/**
10086 * xmlCreatePushParserCtxt:
10087 * @sax: a SAX handler
10088 * @user_data: The user data returned on SAX callbacks
10089 * @chunk: a pointer to an array of chars
10090 * @size: number of chars in the array
10091 * @filename: an optional file name or URI
10092 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010093 * Create a parser context for using the XML parser in push mode.
10094 * If @buffer and @size are non-NULL, the data is used to detect
10095 * the encoding. The remaining characters will be parsed so they
10096 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010097 * To allow content encoding detection, @size should be >= 4
10098 * The value of @filename is used for fetching external entities
10099 * and error/warning reports.
10100 *
10101 * Returns the new parser context or NULL
10102 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010103
Owen Taylor3473f882001-02-23 17:55:21 +000010104xmlParserCtxtPtr
10105xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10106 const char *chunk, int size, const char *filename) {
10107 xmlParserCtxtPtr ctxt;
10108 xmlParserInputPtr inputStream;
10109 xmlParserInputBufferPtr buf;
10110 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10111
10112 /*
10113 * plug some encoding conversion routines
10114 */
10115 if ((chunk != NULL) && (size >= 4))
10116 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10117
10118 buf = xmlAllocParserInputBuffer(enc);
10119 if (buf == NULL) return(NULL);
10120
10121 ctxt = xmlNewParserCtxt();
10122 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010123 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010124 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010125 return(NULL);
10126 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010127 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10128 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010129 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010130 xmlFreeParserInputBuffer(buf);
10131 xmlFreeParserCtxt(ctxt);
10132 return(NULL);
10133 }
Owen Taylor3473f882001-02-23 17:55:21 +000010134 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010135#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010136 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010137#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010138 xmlFree(ctxt->sax);
10139 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10140 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010141 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010142 xmlFreeParserInputBuffer(buf);
10143 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010144 return(NULL);
10145 }
10146 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10147 if (user_data != NULL)
10148 ctxt->userData = user_data;
10149 }
10150 if (filename == NULL) {
10151 ctxt->directory = NULL;
10152 } else {
10153 ctxt->directory = xmlParserGetDirectory(filename);
10154 }
10155
10156 inputStream = xmlNewInputStream(ctxt);
10157 if (inputStream == NULL) {
10158 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010159 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010160 return(NULL);
10161 }
10162
10163 if (filename == NULL)
10164 inputStream->filename = NULL;
10165 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010166 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010167 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010168 inputStream->buf = buf;
10169 inputStream->base = inputStream->buf->buffer->content;
10170 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010171 inputStream->end =
10172 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010173
10174 inputPush(ctxt, inputStream);
10175
10176 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10177 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010178 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10179 int cur = ctxt->input->cur - ctxt->input->base;
10180
Owen Taylor3473f882001-02-23 17:55:21 +000010181 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010182
10183 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10184 ctxt->input->cur = ctxt->input->base + cur;
10185 ctxt->input->end =
10186 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010187#ifdef DEBUG_PUSH
10188 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10189#endif
10190 }
10191
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010192 if (enc != XML_CHAR_ENCODING_NONE) {
10193 xmlSwitchEncoding(ctxt, enc);
10194 }
10195
Owen Taylor3473f882001-02-23 17:55:21 +000010196 return(ctxt);
10197}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010198#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010199
10200/**
10201 * xmlCreateIOParserCtxt:
10202 * @sax: a SAX handler
10203 * @user_data: The user data returned on SAX callbacks
10204 * @ioread: an I/O read function
10205 * @ioclose: an I/O close function
10206 * @ioctx: an I/O handler
10207 * @enc: the charset encoding if known
10208 *
10209 * Create a parser context for using the XML parser with an existing
10210 * I/O stream
10211 *
10212 * Returns the new parser context or NULL
10213 */
10214xmlParserCtxtPtr
10215xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10216 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10217 void *ioctx, xmlCharEncoding enc) {
10218 xmlParserCtxtPtr ctxt;
10219 xmlParserInputPtr inputStream;
10220 xmlParserInputBufferPtr buf;
10221
10222 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10223 if (buf == NULL) return(NULL);
10224
10225 ctxt = xmlNewParserCtxt();
10226 if (ctxt == NULL) {
10227 xmlFree(buf);
10228 return(NULL);
10229 }
10230 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010231#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010232 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010233#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010234 xmlFree(ctxt->sax);
10235 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10236 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010237 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010238 xmlFree(ctxt);
10239 return(NULL);
10240 }
10241 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10242 if (user_data != NULL)
10243 ctxt->userData = user_data;
10244 }
10245
10246 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10247 if (inputStream == NULL) {
10248 xmlFreeParserCtxt(ctxt);
10249 return(NULL);
10250 }
10251 inputPush(ctxt, inputStream);
10252
10253 return(ctxt);
10254}
10255
Daniel Veillard4432df22003-09-28 18:58:27 +000010256#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010257/************************************************************************
10258 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010259 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010260 * *
10261 ************************************************************************/
10262
10263/**
10264 * xmlIOParseDTD:
10265 * @sax: the SAX handler block or NULL
10266 * @input: an Input Buffer
10267 * @enc: the charset encoding if known
10268 *
10269 * Load and parse a DTD
10270 *
10271 * Returns the resulting xmlDtdPtr or NULL in case of error.
10272 * @input will be freed at parsing end.
10273 */
10274
10275xmlDtdPtr
10276xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10277 xmlCharEncoding enc) {
10278 xmlDtdPtr ret = NULL;
10279 xmlParserCtxtPtr ctxt;
10280 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010281 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010282
10283 if (input == NULL)
10284 return(NULL);
10285
10286 ctxt = xmlNewParserCtxt();
10287 if (ctxt == NULL) {
10288 return(NULL);
10289 }
10290
10291 /*
10292 * Set-up the SAX context
10293 */
10294 if (sax != NULL) {
10295 if (ctxt->sax != NULL)
10296 xmlFree(ctxt->sax);
10297 ctxt->sax = sax;
10298 ctxt->userData = NULL;
10299 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010300 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010301
10302 /*
10303 * generate a parser input from the I/O handler
10304 */
10305
10306 pinput = xmlNewIOInputStream(ctxt, input, enc);
10307 if (pinput == NULL) {
10308 if (sax != NULL) ctxt->sax = NULL;
10309 xmlFreeParserCtxt(ctxt);
10310 return(NULL);
10311 }
10312
10313 /*
10314 * plug some encoding conversion routines here.
10315 */
10316 xmlPushInput(ctxt, pinput);
10317
10318 pinput->filename = NULL;
10319 pinput->line = 1;
10320 pinput->col = 1;
10321 pinput->base = ctxt->input->cur;
10322 pinput->cur = ctxt->input->cur;
10323 pinput->free = NULL;
10324
10325 /*
10326 * let's parse that entity knowing it's an external subset.
10327 */
10328 ctxt->inSubset = 2;
10329 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10330 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10331 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010332
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010333 if ((enc == XML_CHAR_ENCODING_NONE) &&
10334 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010335 /*
10336 * Get the 4 first bytes and decode the charset
10337 * if enc != XML_CHAR_ENCODING_NONE
10338 * plug some encoding conversion routines.
10339 */
10340 start[0] = RAW;
10341 start[1] = NXT(1);
10342 start[2] = NXT(2);
10343 start[3] = NXT(3);
10344 enc = xmlDetectCharEncoding(start, 4);
10345 if (enc != XML_CHAR_ENCODING_NONE) {
10346 xmlSwitchEncoding(ctxt, enc);
10347 }
10348 }
10349
Owen Taylor3473f882001-02-23 17:55:21 +000010350 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10351
10352 if (ctxt->myDoc != NULL) {
10353 if (ctxt->wellFormed) {
10354 ret = ctxt->myDoc->extSubset;
10355 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010356 if (ret != NULL) {
10357 xmlNodePtr tmp;
10358
10359 ret->doc = NULL;
10360 tmp = ret->children;
10361 while (tmp != NULL) {
10362 tmp->doc = NULL;
10363 tmp = tmp->next;
10364 }
10365 }
Owen Taylor3473f882001-02-23 17:55:21 +000010366 } else {
10367 ret = NULL;
10368 }
10369 xmlFreeDoc(ctxt->myDoc);
10370 ctxt->myDoc = NULL;
10371 }
10372 if (sax != NULL) ctxt->sax = NULL;
10373 xmlFreeParserCtxt(ctxt);
10374
10375 return(ret);
10376}
10377
10378/**
10379 * xmlSAXParseDTD:
10380 * @sax: the SAX handler block
10381 * @ExternalID: a NAME* containing the External ID of the DTD
10382 * @SystemID: a NAME* containing the URL to the DTD
10383 *
10384 * Load and parse an external subset.
10385 *
10386 * Returns the resulting xmlDtdPtr or NULL in case of error.
10387 */
10388
10389xmlDtdPtr
10390xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10391 const xmlChar *SystemID) {
10392 xmlDtdPtr ret = NULL;
10393 xmlParserCtxtPtr ctxt;
10394 xmlParserInputPtr input = NULL;
10395 xmlCharEncoding enc;
10396
10397 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10398
10399 ctxt = xmlNewParserCtxt();
10400 if (ctxt == NULL) {
10401 return(NULL);
10402 }
10403
10404 /*
10405 * Set-up the SAX context
10406 */
10407 if (sax != NULL) {
10408 if (ctxt->sax != NULL)
10409 xmlFree(ctxt->sax);
10410 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010411 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010412 }
10413
10414 /*
10415 * Ask the Entity resolver to load the damn thing
10416 */
10417
10418 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010419 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010420 if (input == NULL) {
10421 if (sax != NULL) ctxt->sax = NULL;
10422 xmlFreeParserCtxt(ctxt);
10423 return(NULL);
10424 }
10425
10426 /*
10427 * plug some encoding conversion routines here.
10428 */
10429 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010430 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10431 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10432 xmlSwitchEncoding(ctxt, enc);
10433 }
Owen Taylor3473f882001-02-23 17:55:21 +000010434
10435 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010436 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010437 input->line = 1;
10438 input->col = 1;
10439 input->base = ctxt->input->cur;
10440 input->cur = ctxt->input->cur;
10441 input->free = NULL;
10442
10443 /*
10444 * let's parse that entity knowing it's an external subset.
10445 */
10446 ctxt->inSubset = 2;
10447 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10448 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10449 ExternalID, SystemID);
10450 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10451
10452 if (ctxt->myDoc != NULL) {
10453 if (ctxt->wellFormed) {
10454 ret = ctxt->myDoc->extSubset;
10455 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010456 if (ret != NULL) {
10457 xmlNodePtr tmp;
10458
10459 ret->doc = NULL;
10460 tmp = ret->children;
10461 while (tmp != NULL) {
10462 tmp->doc = NULL;
10463 tmp = tmp->next;
10464 }
10465 }
Owen Taylor3473f882001-02-23 17:55:21 +000010466 } else {
10467 ret = NULL;
10468 }
10469 xmlFreeDoc(ctxt->myDoc);
10470 ctxt->myDoc = NULL;
10471 }
10472 if (sax != NULL) ctxt->sax = NULL;
10473 xmlFreeParserCtxt(ctxt);
10474
10475 return(ret);
10476}
10477
Daniel Veillard4432df22003-09-28 18:58:27 +000010478
Owen Taylor3473f882001-02-23 17:55:21 +000010479/**
10480 * xmlParseDTD:
10481 * @ExternalID: a NAME* containing the External ID of the DTD
10482 * @SystemID: a NAME* containing the URL to the DTD
10483 *
10484 * Load and parse an external subset.
10485 *
10486 * Returns the resulting xmlDtdPtr or NULL in case of error.
10487 */
10488
10489xmlDtdPtr
10490xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10491 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10492}
Daniel Veillard4432df22003-09-28 18:58:27 +000010493#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010494
10495/************************************************************************
10496 * *
10497 * Front ends when parsing an Entity *
10498 * *
10499 ************************************************************************/
10500
10501/**
Owen Taylor3473f882001-02-23 17:55:21 +000010502 * xmlParseCtxtExternalEntity:
10503 * @ctx: the existing parsing context
10504 * @URL: the URL for the entity to load
10505 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010506 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010507 *
10508 * Parse an external general entity within an existing parsing context
10509 * An external general parsed entity is well-formed if it matches the
10510 * production labeled extParsedEnt.
10511 *
10512 * [78] extParsedEnt ::= TextDecl? content
10513 *
10514 * Returns 0 if the entity is well formed, -1 in case of args problem and
10515 * the parser error code otherwise
10516 */
10517
10518int
10519xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010520 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010521 xmlParserCtxtPtr ctxt;
10522 xmlDocPtr newDoc;
10523 xmlSAXHandlerPtr oldsax = NULL;
10524 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010525 xmlChar start[4];
10526 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010527
10528 if (ctx->depth > 40) {
10529 return(XML_ERR_ENTITY_LOOP);
10530 }
10531
Daniel Veillardcda96922001-08-21 10:56:31 +000010532 if (lst != NULL)
10533 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010534 if ((URL == NULL) && (ID == NULL))
10535 return(-1);
10536 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10537 return(-1);
10538
10539
10540 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10541 if (ctxt == NULL) return(-1);
10542 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010543 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010544 oldsax = ctxt->sax;
10545 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010546 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010547 newDoc = xmlNewDoc(BAD_CAST "1.0");
10548 if (newDoc == NULL) {
10549 xmlFreeParserCtxt(ctxt);
10550 return(-1);
10551 }
10552 if (ctx->myDoc != NULL) {
10553 newDoc->intSubset = ctx->myDoc->intSubset;
10554 newDoc->extSubset = ctx->myDoc->extSubset;
10555 }
10556 if (ctx->myDoc->URL != NULL) {
10557 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10558 }
10559 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10560 if (newDoc->children == NULL) {
10561 ctxt->sax = oldsax;
10562 xmlFreeParserCtxt(ctxt);
10563 newDoc->intSubset = NULL;
10564 newDoc->extSubset = NULL;
10565 xmlFreeDoc(newDoc);
10566 return(-1);
10567 }
10568 nodePush(ctxt, newDoc->children);
10569 if (ctx->myDoc == NULL) {
10570 ctxt->myDoc = newDoc;
10571 } else {
10572 ctxt->myDoc = ctx->myDoc;
10573 newDoc->children->doc = ctx->myDoc;
10574 }
10575
Daniel Veillard87a764e2001-06-20 17:41:10 +000010576 /*
10577 * Get the 4 first bytes and decode the charset
10578 * if enc != XML_CHAR_ENCODING_NONE
10579 * plug some encoding conversion routines.
10580 */
10581 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010582 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10583 start[0] = RAW;
10584 start[1] = NXT(1);
10585 start[2] = NXT(2);
10586 start[3] = NXT(3);
10587 enc = xmlDetectCharEncoding(start, 4);
10588 if (enc != XML_CHAR_ENCODING_NONE) {
10589 xmlSwitchEncoding(ctxt, enc);
10590 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010591 }
10592
Owen Taylor3473f882001-02-23 17:55:21 +000010593 /*
10594 * Parse a possible text declaration first
10595 */
William M. Brack76e95df2003-10-18 16:20:14 +000010596 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010597 xmlParseTextDecl(ctxt);
10598 }
10599
10600 /*
10601 * Doing validity checking on chunk doesn't make sense
10602 */
10603 ctxt->instate = XML_PARSER_CONTENT;
10604 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010605 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010606 ctxt->loadsubset = ctx->loadsubset;
10607 ctxt->depth = ctx->depth + 1;
10608 ctxt->replaceEntities = ctx->replaceEntities;
10609 if (ctxt->validate) {
10610 ctxt->vctxt.error = ctx->vctxt.error;
10611 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010612 } else {
10613 ctxt->vctxt.error = NULL;
10614 ctxt->vctxt.warning = NULL;
10615 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010616 ctxt->vctxt.nodeTab = NULL;
10617 ctxt->vctxt.nodeNr = 0;
10618 ctxt->vctxt.nodeMax = 0;
10619 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010620
10621 xmlParseContent(ctxt);
10622
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010623 ctx->validate = ctxt->validate;
10624 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010625 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010626 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010627 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010628 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010629 }
10630 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010631 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010632 }
10633
10634 if (!ctxt->wellFormed) {
10635 if (ctxt->errNo == 0)
10636 ret = 1;
10637 else
10638 ret = ctxt->errNo;
10639 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010640 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010641 xmlNodePtr cur;
10642
10643 /*
10644 * Return the newly created nodeset after unlinking it from
10645 * they pseudo parent.
10646 */
10647 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010648 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010649 while (cur != NULL) {
10650 cur->parent = NULL;
10651 cur = cur->next;
10652 }
10653 newDoc->children->children = NULL;
10654 }
10655 ret = 0;
10656 }
10657 ctxt->sax = oldsax;
10658 xmlFreeParserCtxt(ctxt);
10659 newDoc->intSubset = NULL;
10660 newDoc->extSubset = NULL;
10661 xmlFreeDoc(newDoc);
10662
10663 return(ret);
10664}
10665
10666/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010667 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010668 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010669 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010670 * @sax: the SAX handler bloc (possibly NULL)
10671 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10672 * @depth: Used for loop detection, use 0
10673 * @URL: the URL for the entity to load
10674 * @ID: the System ID for the entity to load
10675 * @list: the return value for the set of parsed nodes
10676 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010677 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010678 *
10679 * Returns 0 if the entity is well formed, -1 in case of args problem and
10680 * the parser error code otherwise
10681 */
10682
Daniel Veillard7d515752003-09-26 19:12:37 +000010683static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010684xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10685 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010686 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010687 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010688 xmlParserCtxtPtr ctxt;
10689 xmlDocPtr newDoc;
10690 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010691 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010692 xmlChar start[4];
10693 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010694
10695 if (depth > 40) {
10696 return(XML_ERR_ENTITY_LOOP);
10697 }
10698
10699
10700
10701 if (list != NULL)
10702 *list = NULL;
10703 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010704 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010705 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010706 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010707
10708
10709 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010710 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010711 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010712 if (oldctxt != NULL) {
10713 ctxt->_private = oldctxt->_private;
10714 ctxt->loadsubset = oldctxt->loadsubset;
10715 ctxt->validate = oldctxt->validate;
10716 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010717 ctxt->record_info = oldctxt->record_info;
10718 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10719 ctxt->node_seq.length = oldctxt->node_seq.length;
10720 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010721 } else {
10722 /*
10723 * Doing validity checking on chunk without context
10724 * doesn't make sense
10725 */
10726 ctxt->_private = NULL;
10727 ctxt->validate = 0;
10728 ctxt->external = 2;
10729 ctxt->loadsubset = 0;
10730 }
Owen Taylor3473f882001-02-23 17:55:21 +000010731 if (sax != NULL) {
10732 oldsax = ctxt->sax;
10733 ctxt->sax = sax;
10734 if (user_data != NULL)
10735 ctxt->userData = user_data;
10736 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010737 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010738 newDoc = xmlNewDoc(BAD_CAST "1.0");
10739 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010740 ctxt->node_seq.maximum = 0;
10741 ctxt->node_seq.length = 0;
10742 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010743 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010744 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010745 }
10746 if (doc != NULL) {
10747 newDoc->intSubset = doc->intSubset;
10748 newDoc->extSubset = doc->extSubset;
10749 }
10750 if (doc->URL != NULL) {
10751 newDoc->URL = xmlStrdup(doc->URL);
10752 }
10753 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10754 if (newDoc->children == NULL) {
10755 if (sax != NULL)
10756 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010757 ctxt->node_seq.maximum = 0;
10758 ctxt->node_seq.length = 0;
10759 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010760 xmlFreeParserCtxt(ctxt);
10761 newDoc->intSubset = NULL;
10762 newDoc->extSubset = NULL;
10763 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010764 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010765 }
10766 nodePush(ctxt, newDoc->children);
10767 if (doc == NULL) {
10768 ctxt->myDoc = newDoc;
10769 } else {
10770 ctxt->myDoc = doc;
10771 newDoc->children->doc = doc;
10772 }
10773
Daniel Veillard87a764e2001-06-20 17:41:10 +000010774 /*
10775 * Get the 4 first bytes and decode the charset
10776 * if enc != XML_CHAR_ENCODING_NONE
10777 * plug some encoding conversion routines.
10778 */
10779 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010780 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10781 start[0] = RAW;
10782 start[1] = NXT(1);
10783 start[2] = NXT(2);
10784 start[3] = NXT(3);
10785 enc = xmlDetectCharEncoding(start, 4);
10786 if (enc != XML_CHAR_ENCODING_NONE) {
10787 xmlSwitchEncoding(ctxt, enc);
10788 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010789 }
10790
Owen Taylor3473f882001-02-23 17:55:21 +000010791 /*
10792 * Parse a possible text declaration first
10793 */
William M. Brack76e95df2003-10-18 16:20:14 +000010794 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010795 xmlParseTextDecl(ctxt);
10796 }
10797
Owen Taylor3473f882001-02-23 17:55:21 +000010798 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010799 ctxt->depth = depth;
10800
10801 xmlParseContent(ctxt);
10802
Daniel Veillard561b7f82002-03-20 21:55:57 +000010803 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010804 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010805 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010806 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010807 }
10808 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010809 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010810 }
10811
10812 if (!ctxt->wellFormed) {
10813 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010814 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010815 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010816 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010817 } else {
10818 if (list != NULL) {
10819 xmlNodePtr cur;
10820
10821 /*
10822 * Return the newly created nodeset after unlinking it from
10823 * they pseudo parent.
10824 */
10825 cur = newDoc->children->children;
10826 *list = cur;
10827 while (cur != NULL) {
10828 cur->parent = NULL;
10829 cur = cur->next;
10830 }
10831 newDoc->children->children = NULL;
10832 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010833 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010834 }
10835 if (sax != NULL)
10836 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010837 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10838 oldctxt->node_seq.length = ctxt->node_seq.length;
10839 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010840 ctxt->node_seq.maximum = 0;
10841 ctxt->node_seq.length = 0;
10842 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010843 xmlFreeParserCtxt(ctxt);
10844 newDoc->intSubset = NULL;
10845 newDoc->extSubset = NULL;
10846 xmlFreeDoc(newDoc);
10847
10848 return(ret);
10849}
10850
Daniel Veillard81273902003-09-30 00:43:48 +000010851#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010852/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010853 * xmlParseExternalEntity:
10854 * @doc: the document the chunk pertains to
10855 * @sax: the SAX handler bloc (possibly NULL)
10856 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10857 * @depth: Used for loop detection, use 0
10858 * @URL: the URL for the entity to load
10859 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010860 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010861 *
10862 * Parse an external general entity
10863 * An external general parsed entity is well-formed if it matches the
10864 * production labeled extParsedEnt.
10865 *
10866 * [78] extParsedEnt ::= TextDecl? content
10867 *
10868 * Returns 0 if the entity is well formed, -1 in case of args problem and
10869 * the parser error code otherwise
10870 */
10871
10872int
10873xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010874 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010875 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010876 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010877}
10878
10879/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010880 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010881 * @doc: the document the chunk pertains to
10882 * @sax: the SAX handler bloc (possibly NULL)
10883 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10884 * @depth: Used for loop detection, use 0
10885 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010886 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010887 *
10888 * Parse a well-balanced chunk of an XML document
10889 * called by the parser
10890 * The allowed sequence for the Well Balanced Chunk is the one defined by
10891 * the content production in the XML grammar:
10892 *
10893 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10894 *
10895 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10896 * the parser error code otherwise
10897 */
10898
10899int
10900xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010901 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010902 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10903 depth, string, lst, 0 );
10904}
Daniel Veillard81273902003-09-30 00:43:48 +000010905#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010906
10907/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010908 * xmlParseBalancedChunkMemoryInternal:
10909 * @oldctxt: the existing parsing context
10910 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10911 * @user_data: the user data field for the parser context
10912 * @lst: the return value for the set of parsed nodes
10913 *
10914 *
10915 * Parse a well-balanced chunk of an XML document
10916 * called by the parser
10917 * The allowed sequence for the Well Balanced Chunk is the one defined by
10918 * the content production in the XML grammar:
10919 *
10920 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10921 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010922 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10923 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010924 *
10925 * In case recover is set to 1, the nodelist will not be empty even if
10926 * the parsed chunk is not well balanced.
10927 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010928static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010929xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10930 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10931 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010932 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010933 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010934 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010935 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010936 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010937
10938 if (oldctxt->depth > 40) {
10939 return(XML_ERR_ENTITY_LOOP);
10940 }
10941
10942
10943 if (lst != NULL)
10944 *lst = NULL;
10945 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010946 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010947
10948 size = xmlStrlen(string);
10949
10950 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010951 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010952 if (user_data != NULL)
10953 ctxt->userData = user_data;
10954 else
10955 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010956 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10957 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010958
10959 oldsax = ctxt->sax;
10960 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010961 xmlDetectSAX2(ctxt);
10962
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010963 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010964 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010965 newDoc = xmlNewDoc(BAD_CAST "1.0");
10966 if (newDoc == NULL) {
10967 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010968 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010969 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010970 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010971 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010972 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010973 } else {
10974 ctxt->myDoc = oldctxt->myDoc;
10975 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010976 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010977 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010978 BAD_CAST "pseudoroot", NULL);
10979 if (ctxt->myDoc->children == NULL) {
10980 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010981 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010982 xmlFreeParserCtxt(ctxt);
10983 if (newDoc != NULL)
10984 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010985 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010986 }
10987 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010988 ctxt->instate = XML_PARSER_CONTENT;
10989 ctxt->depth = oldctxt->depth + 1;
10990
Daniel Veillard328f48c2002-11-15 15:24:34 +000010991 ctxt->validate = 0;
10992 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010993 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10994 /*
10995 * ID/IDREF registration will be done in xmlValidateElement below
10996 */
10997 ctxt->loadsubset |= XML_SKIP_IDS;
10998 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010999 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011000
Daniel Veillard68e9e742002-11-16 15:35:11 +000011001 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011002 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011003 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011004 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011005 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011006 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011007 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011008 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011009 }
11010
11011 if (!ctxt->wellFormed) {
11012 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011013 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011014 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011015 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011016 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011017 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011018 }
11019
William M. Brack7b9154b2003-09-27 19:23:50 +000011020 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011021 xmlNodePtr cur;
11022
11023 /*
11024 * Return the newly created nodeset after unlinking it from
11025 * they pseudo parent.
11026 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011027 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011028 *lst = cur;
11029 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011030#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011031 if (oldctxt->validate && oldctxt->wellFormed &&
11032 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11033 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11034 oldctxt->myDoc, cur);
11035 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011036#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011037 cur->parent = NULL;
11038 cur = cur->next;
11039 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011040 ctxt->myDoc->children->children = NULL;
11041 }
11042 if (ctxt->myDoc != NULL) {
11043 xmlFreeNode(ctxt->myDoc->children);
11044 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011045 }
11046
11047 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011048 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011049 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011050 if (newDoc != NULL)
11051 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011052
11053 return(ret);
11054}
11055
Daniel Veillard81273902003-09-30 00:43:48 +000011056#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011057/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011058 * xmlParseBalancedChunkMemoryRecover:
11059 * @doc: the document the chunk pertains to
11060 * @sax: the SAX handler bloc (possibly NULL)
11061 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11062 * @depth: Used for loop detection, use 0
11063 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11064 * @lst: the return value for the set of parsed nodes
11065 * @recover: return nodes even if the data is broken (use 0)
11066 *
11067 *
11068 * Parse a well-balanced chunk of an XML document
11069 * called by the parser
11070 * The allowed sequence for the Well Balanced Chunk is the one defined by
11071 * the content production in the XML grammar:
11072 *
11073 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11074 *
11075 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11076 * the parser error code otherwise
11077 *
11078 * In case recover is set to 1, the nodelist will not be empty even if
11079 * the parsed chunk is not well balanced.
11080 */
11081int
11082xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11083 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11084 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011085 xmlParserCtxtPtr ctxt;
11086 xmlDocPtr newDoc;
11087 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011088 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011089 int size;
11090 int ret = 0;
11091
11092 if (depth > 40) {
11093 return(XML_ERR_ENTITY_LOOP);
11094 }
11095
11096
Daniel Veillardcda96922001-08-21 10:56:31 +000011097 if (lst != NULL)
11098 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011099 if (string == NULL)
11100 return(-1);
11101
11102 size = xmlStrlen(string);
11103
11104 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11105 if (ctxt == NULL) return(-1);
11106 ctxt->userData = ctxt;
11107 if (sax != NULL) {
11108 oldsax = ctxt->sax;
11109 ctxt->sax = sax;
11110 if (user_data != NULL)
11111 ctxt->userData = user_data;
11112 }
11113 newDoc = xmlNewDoc(BAD_CAST "1.0");
11114 if (newDoc == NULL) {
11115 xmlFreeParserCtxt(ctxt);
11116 return(-1);
11117 }
11118 if (doc != NULL) {
11119 newDoc->intSubset = doc->intSubset;
11120 newDoc->extSubset = doc->extSubset;
11121 }
11122 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11123 if (newDoc->children == NULL) {
11124 if (sax != NULL)
11125 ctxt->sax = oldsax;
11126 xmlFreeParserCtxt(ctxt);
11127 newDoc->intSubset = NULL;
11128 newDoc->extSubset = NULL;
11129 xmlFreeDoc(newDoc);
11130 return(-1);
11131 }
11132 nodePush(ctxt, newDoc->children);
11133 if (doc == NULL) {
11134 ctxt->myDoc = newDoc;
11135 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011136 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011137 newDoc->children->doc = doc;
11138 }
11139 ctxt->instate = XML_PARSER_CONTENT;
11140 ctxt->depth = depth;
11141
11142 /*
11143 * Doing validity checking on chunk doesn't make sense
11144 */
11145 ctxt->validate = 0;
11146 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011147 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011148
Daniel Veillardb39bc392002-10-26 19:29:51 +000011149 if ( doc != NULL ){
11150 content = doc->children;
11151 doc->children = NULL;
11152 xmlParseContent(ctxt);
11153 doc->children = content;
11154 }
11155 else {
11156 xmlParseContent(ctxt);
11157 }
Owen Taylor3473f882001-02-23 17:55:21 +000011158 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011159 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011160 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011161 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011162 }
11163 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011164 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011165 }
11166
11167 if (!ctxt->wellFormed) {
11168 if (ctxt->errNo == 0)
11169 ret = 1;
11170 else
11171 ret = ctxt->errNo;
11172 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011173 ret = 0;
11174 }
11175
11176 if (lst != NULL && (ret == 0 || recover == 1)) {
11177 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011178
11179 /*
11180 * Return the newly created nodeset after unlinking it from
11181 * they pseudo parent.
11182 */
11183 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011184 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011185 while (cur != NULL) {
11186 cur->parent = NULL;
11187 cur = cur->next;
11188 }
11189 newDoc->children->children = NULL;
11190 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011191
Owen Taylor3473f882001-02-23 17:55:21 +000011192 if (sax != NULL)
11193 ctxt->sax = oldsax;
11194 xmlFreeParserCtxt(ctxt);
11195 newDoc->intSubset = NULL;
11196 newDoc->extSubset = NULL;
11197 xmlFreeDoc(newDoc);
11198
11199 return(ret);
11200}
11201
11202/**
11203 * xmlSAXParseEntity:
11204 * @sax: the SAX handler block
11205 * @filename: the filename
11206 *
11207 * parse an XML external entity out of context and build a tree.
11208 * It use the given SAX function block to handle the parsing callback.
11209 * If sax is NULL, fallback to the default DOM tree building routines.
11210 *
11211 * [78] extParsedEnt ::= TextDecl? content
11212 *
11213 * This correspond to a "Well Balanced" chunk
11214 *
11215 * Returns the resulting document tree
11216 */
11217
11218xmlDocPtr
11219xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11220 xmlDocPtr ret;
11221 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011222
11223 ctxt = xmlCreateFileParserCtxt(filename);
11224 if (ctxt == NULL) {
11225 return(NULL);
11226 }
11227 if (sax != NULL) {
11228 if (ctxt->sax != NULL)
11229 xmlFree(ctxt->sax);
11230 ctxt->sax = sax;
11231 ctxt->userData = NULL;
11232 }
11233
Owen Taylor3473f882001-02-23 17:55:21 +000011234 xmlParseExtParsedEnt(ctxt);
11235
11236 if (ctxt->wellFormed)
11237 ret = ctxt->myDoc;
11238 else {
11239 ret = NULL;
11240 xmlFreeDoc(ctxt->myDoc);
11241 ctxt->myDoc = NULL;
11242 }
11243 if (sax != NULL)
11244 ctxt->sax = NULL;
11245 xmlFreeParserCtxt(ctxt);
11246
11247 return(ret);
11248}
11249
11250/**
11251 * xmlParseEntity:
11252 * @filename: the filename
11253 *
11254 * parse an XML external entity out of context and build a tree.
11255 *
11256 * [78] extParsedEnt ::= TextDecl? content
11257 *
11258 * This correspond to a "Well Balanced" chunk
11259 *
11260 * Returns the resulting document tree
11261 */
11262
11263xmlDocPtr
11264xmlParseEntity(const char *filename) {
11265 return(xmlSAXParseEntity(NULL, filename));
11266}
Daniel Veillard81273902003-09-30 00:43:48 +000011267#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011268
11269/**
11270 * xmlCreateEntityParserCtxt:
11271 * @URL: the entity URL
11272 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011273 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011274 *
11275 * Create a parser context for an external entity
11276 * Automatic support for ZLIB/Compress compressed document is provided
11277 * by default if found at compile-time.
11278 *
11279 * Returns the new parser context or NULL
11280 */
11281xmlParserCtxtPtr
11282xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11283 const xmlChar *base) {
11284 xmlParserCtxtPtr ctxt;
11285 xmlParserInputPtr inputStream;
11286 char *directory = NULL;
11287 xmlChar *uri;
11288
11289 ctxt = xmlNewParserCtxt();
11290 if (ctxt == NULL) {
11291 return(NULL);
11292 }
11293
11294 uri = xmlBuildURI(URL, base);
11295
11296 if (uri == NULL) {
11297 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11298 if (inputStream == NULL) {
11299 xmlFreeParserCtxt(ctxt);
11300 return(NULL);
11301 }
11302
11303 inputPush(ctxt, inputStream);
11304
11305 if ((ctxt->directory == NULL) && (directory == NULL))
11306 directory = xmlParserGetDirectory((char *)URL);
11307 if ((ctxt->directory == NULL) && (directory != NULL))
11308 ctxt->directory = directory;
11309 } else {
11310 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11311 if (inputStream == NULL) {
11312 xmlFree(uri);
11313 xmlFreeParserCtxt(ctxt);
11314 return(NULL);
11315 }
11316
11317 inputPush(ctxt, inputStream);
11318
11319 if ((ctxt->directory == NULL) && (directory == NULL))
11320 directory = xmlParserGetDirectory((char *)uri);
11321 if ((ctxt->directory == NULL) && (directory != NULL))
11322 ctxt->directory = directory;
11323 xmlFree(uri);
11324 }
Owen Taylor3473f882001-02-23 17:55:21 +000011325 return(ctxt);
11326}
11327
11328/************************************************************************
11329 * *
11330 * Front ends when parsing from a file *
11331 * *
11332 ************************************************************************/
11333
11334/**
11335 * xmlCreateFileParserCtxt:
11336 * @filename: the filename
11337 *
11338 * Create a parser context for a file content.
11339 * Automatic support for ZLIB/Compress compressed document is provided
11340 * by default if found at compile-time.
11341 *
11342 * Returns the new parser context or NULL
11343 */
11344xmlParserCtxtPtr
11345xmlCreateFileParserCtxt(const char *filename)
11346{
11347 xmlParserCtxtPtr ctxt;
11348 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011349 char *directory = NULL;
11350
Owen Taylor3473f882001-02-23 17:55:21 +000011351 ctxt = xmlNewParserCtxt();
11352 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011353 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011354 return(NULL);
11355 }
11356
Igor Zlatkovicce076162003-02-23 13:39:39 +000011357
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011358 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011359 if (inputStream == NULL) {
11360 xmlFreeParserCtxt(ctxt);
11361 return(NULL);
11362 }
11363
Owen Taylor3473f882001-02-23 17:55:21 +000011364 inputPush(ctxt, inputStream);
11365 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011366 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011367 if ((ctxt->directory == NULL) && (directory != NULL))
11368 ctxt->directory = directory;
11369
11370 return(ctxt);
11371}
11372
Daniel Veillard81273902003-09-30 00:43:48 +000011373#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011374/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011375 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011376 * @sax: the SAX handler block
11377 * @filename: the filename
11378 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11379 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011380 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011381 *
11382 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11383 * compressed document is provided by default if found at compile-time.
11384 * It use the given SAX function block to handle the parsing callback.
11385 * If sax is NULL, fallback to the default DOM tree building routines.
11386 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011387 * User data (void *) is stored within the parser context in the
11388 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011389 *
Owen Taylor3473f882001-02-23 17:55:21 +000011390 * Returns the resulting document tree
11391 */
11392
11393xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011394xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11395 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011396 xmlDocPtr ret;
11397 xmlParserCtxtPtr ctxt;
11398 char *directory = NULL;
11399
Daniel Veillard635ef722001-10-29 11:48:19 +000011400 xmlInitParser();
11401
Owen Taylor3473f882001-02-23 17:55:21 +000011402 ctxt = xmlCreateFileParserCtxt(filename);
11403 if (ctxt == NULL) {
11404 return(NULL);
11405 }
11406 if (sax != NULL) {
11407 if (ctxt->sax != NULL)
11408 xmlFree(ctxt->sax);
11409 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011410 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011411 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011412 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011413 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011414 }
Owen Taylor3473f882001-02-23 17:55:21 +000011415
11416 if ((ctxt->directory == NULL) && (directory == NULL))
11417 directory = xmlParserGetDirectory(filename);
11418 if ((ctxt->directory == NULL) && (directory != NULL))
11419 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11420
Daniel Veillarddad3f682002-11-17 16:47:27 +000011421 ctxt->recovery = recovery;
11422
Owen Taylor3473f882001-02-23 17:55:21 +000011423 xmlParseDocument(ctxt);
11424
William M. Brackc07329e2003-09-08 01:57:30 +000011425 if ((ctxt->wellFormed) || recovery) {
11426 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011427 if (ret != NULL) {
11428 if (ctxt->input->buf->compressed > 0)
11429 ret->compression = 9;
11430 else
11431 ret->compression = ctxt->input->buf->compressed;
11432 }
William M. Brackc07329e2003-09-08 01:57:30 +000011433 }
Owen Taylor3473f882001-02-23 17:55:21 +000011434 else {
11435 ret = NULL;
11436 xmlFreeDoc(ctxt->myDoc);
11437 ctxt->myDoc = NULL;
11438 }
11439 if (sax != NULL)
11440 ctxt->sax = NULL;
11441 xmlFreeParserCtxt(ctxt);
11442
11443 return(ret);
11444}
11445
11446/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011447 * xmlSAXParseFile:
11448 * @sax: the SAX handler block
11449 * @filename: the filename
11450 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11451 * documents
11452 *
11453 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11454 * compressed document is provided by default if found at compile-time.
11455 * It use the given SAX function block to handle the parsing callback.
11456 * If sax is NULL, fallback to the default DOM tree building routines.
11457 *
11458 * Returns the resulting document tree
11459 */
11460
11461xmlDocPtr
11462xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11463 int recovery) {
11464 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11465}
11466
11467/**
Owen Taylor3473f882001-02-23 17:55:21 +000011468 * xmlRecoverDoc:
11469 * @cur: a pointer to an array of xmlChar
11470 *
11471 * parse an XML in-memory document and build a tree.
11472 * In the case the document is not Well Formed, a tree is built anyway
11473 *
11474 * Returns the resulting document tree
11475 */
11476
11477xmlDocPtr
11478xmlRecoverDoc(xmlChar *cur) {
11479 return(xmlSAXParseDoc(NULL, cur, 1));
11480}
11481
11482/**
11483 * xmlParseFile:
11484 * @filename: the filename
11485 *
11486 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11487 * compressed document is provided by default if found at compile-time.
11488 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011489 * Returns the resulting document tree if the file was wellformed,
11490 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011491 */
11492
11493xmlDocPtr
11494xmlParseFile(const char *filename) {
11495 return(xmlSAXParseFile(NULL, filename, 0));
11496}
11497
11498/**
11499 * xmlRecoverFile:
11500 * @filename: the filename
11501 *
11502 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11503 * compressed document is provided by default if found at compile-time.
11504 * In the case the document is not Well Formed, a tree is built anyway
11505 *
11506 * Returns the resulting document tree
11507 */
11508
11509xmlDocPtr
11510xmlRecoverFile(const char *filename) {
11511 return(xmlSAXParseFile(NULL, filename, 1));
11512}
11513
11514
11515/**
11516 * xmlSetupParserForBuffer:
11517 * @ctxt: an XML parser context
11518 * @buffer: a xmlChar * buffer
11519 * @filename: a file name
11520 *
11521 * Setup the parser context to parse a new buffer; Clears any prior
11522 * contents from the parser context. The buffer parameter must not be
11523 * NULL, but the filename parameter can be
11524 */
11525void
11526xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11527 const char* filename)
11528{
11529 xmlParserInputPtr input;
11530
11531 input = xmlNewInputStream(ctxt);
11532 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011533 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011534 xmlFree(ctxt);
11535 return;
11536 }
11537
11538 xmlClearParserCtxt(ctxt);
11539 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011540 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011541 input->base = buffer;
11542 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011543 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011544 inputPush(ctxt, input);
11545}
11546
11547/**
11548 * xmlSAXUserParseFile:
11549 * @sax: a SAX handler
11550 * @user_data: The user data returned on SAX callbacks
11551 * @filename: a file name
11552 *
11553 * parse an XML file and call the given SAX handler routines.
11554 * Automatic support for ZLIB/Compress compressed document is provided
11555 *
11556 * Returns 0 in case of success or a error number otherwise
11557 */
11558int
11559xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11560 const char *filename) {
11561 int ret = 0;
11562 xmlParserCtxtPtr ctxt;
11563
11564 ctxt = xmlCreateFileParserCtxt(filename);
11565 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011566#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011567 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011568#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011569 xmlFree(ctxt->sax);
11570 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011571 xmlDetectSAX2(ctxt);
11572
Owen Taylor3473f882001-02-23 17:55:21 +000011573 if (user_data != NULL)
11574 ctxt->userData = user_data;
11575
11576 xmlParseDocument(ctxt);
11577
11578 if (ctxt->wellFormed)
11579 ret = 0;
11580 else {
11581 if (ctxt->errNo != 0)
11582 ret = ctxt->errNo;
11583 else
11584 ret = -1;
11585 }
11586 if (sax != NULL)
11587 ctxt->sax = NULL;
11588 xmlFreeParserCtxt(ctxt);
11589
11590 return ret;
11591}
Daniel Veillard81273902003-09-30 00:43:48 +000011592#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011593
11594/************************************************************************
11595 * *
11596 * Front ends when parsing from memory *
11597 * *
11598 ************************************************************************/
11599
11600/**
11601 * xmlCreateMemoryParserCtxt:
11602 * @buffer: a pointer to a char array
11603 * @size: the size of the array
11604 *
11605 * Create a parser context for an XML in-memory document.
11606 *
11607 * Returns the new parser context or NULL
11608 */
11609xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011610xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011611 xmlParserCtxtPtr ctxt;
11612 xmlParserInputPtr input;
11613 xmlParserInputBufferPtr buf;
11614
11615 if (buffer == NULL)
11616 return(NULL);
11617 if (size <= 0)
11618 return(NULL);
11619
11620 ctxt = xmlNewParserCtxt();
11621 if (ctxt == NULL)
11622 return(NULL);
11623
Daniel Veillard53350552003-09-18 13:35:51 +000011624 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011625 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011626 if (buf == NULL) {
11627 xmlFreeParserCtxt(ctxt);
11628 return(NULL);
11629 }
Owen Taylor3473f882001-02-23 17:55:21 +000011630
11631 input = xmlNewInputStream(ctxt);
11632 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011633 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011634 xmlFreeParserCtxt(ctxt);
11635 return(NULL);
11636 }
11637
11638 input->filename = NULL;
11639 input->buf = buf;
11640 input->base = input->buf->buffer->content;
11641 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011642 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011643
11644 inputPush(ctxt, input);
11645 return(ctxt);
11646}
11647
Daniel Veillard81273902003-09-30 00:43:48 +000011648#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011649/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011650 * xmlSAXParseMemoryWithData:
11651 * @sax: the SAX handler block
11652 * @buffer: an pointer to a char array
11653 * @size: the size of the array
11654 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11655 * documents
11656 * @data: the userdata
11657 *
11658 * parse an XML in-memory block and use the given SAX function block
11659 * to handle the parsing callback. If sax is NULL, fallback to the default
11660 * DOM tree building routines.
11661 *
11662 * User data (void *) is stored within the parser context in the
11663 * context's _private member, so it is available nearly everywhere in libxml
11664 *
11665 * Returns the resulting document tree
11666 */
11667
11668xmlDocPtr
11669xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11670 int size, int recovery, void *data) {
11671 xmlDocPtr ret;
11672 xmlParserCtxtPtr ctxt;
11673
11674 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11675 if (ctxt == NULL) return(NULL);
11676 if (sax != NULL) {
11677 if (ctxt->sax != NULL)
11678 xmlFree(ctxt->sax);
11679 ctxt->sax = sax;
11680 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011681 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011682 if (data!=NULL) {
11683 ctxt->_private=data;
11684 }
11685
Daniel Veillardadba5f12003-04-04 16:09:01 +000011686 ctxt->recovery = recovery;
11687
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011688 xmlParseDocument(ctxt);
11689
11690 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11691 else {
11692 ret = NULL;
11693 xmlFreeDoc(ctxt->myDoc);
11694 ctxt->myDoc = NULL;
11695 }
11696 if (sax != NULL)
11697 ctxt->sax = NULL;
11698 xmlFreeParserCtxt(ctxt);
11699
11700 return(ret);
11701}
11702
11703/**
Owen Taylor3473f882001-02-23 17:55:21 +000011704 * xmlSAXParseMemory:
11705 * @sax: the SAX handler block
11706 * @buffer: an pointer to a char array
11707 * @size: the size of the array
11708 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11709 * documents
11710 *
11711 * parse an XML in-memory block and use the given SAX function block
11712 * to handle the parsing callback. If sax is NULL, fallback to the default
11713 * DOM tree building routines.
11714 *
11715 * Returns the resulting document tree
11716 */
11717xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011718xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11719 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011720 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011721}
11722
11723/**
11724 * xmlParseMemory:
11725 * @buffer: an pointer to a char array
11726 * @size: the size of the array
11727 *
11728 * parse an XML in-memory block and build a tree.
11729 *
11730 * Returns the resulting document tree
11731 */
11732
Daniel Veillard50822cb2001-07-26 20:05:51 +000011733xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011734 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11735}
11736
11737/**
11738 * xmlRecoverMemory:
11739 * @buffer: an pointer to a char array
11740 * @size: the size of the array
11741 *
11742 * parse an XML in-memory block and build a tree.
11743 * In the case the document is not Well Formed, a tree is built anyway
11744 *
11745 * Returns the resulting document tree
11746 */
11747
Daniel Veillard50822cb2001-07-26 20:05:51 +000011748xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011749 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11750}
11751
11752/**
11753 * xmlSAXUserParseMemory:
11754 * @sax: a SAX handler
11755 * @user_data: The user data returned on SAX callbacks
11756 * @buffer: an in-memory XML document input
11757 * @size: the length of the XML document in bytes
11758 *
11759 * A better SAX parsing routine.
11760 * parse an XML in-memory buffer and call the given SAX handler routines.
11761 *
11762 * Returns 0 in case of success or a error number otherwise
11763 */
11764int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011765 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011766 int ret = 0;
11767 xmlParserCtxtPtr ctxt;
11768 xmlSAXHandlerPtr oldsax = NULL;
11769
Daniel Veillard9e923512002-08-14 08:48:52 +000011770 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011771 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11772 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011773 oldsax = ctxt->sax;
11774 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011775 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011776 if (user_data != NULL)
11777 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011778
11779 xmlParseDocument(ctxt);
11780
11781 if (ctxt->wellFormed)
11782 ret = 0;
11783 else {
11784 if (ctxt->errNo != 0)
11785 ret = ctxt->errNo;
11786 else
11787 ret = -1;
11788 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011789 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011790 xmlFreeParserCtxt(ctxt);
11791
11792 return ret;
11793}
Daniel Veillard81273902003-09-30 00:43:48 +000011794#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011795
11796/**
11797 * xmlCreateDocParserCtxt:
11798 * @cur: a pointer to an array of xmlChar
11799 *
11800 * Creates a parser context for an XML in-memory document.
11801 *
11802 * Returns the new parser context or NULL
11803 */
11804xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011805xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011806 int len;
11807
11808 if (cur == NULL)
11809 return(NULL);
11810 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011811 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011812}
11813
Daniel Veillard81273902003-09-30 00:43:48 +000011814#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011815/**
11816 * xmlSAXParseDoc:
11817 * @sax: the SAX handler block
11818 * @cur: a pointer to an array of xmlChar
11819 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11820 * documents
11821 *
11822 * parse an XML in-memory document and build a tree.
11823 * It use the given SAX function block to handle the parsing callback.
11824 * If sax is NULL, fallback to the default DOM tree building routines.
11825 *
11826 * Returns the resulting document tree
11827 */
11828
11829xmlDocPtr
11830xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11831 xmlDocPtr ret;
11832 xmlParserCtxtPtr ctxt;
11833
11834 if (cur == NULL) return(NULL);
11835
11836
11837 ctxt = xmlCreateDocParserCtxt(cur);
11838 if (ctxt == NULL) return(NULL);
11839 if (sax != NULL) {
11840 ctxt->sax = sax;
11841 ctxt->userData = NULL;
11842 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011843 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011844
11845 xmlParseDocument(ctxt);
11846 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11847 else {
11848 ret = NULL;
11849 xmlFreeDoc(ctxt->myDoc);
11850 ctxt->myDoc = NULL;
11851 }
11852 if (sax != NULL)
11853 ctxt->sax = NULL;
11854 xmlFreeParserCtxt(ctxt);
11855
11856 return(ret);
11857}
11858
11859/**
11860 * xmlParseDoc:
11861 * @cur: a pointer to an array of xmlChar
11862 *
11863 * parse an XML in-memory document and build a tree.
11864 *
11865 * Returns the resulting document tree
11866 */
11867
11868xmlDocPtr
11869xmlParseDoc(xmlChar *cur) {
11870 return(xmlSAXParseDoc(NULL, cur, 0));
11871}
Daniel Veillard81273902003-09-30 00:43:48 +000011872#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011873
Daniel Veillard81273902003-09-30 00:43:48 +000011874#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011875/************************************************************************
11876 * *
11877 * Specific function to keep track of entities references *
11878 * and used by the XSLT debugger *
11879 * *
11880 ************************************************************************/
11881
11882static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11883
11884/**
11885 * xmlAddEntityReference:
11886 * @ent : A valid entity
11887 * @firstNode : A valid first node for children of entity
11888 * @lastNode : A valid last node of children entity
11889 *
11890 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11891 */
11892static void
11893xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11894 xmlNodePtr lastNode)
11895{
11896 if (xmlEntityRefFunc != NULL) {
11897 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11898 }
11899}
11900
11901
11902/**
11903 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011904 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011905 *
11906 * Set the function to call call back when a xml reference has been made
11907 */
11908void
11909xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11910{
11911 xmlEntityRefFunc = func;
11912}
Daniel Veillard81273902003-09-30 00:43:48 +000011913#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011914
11915/************************************************************************
11916 * *
11917 * Miscellaneous *
11918 * *
11919 ************************************************************************/
11920
11921#ifdef LIBXML_XPATH_ENABLED
11922#include <libxml/xpath.h>
11923#endif
11924
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011925extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011926static int xmlParserInitialized = 0;
11927
11928/**
11929 * xmlInitParser:
11930 *
11931 * Initialization function for the XML parser.
11932 * This is not reentrant. Call once before processing in case of
11933 * use in multithreaded programs.
11934 */
11935
11936void
11937xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011938 if (xmlParserInitialized != 0)
11939 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011940
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011941 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11942 (xmlGenericError == NULL))
11943 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011944 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011945 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011946 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011947 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011948 xmlDefaultSAXHandlerInit();
11949 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011950#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011951 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011952#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011953#ifdef LIBXML_HTML_ENABLED
11954 htmlInitAutoClose();
11955 htmlDefaultSAXHandlerInit();
11956#endif
11957#ifdef LIBXML_XPATH_ENABLED
11958 xmlXPathInit();
11959#endif
11960 xmlParserInitialized = 1;
11961}
11962
11963/**
11964 * xmlCleanupParser:
11965 *
11966 * Cleanup function for the XML parser. It tries to reclaim all
11967 * parsing related global memory allocated for the parser processing.
11968 * It doesn't deallocate any document related memory. Calling this
11969 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011970 * One should call xmlCleanupParser() only when the process has
11971 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011972 */
11973
11974void
11975xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011976 if (!xmlParserInitialized)
11977 return;
11978
Owen Taylor3473f882001-02-23 17:55:21 +000011979 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011980#ifdef LIBXML_CATALOG_ENABLED
11981 xmlCatalogCleanup();
11982#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011983 xmlCleanupInputCallbacks();
11984#ifdef LIBXML_OUTPUT_ENABLED
11985 xmlCleanupOutputCallbacks();
11986#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011987 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011988 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011989 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000011990 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011991}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011992
11993/************************************************************************
11994 * *
11995 * New set (2.6.0) of simpler and more flexible APIs *
11996 * *
11997 ************************************************************************/
11998
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed. The expansion
 * is an unbraced if statement that already ends with a semicolon.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (!xmlDictOwns(dict, (const xmlChar *)(str)))))	\
	    xmlFree((char *)(str));
12010
12011/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012012 * xmlCtxtReset:
12013 * @ctxt: an XML parser context
12014 *
12015 * Reset a parser context
12016 */
12017void
12018xmlCtxtReset(xmlParserCtxtPtr ctxt)
12019{
12020 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012021 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012022
12023 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12024 xmlFreeInputStream(input);
12025 }
12026 ctxt->inputNr = 0;
12027 ctxt->input = NULL;
12028
12029 ctxt->spaceNr = 0;
12030 ctxt->spaceTab[0] = -1;
12031 ctxt->space = &ctxt->spaceTab[0];
12032
12033
12034 ctxt->nodeNr = 0;
12035 ctxt->node = NULL;
12036
12037 ctxt->nameNr = 0;
12038 ctxt->name = NULL;
12039
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012040 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012041 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012042 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012043 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012044 DICT_FREE(ctxt->directory);
12045 ctxt->directory = NULL;
12046 DICT_FREE(ctxt->extSubURI);
12047 ctxt->extSubURI = NULL;
12048 DICT_FREE(ctxt->extSubSystem);
12049 ctxt->extSubSystem = NULL;
12050 if (ctxt->myDoc != NULL)
12051 xmlFreeDoc(ctxt->myDoc);
12052 ctxt->myDoc = NULL;
12053
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012054 ctxt->standalone = -1;
12055 ctxt->hasExternalSubset = 0;
12056 ctxt->hasPErefs = 0;
12057 ctxt->html = 0;
12058 ctxt->external = 0;
12059 ctxt->instate = XML_PARSER_START;
12060 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012061
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012062 ctxt->wellFormed = 1;
12063 ctxt->nsWellFormed = 1;
12064 ctxt->valid = 1;
12065 ctxt->vctxt.userData = ctxt;
12066 ctxt->vctxt.error = xmlParserValidityError;
12067 ctxt->vctxt.warning = xmlParserValidityWarning;
12068 ctxt->record_info = 0;
12069 ctxt->nbChars = 0;
12070 ctxt->checkIndex = 0;
12071 ctxt->inSubset = 0;
12072 ctxt->errNo = XML_ERR_OK;
12073 ctxt->depth = 0;
12074 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12075 ctxt->catalogs = NULL;
12076 xmlInitNodeInfoSeq(&ctxt->node_seq);
12077
12078 if (ctxt->attsDefault != NULL) {
12079 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12080 ctxt->attsDefault = NULL;
12081 }
12082 if (ctxt->attsSpecial != NULL) {
12083 xmlHashFree(ctxt->attsSpecial, NULL);
12084 ctxt->attsSpecial = NULL;
12085 }
12086
Daniel Veillard4432df22003-09-28 18:58:27 +000012087#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012088 if (ctxt->catalogs != NULL)
12089 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012090#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012091}
12092
12093/**
12094 * xmlCtxtUseOptions:
12095 * @ctxt: an XML parser context
12096 * @options: a combination of xmlParserOption(s)
12097 *
12098 * Applies the options to the parser context
12099 *
12100 * Returns 0 in case of success, the set of unknown or unimplemented options
12101 * in case of error.
12102 */
12103int
12104xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12105{
12106 if (options & XML_PARSE_RECOVER) {
12107 ctxt->recovery = 1;
12108 options -= XML_PARSE_RECOVER;
12109 } else
12110 ctxt->recovery = 0;
12111 if (options & XML_PARSE_DTDLOAD) {
12112 ctxt->loadsubset = XML_DETECT_IDS;
12113 options -= XML_PARSE_DTDLOAD;
12114 } else
12115 ctxt->loadsubset = 0;
12116 if (options & XML_PARSE_DTDATTR) {
12117 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12118 options -= XML_PARSE_DTDATTR;
12119 }
12120 if (options & XML_PARSE_NOENT) {
12121 ctxt->replaceEntities = 1;
12122 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12123 options -= XML_PARSE_NOENT;
12124 } else
12125 ctxt->replaceEntities = 0;
12126 if (options & XML_PARSE_NOWARNING) {
12127 ctxt->sax->warning = NULL;
12128 options -= XML_PARSE_NOWARNING;
12129 }
12130 if (options & XML_PARSE_NOERROR) {
12131 ctxt->sax->error = NULL;
12132 ctxt->sax->fatalError = NULL;
12133 options -= XML_PARSE_NOERROR;
12134 }
12135 if (options & XML_PARSE_PEDANTIC) {
12136 ctxt->pedantic = 1;
12137 options -= XML_PARSE_PEDANTIC;
12138 } else
12139 ctxt->pedantic = 0;
12140 if (options & XML_PARSE_NOBLANKS) {
12141 ctxt->keepBlanks = 0;
12142 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12143 options -= XML_PARSE_NOBLANKS;
12144 } else
12145 ctxt->keepBlanks = 1;
12146 if (options & XML_PARSE_DTDVALID) {
12147 ctxt->validate = 1;
12148 if (options & XML_PARSE_NOWARNING)
12149 ctxt->vctxt.warning = NULL;
12150 if (options & XML_PARSE_NOERROR)
12151 ctxt->vctxt.error = NULL;
12152 options -= XML_PARSE_DTDVALID;
12153 } else
12154 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012155#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012156 if (options & XML_PARSE_SAX1) {
12157 ctxt->sax->startElement = xmlSAX2StartElement;
12158 ctxt->sax->endElement = xmlSAX2EndElement;
12159 ctxt->sax->startElementNs = NULL;
12160 ctxt->sax->endElementNs = NULL;
12161 ctxt->sax->initialized = 1;
12162 options -= XML_PARSE_SAX1;
12163 }
Daniel Veillard81273902003-09-30 00:43:48 +000012164#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012165 if (options & XML_PARSE_NODICT) {
12166 ctxt->dictNames = 0;
12167 options -= XML_PARSE_NODICT;
12168 } else {
12169 ctxt->dictNames = 1;
12170 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012171 if (options & XML_PARSE_NOCDATA) {
12172 ctxt->sax->cdataBlock = NULL;
12173 options -= XML_PARSE_NOCDATA;
12174 }
12175 if (options & XML_PARSE_NSCLEAN) {
12176 ctxt->options |= XML_PARSE_NSCLEAN;
12177 options -= XML_PARSE_NSCLEAN;
12178 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012179 return (options);
12180}
12181
12182/**
12183 * xmlDoRead:
12184 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012185 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012186 * @encoding: the document encoding, or NULL
12187 * @options: a combination of xmlParserOption(s)
12188 * @reuse: keep the context for reuse
12189 *
12190 * Common front-end for the xmlRead functions
12191 *
12192 * Returns the resulting document tree or NULL
12193 */
12194static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012195xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12196 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012197{
12198 xmlDocPtr ret;
12199
12200 xmlCtxtUseOptions(ctxt, options);
12201 if (encoding != NULL) {
12202 xmlCharEncodingHandlerPtr hdlr;
12203
12204 hdlr = xmlFindCharEncodingHandler(encoding);
12205 if (hdlr != NULL)
12206 xmlSwitchToEncoding(ctxt, hdlr);
12207 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012208 if ((URL != NULL) && (ctxt->input != NULL) &&
12209 (ctxt->input->filename == NULL))
12210 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012211 xmlParseDocument(ctxt);
12212 if ((ctxt->wellFormed) || ctxt->recovery)
12213 ret = ctxt->myDoc;
12214 else {
12215 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012216 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012217 if ((ctxt->dictNames) &&
12218 (ctxt->myDoc->dict == ctxt->dict))
12219 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012220 xmlFreeDoc(ctxt->myDoc);
12221 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012222 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012223 ctxt->myDoc = NULL;
12224 if (!reuse) {
12225 if ((ctxt->dictNames) &&
12226 (ret != NULL) &&
12227 (ret->dict == ctxt->dict))
12228 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012229 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012230 } else {
12231 /* Must duplicate the reference to the dictionary */
12232 if ((ctxt->dictNames) &&
12233 (ret != NULL) &&
12234 (ret->dict == ctxt->dict))
12235 xmlDictReference(ctxt->dict);
12236 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012237
12238 return (ret);
12239}
12240
12241/**
12242 * xmlReadDoc:
12243 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012244 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012245 * @encoding: the document encoding, or NULL
12246 * @options: a combination of xmlParserOption(s)
12247 *
12248 * parse an XML in-memory document and build a tree.
12249 *
12250 * Returns the resulting document tree
12251 */
12252xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012253xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012254{
12255 xmlParserCtxtPtr ctxt;
12256
12257 if (cur == NULL)
12258 return (NULL);
12259
12260 ctxt = xmlCreateDocParserCtxt(cur);
12261 if (ctxt == NULL)
12262 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012263 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012264}
12265
12266/**
12267 * xmlReadFile:
12268 * @filename: a file or URL
12269 * @encoding: the document encoding, or NULL
12270 * @options: a combination of xmlParserOption(s)
12271 *
12272 * parse an XML file from the filesystem or the network.
12273 *
12274 * Returns the resulting document tree
12275 */
12276xmlDocPtr
12277xmlReadFile(const char *filename, const char *encoding, int options)
12278{
12279 xmlParserCtxtPtr ctxt;
12280
12281 ctxt = xmlCreateFileParserCtxt(filename);
12282 if (ctxt == NULL)
12283 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012284 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012285}
12286
12287/**
12288 * xmlReadMemory:
12289 * @buffer: a pointer to a char array
12290 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012291 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012292 * @encoding: the document encoding, or NULL
12293 * @options: a combination of xmlParserOption(s)
12294 *
12295 * parse an XML in-memory document and build a tree.
12296 *
12297 * Returns the resulting document tree
12298 */
12299xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012300xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012301{
12302 xmlParserCtxtPtr ctxt;
12303
12304 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12305 if (ctxt == NULL)
12306 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012307 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012308}
12309
12310/**
12311 * xmlReadFd:
12312 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012313 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012314 * @encoding: the document encoding, or NULL
12315 * @options: a combination of xmlParserOption(s)
12316 *
12317 * parse an XML from a file descriptor and build a tree.
12318 *
12319 * Returns the resulting document tree
12320 */
12321xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012322xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012323{
12324 xmlParserCtxtPtr ctxt;
12325 xmlParserInputBufferPtr input;
12326 xmlParserInputPtr stream;
12327
12328 if (fd < 0)
12329 return (NULL);
12330
12331 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12332 if (input == NULL)
12333 return (NULL);
12334 ctxt = xmlNewParserCtxt();
12335 if (ctxt == NULL) {
12336 xmlFreeParserInputBuffer(input);
12337 return (NULL);
12338 }
12339 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12340 if (stream == NULL) {
12341 xmlFreeParserInputBuffer(input);
12342 xmlFreeParserCtxt(ctxt);
12343 return (NULL);
12344 }
12345 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012346 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012347}
12348
12349/**
12350 * xmlReadIO:
12351 * @ioread: an I/O read function
12352 * @ioclose: an I/O close function
12353 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012354 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012355 * @encoding: the document encoding, or NULL
12356 * @options: a combination of xmlParserOption(s)
12357 *
12358 * parse an XML document from I/O functions and source and build a tree.
12359 *
12360 * Returns the resulting document tree
12361 */
12362xmlDocPtr
12363xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012364 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012365{
12366 xmlParserCtxtPtr ctxt;
12367 xmlParserInputBufferPtr input;
12368 xmlParserInputPtr stream;
12369
12370 if (ioread == NULL)
12371 return (NULL);
12372
12373 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12374 XML_CHAR_ENCODING_NONE);
12375 if (input == NULL)
12376 return (NULL);
12377 ctxt = xmlNewParserCtxt();
12378 if (ctxt == NULL) {
12379 xmlFreeParserInputBuffer(input);
12380 return (NULL);
12381 }
12382 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12383 if (stream == NULL) {
12384 xmlFreeParserInputBuffer(input);
12385 xmlFreeParserCtxt(ctxt);
12386 return (NULL);
12387 }
12388 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012389 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012390}
12391
12392/**
12393 * xmlCtxtReadDoc:
12394 * @ctxt: an XML parser context
12395 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012396 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012397 * @encoding: the document encoding, or NULL
12398 * @options: a combination of xmlParserOption(s)
12399 *
12400 * parse an XML in-memory document and build a tree.
12401 * This reuses the existing @ctxt parser context
12402 *
12403 * Returns the resulting document tree
12404 */
12405xmlDocPtr
12406xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012407 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012408{
12409 xmlParserInputPtr stream;
12410
12411 if (cur == NULL)
12412 return (NULL);
12413 if (ctxt == NULL)
12414 return (NULL);
12415
12416 xmlCtxtReset(ctxt);
12417
12418 stream = xmlNewStringInputStream(ctxt, cur);
12419 if (stream == NULL) {
12420 return (NULL);
12421 }
12422 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012423 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012424}
12425
12426/**
12427 * xmlCtxtReadFile:
12428 * @ctxt: an XML parser context
12429 * @filename: a file or URL
12430 * @encoding: the document encoding, or NULL
12431 * @options: a combination of xmlParserOption(s)
12432 *
12433 * parse an XML file from the filesystem or the network.
12434 * This reuses the existing @ctxt parser context
12435 *
12436 * Returns the resulting document tree
12437 */
12438xmlDocPtr
12439xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12440 const char *encoding, int options)
12441{
12442 xmlParserInputPtr stream;
12443
12444 if (filename == NULL)
12445 return (NULL);
12446 if (ctxt == NULL)
12447 return (NULL);
12448
12449 xmlCtxtReset(ctxt);
12450
12451 stream = xmlNewInputFromFile(ctxt, filename);
12452 if (stream == NULL) {
12453 return (NULL);
12454 }
12455 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012456 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012457}
12458
12459/**
12460 * xmlCtxtReadMemory:
12461 * @ctxt: an XML parser context
12462 * @buffer: a pointer to a char array
12463 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012464 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012465 * @encoding: the document encoding, or NULL
12466 * @options: a combination of xmlParserOption(s)
12467 *
12468 * parse an XML in-memory document and build a tree.
12469 * This reuses the existing @ctxt parser context
12470 *
12471 * Returns the resulting document tree
12472 */
12473xmlDocPtr
12474xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012475 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012476{
12477 xmlParserInputBufferPtr input;
12478 xmlParserInputPtr stream;
12479
12480 if (ctxt == NULL)
12481 return (NULL);
12482 if (buffer == NULL)
12483 return (NULL);
12484
12485 xmlCtxtReset(ctxt);
12486
12487 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12488 if (input == NULL) {
12489 return(NULL);
12490 }
12491
12492 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12493 if (stream == NULL) {
12494 xmlFreeParserInputBuffer(input);
12495 return(NULL);
12496 }
12497
12498 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012499 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012500}
12501
12502/**
12503 * xmlCtxtReadFd:
12504 * @ctxt: an XML parser context
12505 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012506 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012507 * @encoding: the document encoding, or NULL
12508 * @options: a combination of xmlParserOption(s)
12509 *
12510 * parse an XML from a file descriptor and build a tree.
12511 * This reuses the existing @ctxt parser context
12512 *
12513 * Returns the resulting document tree
12514 */
12515xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012516xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12517 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012518{
12519 xmlParserInputBufferPtr input;
12520 xmlParserInputPtr stream;
12521
12522 if (fd < 0)
12523 return (NULL);
12524 if (ctxt == NULL)
12525 return (NULL);
12526
12527 xmlCtxtReset(ctxt);
12528
12529
12530 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12531 if (input == NULL)
12532 return (NULL);
12533 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12534 if (stream == NULL) {
12535 xmlFreeParserInputBuffer(input);
12536 return (NULL);
12537 }
12538 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012539 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012540}
12541
12542/**
12543 * xmlCtxtReadIO:
12544 * @ctxt: an XML parser context
12545 * @ioread: an I/O read function
12546 * @ioclose: an I/O close function
12547 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012548 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012549 * @encoding: the document encoding, or NULL
12550 * @options: a combination of xmlParserOption(s)
12551 *
12552 * parse an XML document from I/O functions and source and build a tree.
12553 * This reuses the existing @ctxt parser context
12554 *
12555 * Returns the resulting document tree
12556 */
12557xmlDocPtr
12558xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12559 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012560 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012561 const char *encoding, int options)
12562{
12563 xmlParserInputBufferPtr input;
12564 xmlParserInputPtr stream;
12565
12566 if (ioread == NULL)
12567 return (NULL);
12568 if (ctxt == NULL)
12569 return (NULL);
12570
12571 xmlCtxtReset(ctxt);
12572
12573 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12574 XML_CHAR_ENCODING_NONE);
12575 if (input == NULL)
12576 return (NULL);
12577 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12578 if (stream == NULL) {
12579 xmlFreeParserInputBuffer(input);
12580 return (NULL);
12581 }
12582 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012583 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012584}