blob: 05c31a9675201967e575d7d083e157bfc72e9feb [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * Directory separator character: a backslash on native Windows builds
 * (WIN32 defined and not Cygwin), a forward slash everywhere else.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser itself but a
 * safety boundary.
 */
unsigned int xmlParserMaxDepth = 1024U;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/* Compile-time marker for the SAX2 code paths. */
#define SAX2 1

/* Named buffer-size constants used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000608 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
609 (ctxt->str_xml_ns == NULL)) {
610 xmlErrMemory(ctxt, NULL);
611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000612}
613
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614typedef struct _xmlDefAttrs xmlDefAttrs;
615typedef xmlDefAttrs *xmlDefAttrsPtr;
616struct _xmlDefAttrs {
617 int nbAttrs; /* number of defaulted attributes on that element */
618 int maxAttrs; /* the size of the array */
619 const xmlChar *values[4]; /* array of localname/prefix/values */
620};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000621
622/**
623 * xmlAddDefAttrs:
624 * @ctxt: an XML parser context
625 * @fullname: the element fullname
626 * @fullattr: the attribute fullname
627 * @value: the attribute value
628 *
629 * Add a defaulted attribute for an element
630 */
631static void
632xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
633 const xmlChar *fullname,
634 const xmlChar *fullattr,
635 const xmlChar *value) {
636 xmlDefAttrsPtr defaults;
637 int len;
638 const xmlChar *name;
639 const xmlChar *prefix;
640
641 if (ctxt->attsDefault == NULL) {
642 ctxt->attsDefault = xmlHashCreate(10);
643 if (ctxt->attsDefault == NULL)
644 goto mem_error;
645 }
646
647 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000648 * split the element name into prefix:localname , the string found
649 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000650 */
651 name = xmlSplitQName3(fullname, &len);
652 if (name == NULL) {
653 name = xmlDictLookup(ctxt->dict, fullname, -1);
654 prefix = NULL;
655 } else {
656 name = xmlDictLookup(ctxt->dict, name, -1);
657 prefix = xmlDictLookup(ctxt->dict, fullname, len);
658 }
659
660 /*
661 * make sure there is some storage
662 */
663 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
664 if (defaults == NULL) {
665 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000666 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000667 if (defaults == NULL)
668 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000670 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
672 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000673 xmlDefAttrsPtr temp;
674
675 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000676 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000677 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 defaults->maxAttrs *= 2;
681 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
682 }
683
684 /*
685 * plit the element name into prefix:localname , the string found
686 * are within the DTD and hen not associated to namespace names.
687 */
688 name = xmlSplitQName3(fullattr, &len);
689 if (name == NULL) {
690 name = xmlDictLookup(ctxt->dict, fullattr, -1);
691 prefix = NULL;
692 } else {
693 name = xmlDictLookup(ctxt->dict, name, -1);
694 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
695 }
696
697 defaults->values[4 * defaults->nbAttrs] = name;
698 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
699 /* intern the string and precompute the end */
700 len = xmlStrlen(value);
701 value = xmlDictLookup(ctxt->dict, value, len);
702 defaults->values[4 * defaults->nbAttrs + 2] = value;
703 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
704 defaults->nbAttrs++;
705
706 return;
707
708mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000709 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000710 return;
711}
712
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000713/**
714 * xmlAddSpecialAttr:
715 * @ctxt: an XML parser context
716 * @fullname: the element fullname
717 * @fullattr: the attribute fullname
718 * @type: the attribute type
719 *
720 * Register that this attribute is not CDATA
721 */
722static void
723xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
724 const xmlChar *fullname,
725 const xmlChar *fullattr,
726 int type)
727{
728 if (ctxt->attsSpecial == NULL) {
729 ctxt->attsSpecial = xmlHashCreate(10);
730 if (ctxt->attsSpecial == NULL)
731 goto mem_error;
732 }
733
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000734 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
735 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000736 return;
737
738mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000739 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000740 return;
741}
742
Daniel Veillard4432df22003-09-28 18:58:27 +0000743/**
744 * xmlCheckLanguageID:
745 * @lang: pointer to the string value
746 *
747 * Checks that the value conforms to the LanguageID production:
748 *
749 * NOTE: this is somewhat deprecated, those productions were removed from
750 * the XML Second edition.
751 *
752 * [33] LanguageID ::= Langcode ('-' Subcode)*
753 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
754 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
755 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
756 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
757 * [38] Subcode ::= ([a-z] | [A-Z])+
758 *
759 * Returns 1 if correct 0 otherwise
760 **/
761int
762xmlCheckLanguageID(const xmlChar * lang)
763{
764 const xmlChar *cur = lang;
765
766 if (cur == NULL)
767 return (0);
768 if (((cur[0] == 'i') && (cur[1] == '-')) ||
769 ((cur[0] == 'I') && (cur[1] == '-'))) {
770 /*
771 * IANA code
772 */
773 cur += 2;
774 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
775 ((cur[0] >= 'a') && (cur[0] <= 'z')))
776 cur++;
777 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
778 ((cur[0] == 'X') && (cur[1] == '-'))) {
779 /*
780 * User code
781 */
782 cur += 2;
783 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
784 ((cur[0] >= 'a') && (cur[0] <= 'z')))
785 cur++;
786 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
787 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
788 /*
789 * ISO639
790 */
791 cur++;
792 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
793 ((cur[0] >= 'a') && (cur[0] <= 'z')))
794 cur++;
795 else
796 return (0);
797 } else
798 return (0);
799 while (cur[0] != 0) { /* non input consuming */
800 if (cur[0] != '-')
801 return (0);
802 cur++;
803 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
804 ((cur[0] >= 'a') && (cur[0] <= 'z')))
805 cur++;
806 else
807 return (0);
808 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
809 ((cur[0] >= 'a') && (cur[0] <= 'z')))
810 cur++;
811 }
812 return (1);
813}
814
Owen Taylor3473f882001-02-23 17:55:21 +0000815/************************************************************************
816 * *
817 * Parser stacks related functions and macros *
818 * *
819 ************************************************************************/
820
821xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
822 const xmlChar ** str);
823
Daniel Veillard0fb18932003-09-07 09:14:37 +0000824#ifdef SAX2
825/**
826 * nsPush:
827 * @ctxt: an XML parser context
828 * @prefix: the namespace prefix or NULL
829 * @URL: the namespace name
830 *
831 * Pushes a new parser namespace on top of the ns stack
832 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000833 * Returns -1 in case of error, -2 if the namespace should be discarded
834 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000835 */
836static int
837nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
838{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000839 if (ctxt->options & XML_PARSE_NSCLEAN) {
840 int i;
841 for (i = 0;i < ctxt->nsNr;i += 2) {
842 if (ctxt->nsTab[i] == prefix) {
843 /* in scope */
844 if (ctxt->nsTab[i + 1] == URL)
845 return(-2);
846 /* out of scope keep it */
847 break;
848 }
849 }
850 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
852 ctxt->nsMax = 10;
853 ctxt->nsNr = 0;
854 ctxt->nsTab = (const xmlChar **)
855 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
856 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000857 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax = 0;
859 return (-1);
860 }
861 } else if (ctxt->nsNr >= ctxt->nsMax) {
862 ctxt->nsMax *= 2;
863 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000864 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000865 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
866 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000867 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->nsMax /= 2;
869 return (-1);
870 }
871 }
872 ctxt->nsTab[ctxt->nsNr++] = prefix;
873 ctxt->nsTab[ctxt->nsNr++] = URL;
874 return (ctxt->nsNr);
875}
876/**
877 * nsPop:
878 * @ctxt: an XML parser context
879 * @nr: the number to pop
880 *
881 * Pops the top @nr parser prefix/namespace from the ns stack
882 *
883 * Returns the number of namespaces removed
884 */
885static int
886nsPop(xmlParserCtxtPtr ctxt, int nr)
887{
888 int i;
889
890 if (ctxt->nsTab == NULL) return(0);
891 if (ctxt->nsNr < nr) {
892 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
893 nr = ctxt->nsNr;
894 }
895 if (ctxt->nsNr <= 0)
896 return (0);
897
898 for (i = 0;i < nr;i++) {
899 ctxt->nsNr--;
900 ctxt->nsTab[ctxt->nsNr] = NULL;
901 }
902 return(nr);
903}
904#endif
905
906static int
907xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
908 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 int maxatts;
911
912 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000913 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 atts = (const xmlChar **)
915 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
919 if (attallocs == NULL) goto mem_error;
920 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000921 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000922 } else if (nr + 5 > ctxt->maxatts) {
923 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
925 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000927 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
929 (maxatts / 5) * sizeof(int));
930 if (attallocs == NULL) goto mem_error;
931 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000932 ctxt->maxatts = maxatts;
933 }
934 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000935mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000936 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000938}
939
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000940/**
941 * inputPush:
942 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000943 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000944 *
945 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000946 *
947 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000948 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000949int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000950inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
951{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000952 if ((ctxt == NULL) || (value == NULL))
953 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000954 if (ctxt->inputNr >= ctxt->inputMax) {
955 ctxt->inputMax *= 2;
956 ctxt->inputTab =
957 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
958 ctxt->inputMax *
959 sizeof(ctxt->inputTab[0]));
960 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000961 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000962 return (0);
963 }
964 }
965 ctxt->inputTab[ctxt->inputNr] = value;
966 ctxt->input = value;
967 return (ctxt->inputNr++);
968}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000969/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000970 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971 * @ctxt: an XML parser context
972 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000977xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000978inputPop(xmlParserCtxtPtr ctxt)
979{
980 xmlParserInputPtr ret;
981
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000982 if (ctxt == NULL)
983 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000984 if (ctxt->inputNr <= 0)
985 return (0);
986 ctxt->inputNr--;
987 if (ctxt->inputNr > 0)
988 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
989 else
990 ctxt->input = NULL;
991 ret = ctxt->inputTab[ctxt->inputNr];
992 ctxt->inputTab[ctxt->inputNr] = 0;
993 return (ret);
994}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000995/**
996 * nodePush:
997 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000998 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000999 *
1000 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001001 *
1002 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001003 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001004int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001005nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1006{
1007 if (ctxt->nodeNr >= ctxt->nodeMax) {
1008 ctxt->nodeMax *= 2;
1009 ctxt->nodeTab =
1010 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1011 ctxt->nodeMax *
1012 sizeof(ctxt->nodeTab[0]));
1013 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001014 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001015 return (0);
1016 }
1017 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001018 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001019 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001020 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1021 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001022 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001023 return(0);
1024 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001025 ctxt->nodeTab[ctxt->nodeNr] = value;
1026 ctxt->node = value;
1027 return (ctxt->nodeNr++);
1028}
1029/**
1030 * nodePop:
1031 * @ctxt: an XML parser context
1032 *
1033 * Pops the top element node from the node stack
1034 *
1035 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001036 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001037xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001038nodePop(xmlParserCtxtPtr ctxt)
1039{
1040 xmlNodePtr ret;
1041
1042 if (ctxt->nodeNr <= 0)
1043 return (0);
1044 ctxt->nodeNr--;
1045 if (ctxt->nodeNr > 0)
1046 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1047 else
1048 ctxt->node = NULL;
1049 ret = ctxt->nodeTab[ctxt->nodeNr];
1050 ctxt->nodeTab[ctxt->nodeNr] = 0;
1051 return (ret);
1052}
Daniel Veillarda2351322004-06-27 12:08:10 +00001053
1054#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001055/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 * nameNsPush:
1057 * @ctxt: an XML parser context
1058 * @value: the element name
1059 * @prefix: the element prefix
1060 * @URI: the element namespace name
1061 *
1062 * Pushes a new element name/prefix/URL on top of the name stack
1063 *
1064 * Returns -1 in case of error, the index in the stack otherwise
1065 */
1066static int
1067nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1068 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1069{
1070 if (ctxt->nameNr >= ctxt->nameMax) {
1071 const xmlChar * *tmp;
1072 void **tmp2;
1073 ctxt->nameMax *= 2;
1074 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1075 ctxt->nameMax *
1076 sizeof(ctxt->nameTab[0]));
1077 if (tmp == NULL) {
1078 ctxt->nameMax /= 2;
1079 goto mem_error;
1080 }
1081 ctxt->nameTab = tmp;
1082 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1083 ctxt->nameMax * 3 *
1084 sizeof(ctxt->pushTab[0]));
1085 if (tmp2 == NULL) {
1086 ctxt->nameMax /= 2;
1087 goto mem_error;
1088 }
1089 ctxt->pushTab = tmp2;
1090 }
1091 ctxt->nameTab[ctxt->nameNr] = value;
1092 ctxt->name = value;
1093 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1094 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001095 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 return (ctxt->nameNr++);
1097mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001098 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001099 return (-1);
1100}
1101/**
1102 * nameNsPop:
1103 * @ctxt: an XML parser context
1104 *
1105 * Pops the top element/prefix/URI name from the name stack
1106 *
1107 * Returns the name just removed
1108 */
1109static const xmlChar *
1110nameNsPop(xmlParserCtxtPtr ctxt)
1111{
1112 const xmlChar *ret;
1113
1114 if (ctxt->nameNr <= 0)
1115 return (0);
1116 ctxt->nameNr--;
1117 if (ctxt->nameNr > 0)
1118 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1119 else
1120 ctxt->name = NULL;
1121 ret = ctxt->nameTab[ctxt->nameNr];
1122 ctxt->nameTab[ctxt->nameNr] = NULL;
1123 return (ret);
1124}
Daniel Veillarda2351322004-06-27 12:08:10 +00001125#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126
1127/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001128 * namePush:
1129 * @ctxt: an XML parser context
1130 * @value: the element name
1131 *
1132 * Pushes a new element name on top of the name stack
1133 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001135 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001136int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001137namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001138{
1139 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143 ctxt->nameMax *
1144 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 if (tmp == NULL) {
1146 ctxt->nameMax /= 2;
1147 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001148 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001150 }
1151 ctxt->nameTab[ctxt->nameNr] = value;
1152 ctxt->name = value;
1153 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001155 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001157}
1158/**
1159 * namePop:
1160 * @ctxt: an XML parser context
1161 *
1162 * Pops the top element name from the name stack
1163 *
1164 * Returns the name just removed
1165 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001166const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001167namePop(xmlParserCtxtPtr ctxt)
1168{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001169 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170
1171 if (ctxt->nameNr <= 0)
1172 return (0);
1173 ctxt->nameNr--;
1174 if (ctxt->nameNr > 0)
1175 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1176 else
1177 ctxt->name = NULL;
1178 ret = ctxt->nameTab[ctxt->nameNr];
1179 ctxt->nameTab[ctxt->nameNr] = 0;
1180 return (ret);
1181}
Owen Taylor3473f882001-02-23 17:55:21 +00001182
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001183static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001184 if (ctxt->spaceNr >= ctxt->spaceMax) {
1185 ctxt->spaceMax *= 2;
1186 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1187 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1188 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001189 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001190 return(0);
1191 }
1192 }
1193 ctxt->spaceTab[ctxt->spaceNr] = val;
1194 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1195 return(ctxt->spaceNr++);
1196}
1197
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001198static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001199 int ret;
1200 if (ctxt->spaceNr <= 0) return(0);
1201 ctxt->spaceNr--;
1202 if (ctxt->spaceNr > 0)
1203 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1204 else
1205 ctxt->space = NULL;
1206 ret = ctxt->spaceTab[ctxt->spaceNr];
1207 ctxt->spaceTab[ctxt->spaceNr] = -1;
1208 return(ret);
1209}
1210
1211/*
1212 * Macros for accessing the content. Those should be used only by the parser,
1213 * and not exported.
1214 *
1215 * Dirty macros, i.e. one often need to make assumption on the context to
1216 * use them
1217 *
1218 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1219 * To be used with extreme caution since operations consuming
1220 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001233 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1234 *
1235 * NEXT Skip to the next character, this does the proper decoding
1236 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001237 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001238 * CUR_CHAR(l) returns the current unicode character (int), set l
1239 * to the number of xmlChars used for the encoding [0-5].
1240 * CUR_SCHAR same but operate on a string instead of the context
1241 * COPY_BUF copy the current unicode char to the target buffer, increment
1242 * the index
1243 * GROW, SHRINK handling of input buffers
1244 */
1245
/*
 * Raw accessors on the current input buffer. They all expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)		/* byte at the current position */
#define CUR (*ctxt->input->cur)		/* same expansion as RAW */
#define NXT(val) ctxt->input->cur[(val)]	/* byte val positions ahead */
#define CUR_PTR ctxt->input->cur	/* the current position itself */
1250
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Bytes are compared as unsigned char and the comparison stops at
 * the first mismatch, so s is never read past a differing byte. Every
 * argument is parenthesized so that arbitrary expressions can be passed,
 * and the cast keeps the const qualifier of s. Note that s is evaluated
 * once per compared byte: do not pass an expression with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1268
/*
 * SKIP(val): advance the current input by val bytes, updating the byte
 * and column counters (the line counter is not touched). Then handle a
 * PE reference if the new current byte is '%', and pop the input when it
 * is exhausted and cannot be grown. Expects ctxt to be in scope.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1276
/*
 * SKIPL(val): like SKIP(val) but walks the bytes one at a time so that
 * newlines update the line counter and reset the column to 1. Then
 * handle a PE reference if the new current byte is '%', and pop the
 * input when it is exhausted and cannot be grown. Expects ctxt to be in
 * scope. val is parenthesized so that any expression can be passed; it
 * is evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1291
Daniel Veillarda880b122003-04-21 21:36:41 +00001292#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001293 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1294 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001295 xmlSHRINK (ctxt);
1296
1297static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1298 xmlParserInputShrink(ctxt->input);
1299 if ((*ctxt->input->cur == 0) &&
1300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001302 }
Owen Taylor3473f882001-02-23 17:55:21 +00001303
Daniel Veillarda880b122003-04-21 21:36:41 +00001304#define GROW if ((ctxt->progressive == 0) && \
1305 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001306 xmlGROW (ctxt);
1307
1308static void xmlGROW (xmlParserCtxtPtr ctxt) {
1309 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1310 if ((*ctxt->input->cur == 0) &&
1311 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1312 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001313}
Owen Taylor3473f882001-02-23 17:55:21 +00001314
/* skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* advance by one character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping the column and byte
 * counters, and try to grow the input if the new current byte is 0.
 * NOTE(review): the expansion is a bare brace block, not do/while(0),
 * so it is unsafe as the body of an unbraced if/else.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by l bytes, counting them as a single character for
 * line/column purposes, then handle a PE reference if the new current
 * byte is '%'. Unlike SKIP, ctxt->nbChars is not updated here.
 * NOTE(review): l is not parenthesized in the expansion.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context / of string s; l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, advancing
 * i. A length l of 1 stores a single byte, anything else goes through
 * xmlCopyCharMultiByte().
 * NOTE(review): the expansion is an unbraced if/else without a trailing
 * semicolon, so it must be used as a full statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001341
/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Two paths are used: a fast pointer scan when there is a single input
 * and the parser is not in the DTD, and a slower one going through
 * CUR/NEXT which also pops exhausted inputs and handles PE references.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    /* only newlines touch the position: line++ and col reset to 1 */
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/*
		 * End of the buffered data: publish the position, try to
		 * grow, and reload the pointer since the grow call is
		 * given the input and may change its cur field.
		 */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	int cur;
	do {
	    cur = CUR;
	    while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
		NEXT;
		cur = CUR;
		res++;
	    }
	    /* pop exhausted nested inputs, except while inside a comment */
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	    /* loop again if popping an input exposed more blanks */
	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}
1401
1402/************************************************************************
1403 * *
1404 * Commodity functions to handle entities *
1405 * *
1406 ************************************************************************/
1407
1408/**
1409 * xmlPopInput:
1410 * @ctxt: an XML parser context
1411 *
1412 * xmlPopInput: the current input pointed by ctxt->input came to an end
1413 * pop it and return the next char.
1414 *
1415 * Returns the current xmlChar in the parser context
1416 */
1417xmlChar
1418xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard34099b42004-11-04 17:34:35 +00001419 if (ctxt->inputNr <= 1) return(0); /* End of main Input */
Owen Taylor3473f882001-02-23 17:55:21 +00001420 if (xmlParserDebugEntities)
1421 xmlGenericError(xmlGenericErrorContext,
1422 "Popping input %d\n", ctxt->inputNr);
1423 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001424 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001425 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1426 return(xmlPopInput(ctxt));
1427 return(CUR);
1428}
1429
1430/**
1431 * xmlPushInput:
1432 * @ctxt: an XML parser context
1433 * @input: an XML parser input fragment (entity, XML fragment ...).
1434 *
1435 * xmlPushInput: switch to a new input stream which is stacked on top
1436 * of the previous one(s).
1437 */
1438void
1439xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1440 if (input == NULL) return;
1441
1442 if (xmlParserDebugEntities) {
1443 if ((ctxt->input != NULL) && (ctxt->input->filename))
1444 xmlGenericError(xmlGenericErrorContext,
1445 "%s(%d): ", ctxt->input->filename,
1446 ctxt->input->line);
1447 xmlGenericError(xmlGenericErrorContext,
1448 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1449 }
1450 inputPush(ctxt, input);
1451 GROW;
1452}
1453
1454/**
1455 * xmlParseCharRef:
1456 * @ctxt: an XML parser context
1457 *
1458 * parse Reference declarations
1459 *
1460 * [66] CharRef ::= '&#' [0-9]+ ';' |
1461 * '&#x' [0-9a-fA-F]+ ';'
1462 *
1463 * [ WFC: Legal Character ]
1464 * Characters referred to using character references must match the
1465 * production for Char.
1466 *
1467 * Returns the value parsed (as an int), 0 in case of error
1468 */
1469int
1470xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001471 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001472 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001473 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001474
Owen Taylor3473f882001-02-23 17:55:21 +00001475 /*
1476 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1477 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001478 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001479 (NXT(2) == 'x')) {
1480 SKIP(3);
1481 GROW;
1482 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001483 if (count++ > 20) {
1484 count = 0;
1485 GROW;
1486 }
1487 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001488 val = val * 16 + (CUR - '0');
1489 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1490 val = val * 16 + (CUR - 'a') + 10;
1491 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1492 val = val * 16 + (CUR - 'A') + 10;
1493 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001494 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001495 val = 0;
1496 break;
1497 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001498 if (val > 0x10FFFF)
1499 outofrange = val;
1500
Owen Taylor3473f882001-02-23 17:55:21 +00001501 NEXT;
1502 count++;
1503 }
1504 if (RAW == ';') {
1505 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001506 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001507 ctxt->nbChars ++;
1508 ctxt->input->cur++;
1509 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001510 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 SKIP(2);
1512 GROW;
1513 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001514 if (count++ > 20) {
1515 count = 0;
1516 GROW;
1517 }
1518 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001519 val = val * 10 + (CUR - '0');
1520 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001521 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001522 val = 0;
1523 break;
1524 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001525 if (val > 0x10FFFF)
1526 outofrange = val;
1527
Owen Taylor3473f882001-02-23 17:55:21 +00001528 NEXT;
1529 count++;
1530 }
1531 if (RAW == ';') {
1532 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001533 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001534 ctxt->nbChars ++;
1535 ctxt->input->cur++;
1536 }
1537 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001538 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001539 }
1540
1541 /*
1542 * [ WFC: Legal Character ]
1543 * Characters referred to using character references must match the
1544 * production for Char.
1545 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001546 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001547 return(val);
1548 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001549 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1550 "xmlParseCharRef: invalid xmlChar value %d\n",
1551 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001552 }
1553 return(0);
1554}
1555
1556/**
1557 * xmlParseStringCharRef:
1558 * @ctxt: an XML parser context
1559 * @str: a pointer to an index in the string
1560 *
1561 * parse Reference declarations, variant parsing from a string rather
1562 * than an an input flow.
1563 *
1564 * [66] CharRef ::= '&#' [0-9]+ ';' |
1565 * '&#x' [0-9a-fA-F]+ ';'
1566 *
1567 * [ WFC: Legal Character ]
1568 * Characters referred to using character references must match the
1569 * production for Char.
1570 *
1571 * Returns the value parsed (as an int), 0 in case of error, str will be
1572 * updated to the current value of the index
1573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001574static int
Owen Taylor3473f882001-02-23 17:55:21 +00001575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1576 const xmlChar *ptr;
1577 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001578 unsigned int val = 0;
1579 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001580
1581 if ((str == NULL) || (*str == NULL)) return(0);
1582 ptr = *str;
1583 cur = *ptr;
1584 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1585 ptr += 3;
1586 cur = *ptr;
1587 while (cur != ';') { /* Non input consuming loop */
1588 if ((cur >= '0') && (cur <= '9'))
1589 val = val * 16 + (cur - '0');
1590 else if ((cur >= 'a') && (cur <= 'f'))
1591 val = val * 16 + (cur - 'a') + 10;
1592 else if ((cur >= 'A') && (cur <= 'F'))
1593 val = val * 16 + (cur - 'A') + 10;
1594 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001595 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001596 val = 0;
1597 break;
1598 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001599 if (val > 0x10FFFF)
1600 outofrange = val;
1601
Owen Taylor3473f882001-02-23 17:55:21 +00001602 ptr++;
1603 cur = *ptr;
1604 }
1605 if (cur == ';')
1606 ptr++;
1607 } else if ((cur == '&') && (ptr[1] == '#')){
1608 ptr += 2;
1609 cur = *ptr;
1610 while (cur != ';') { /* Non input consuming loops */
1611 if ((cur >= '0') && (cur <= '9'))
1612 val = val * 10 + (cur - '0');
1613 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001614 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001615 val = 0;
1616 break;
1617 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001618 if (val > 0x10FFFF)
1619 outofrange = val;
1620
Owen Taylor3473f882001-02-23 17:55:21 +00001621 ptr++;
1622 cur = *ptr;
1623 }
1624 if (cur == ';')
1625 ptr++;
1626 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001627 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001628 return(0);
1629 }
1630 *str = ptr;
1631
1632 /*
1633 * [ WFC: Legal Character ]
1634 * Characters referred to using character references must match the
1635 * production for Char.
1636 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001637 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001638 return(val);
1639 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001640 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1641 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1642 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001643 }
1644 return(0);
1645}
1646
1647/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001648 * xmlNewBlanksWrapperInputStream:
1649 * @ctxt: an XML parser context
1650 * @entity: an Entity pointer
1651 *
1652 * Create a new input stream for wrapping
1653 * blanks around a PEReference
1654 *
1655 * Returns the new input stream or NULL
1656 */
1657
1658static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1659
Daniel Veillardf4862f02002-09-10 11:13:43 +00001660static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001661xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1662 xmlParserInputPtr input;
1663 xmlChar *buffer;
1664 size_t length;
1665 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001666 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1667 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001668 return(NULL);
1669 }
1670 if (xmlParserDebugEntities)
1671 xmlGenericError(xmlGenericErrorContext,
1672 "new blanks wrapper for entity: %s\n", entity->name);
1673 input = xmlNewInputStream(ctxt);
1674 if (input == NULL) {
1675 return(NULL);
1676 }
1677 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001678 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001679 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001681 return(NULL);
1682 }
1683 buffer [0] = ' ';
1684 buffer [1] = '%';
1685 buffer [length-3] = ';';
1686 buffer [length-2] = ' ';
1687 buffer [length-1] = 0;
1688 memcpy(buffer + 2, entity->name, length - 5);
1689 input->free = deallocblankswrapper;
1690 input->base = buffer;
1691 input->cur = buffer;
1692 input->length = length;
1693 input->end = &buffer[length];
1694 return(input);
1695}
1696
1697/**
Owen Taylor3473f882001-02-23 17:55:21 +00001698 * xmlParserHandlePEReference:
1699 * @ctxt: the parser context
1700 *
1701 * [69] PEReference ::= '%' Name ';'
1702 *
1703 * [ WFC: No Recursion ]
1704 * A parsed entity must not contain a recursive
1705 * reference to itself, either directly or indirectly.
1706 *
1707 * [ WFC: Entity Declared ]
1708 * In a document without any DTD, a document with only an internal DTD
1709 * subset which contains no parameter entity references, or a document
1710 * with "standalone='yes'", ... ... The declaration of a parameter
1711 * entity must precede any reference to it...
1712 *
1713 * [ VC: Entity Declared ]
1714 * In a document with an external subset or external parameter entities
1715 * with "standalone='no'", ... ... The declaration of a parameter entity
1716 * must precede any reference to it...
1717 *
1718 * [ WFC: In DTD ]
1719 * Parameter-entity references may only appear in the DTD.
1720 * NOTE: misleading but this is handled.
1721 *
1722 * A PEReference may have been detected in the current input stream
1723 * the handling is done accordingly to
1724 * http://www.w3.org/TR/REC-xml#entproc
1725 * i.e.
1726 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001727 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001728 */
1729void
1730xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001731 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001732 xmlEntityPtr entity = NULL;
1733 xmlParserInputPtr input;
1734
Owen Taylor3473f882001-02-23 17:55:21 +00001735 if (RAW != '%') return;
1736 switch(ctxt->instate) {
1737 case XML_PARSER_CDATA_SECTION:
1738 return;
1739 case XML_PARSER_COMMENT:
1740 return;
1741 case XML_PARSER_START_TAG:
1742 return;
1743 case XML_PARSER_END_TAG:
1744 return;
1745 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001746 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001747 return;
1748 case XML_PARSER_PROLOG:
1749 case XML_PARSER_START:
1750 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001751 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 return;
1753 case XML_PARSER_ENTITY_DECL:
1754 case XML_PARSER_CONTENT:
1755 case XML_PARSER_ATTRIBUTE_VALUE:
1756 case XML_PARSER_PI:
1757 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001758 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001759 /* we just ignore it there */
1760 return;
1761 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001762 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001763 return;
1764 case XML_PARSER_ENTITY_VALUE:
1765 /*
1766 * NOTE: in the case of entity values, we don't do the
1767 * substitution here since we need the literal
1768 * entity value to be able to save the internal
1769 * subset of the document.
1770 * This will be handled by xmlStringDecodeEntities
1771 */
1772 return;
1773 case XML_PARSER_DTD:
1774 /*
1775 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1776 * In the internal DTD subset, parameter-entity references
1777 * can occur only where markup declarations can occur, not
1778 * within markup declarations.
1779 * In that case this is handled in xmlParseMarkupDecl
1780 */
1781 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1782 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001783 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001784 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001785 break;
1786 case XML_PARSER_IGNORE:
1787 return;
1788 }
1789
1790 NEXT;
1791 name = xmlParseName(ctxt);
1792 if (xmlParserDebugEntities)
1793 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001794 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001796 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001797 } else {
1798 if (RAW == ';') {
1799 NEXT;
1800 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1801 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1802 if (entity == NULL) {
1803
1804 /*
1805 * [ WFC: Entity Declared ]
1806 * In a document without any DTD, a document with only an
1807 * internal DTD subset which contains no parameter entity
1808 * references, or a document with "standalone='yes'", ...
1809 * ... The declaration of a parameter entity must precede
1810 * any reference to it...
1811 */
1812 if ((ctxt->standalone == 1) ||
1813 ((ctxt->hasExternalSubset == 0) &&
1814 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001815 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001816 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001817 } else {
1818 /*
1819 * [ VC: Entity Declared ]
1820 * In a document with an external subset or external
1821 * parameter entities with "standalone='no'", ...
1822 * ... The declaration of a parameter entity must precede
1823 * any reference to it...
1824 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001825 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1826 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1827 "PEReference: %%%s; not found\n",
1828 name);
1829 } else
1830 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1831 "PEReference: %%%s; not found\n",
1832 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001833 ctxt->valid = 0;
1834 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001835 } else if (ctxt->input->free != deallocblankswrapper) {
1836 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1837 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001838 } else {
1839 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1840 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001841 xmlChar start[4];
1842 xmlCharEncoding enc;
1843
Owen Taylor3473f882001-02-23 17:55:21 +00001844 /*
1845 * handle the extra spaces added before and after
1846 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001847 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001848 */
1849 input = xmlNewEntityInputStream(ctxt, entity);
1850 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001851
1852 /*
1853 * Get the 4 first bytes and decode the charset
1854 * if enc != XML_CHAR_ENCODING_NONE
1855 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001856 * Note that, since we may have some non-UTF8
1857 * encoding (like UTF16, bug 135229), the 'length'
1858 * is not known, but we can calculate based upon
1859 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001860 */
1861 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001862 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001863 start[0] = RAW;
1864 start[1] = NXT(1);
1865 start[2] = NXT(2);
1866 start[3] = NXT(3);
1867 enc = xmlDetectCharEncoding(start, 4);
1868 if (enc != XML_CHAR_ENCODING_NONE) {
1869 xmlSwitchEncoding(ctxt, enc);
1870 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001871 }
1872
Owen Taylor3473f882001-02-23 17:55:21 +00001873 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001874 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1875 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001876 xmlParseTextDecl(ctxt);
1877 }
Owen Taylor3473f882001-02-23 17:55:21 +00001878 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001879 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1880 "PEReference: %s is not a parameter entity\n",
1881 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001882 }
1883 }
1884 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001885 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001886 }
Owen Taylor3473f882001-02-23 17:55:21 +00001887 }
1888}
1889
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> to that many xmlChar.
 * The enclosing function must provide a variable named <buffer>_size and
 * a "mem_error" label; on allocation failure the macro jumps there with
 * <buffer> still pointing at the old, still allocated, block.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown_;							\
    buffer##_size *= 2;							\
    grown_ = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown_ == NULL) goto mem_error;					\
    buffer = grown_;							\
}
1901
1902/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001903 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001904 * @ctxt: the parser context
1905 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001906 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001907 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1908 * @end: an end marker xmlChar, 0 if none
1909 * @end2: an end marker xmlChar, 0 if none
1910 * @end3: an end marker xmlChar, 0 if none
1911 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001912 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001913 *
1914 * [67] Reference ::= EntityRef | CharRef
1915 *
1916 * [69] PEReference ::= '%' Name ';'
1917 *
1918 * Returns A newly allocated string with the substitution done. The caller
1919 * must deallocate it !
1920 */
1921xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001922xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1923 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001924 xmlChar *buffer = NULL;
1925 int buffer_size = 0;
1926
1927 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001928 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001929 xmlEntityPtr ent;
1930 int c,l;
1931 int nbchars = 0;
1932
Daniel Veillarde57ec792003-09-10 10:50:59 +00001933 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001934 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001935 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001936
1937 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001938 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001939 return(NULL);
1940 }
1941
1942 /*
1943 * allocate a translation buffer.
1944 */
1945 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001946 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001947 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001948
1949 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001950 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001951 * we are operating on already parsed values.
1952 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001953 if (str < last)
1954 c = CUR_SCHAR(str, l);
1955 else
1956 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001957 while ((c != 0) && (c != end) && /* non input consuming loop */
1958 (c != end2) && (c != end3)) {
1959
1960 if (c == 0) break;
1961 if ((c == '&') && (str[1] == '#')) {
1962 int val = xmlParseStringCharRef(ctxt, &str);
1963 if (val != 0) {
1964 COPY_BUF(0,buffer,nbchars,val);
1965 }
1966 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1967 if (xmlParserDebugEntities)
1968 xmlGenericError(xmlGenericErrorContext,
1969 "String decoding Entity Reference: %.30s\n",
1970 str);
1971 ent = xmlParseStringEntityRef(ctxt, &str);
1972 if ((ent != NULL) &&
1973 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1974 if (ent->content != NULL) {
1975 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1976 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001977 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1978 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001979 }
1980 } else if ((ent != NULL) && (ent->content != NULL)) {
1981 xmlChar *rep;
1982
1983 ctxt->depth++;
1984 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1985 0, 0, 0);
1986 ctxt->depth--;
1987 if (rep != NULL) {
1988 current = rep;
1989 while (*current != 0) { /* non input consuming loop */
1990 buffer[nbchars++] = *current++;
1991 if (nbchars >
1992 buffer_size - XML_PARSER_BUFFER_SIZE) {
1993 growBuffer(buffer);
1994 }
1995 }
1996 xmlFree(rep);
1997 }
1998 } else if (ent != NULL) {
1999 int i = xmlStrlen(ent->name);
2000 const xmlChar *cur = ent->name;
2001
2002 buffer[nbchars++] = '&';
2003 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2004 growBuffer(buffer);
2005 }
2006 for (;i > 0;i--)
2007 buffer[nbchars++] = *cur++;
2008 buffer[nbchars++] = ';';
2009 }
2010 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2011 if (xmlParserDebugEntities)
2012 xmlGenericError(xmlGenericErrorContext,
2013 "String decoding PE Reference: %.30s\n", str);
2014 ent = xmlParseStringPEReference(ctxt, &str);
2015 if (ent != NULL) {
2016 xmlChar *rep;
2017
2018 ctxt->depth++;
2019 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2020 0, 0, 0);
2021 ctxt->depth--;
2022 if (rep != NULL) {
2023 current = rep;
2024 while (*current != 0) { /* non input consuming loop */
2025 buffer[nbchars++] = *current++;
2026 if (nbchars >
2027 buffer_size - XML_PARSER_BUFFER_SIZE) {
2028 growBuffer(buffer);
2029 }
2030 }
2031 xmlFree(rep);
2032 }
2033 }
2034 } else {
2035 COPY_BUF(l,buffer,nbchars,c);
2036 str += l;
2037 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2038 growBuffer(buffer);
2039 }
2040 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002041 if (str < last)
2042 c = CUR_SCHAR(str, l);
2043 else
2044 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002045 }
2046 buffer[nbchars++] = 0;
2047 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002048
2049mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002050 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002051 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002052}
2053
Daniel Veillarde57ec792003-09-10 10:50:59 +00002054/**
2055 * xmlStringDecodeEntities:
2056 * @ctxt: the parser context
2057 * @str: the input string
2058 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2059 * @end: an end marker xmlChar, 0 if none
2060 * @end2: an end marker xmlChar, 0 if none
2061 * @end3: an end marker xmlChar, 0 if none
2062 *
2063 * Takes a entity string content and process to do the adequate substitutions.
2064 *
2065 * [67] Reference ::= EntityRef | CharRef
2066 *
2067 * [69] PEReference ::= '%' Name ';'
2068 *
2069 * Returns A newly allocated string with the substitution done. The caller
2070 * must deallocate it !
2071 */
2072xmlChar *
2073xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2074 xmlChar end, xmlChar end2, xmlChar end3) {
2075 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2076 end, end2, end3));
2077}
Owen Taylor3473f882001-02-23 17:55:21 +00002078
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
2084
2085/**
2086 * areBlanks:
2087 * @ctxt: an XML parser context
2088 * @str: a xmlChar *
2089 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002090 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002091 *
2092 * Is this a sequence of blank chars that one can ignore ?
2093 *
2094 * Returns 1 if ignorable 0 otherwise.
2095 */
2096
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002097static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2098 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002099 int i, ret;
2100 xmlNodePtr lastChild;
2101
Daniel Veillard05c13a22001-09-09 08:38:09 +00002102 /*
2103 * Don't spend time trying to differentiate them, the same callback is
2104 * used !
2105 */
2106 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002107 return(0);
2108
Owen Taylor3473f882001-02-23 17:55:21 +00002109 /*
2110 * Check for xml:space value.
2111 */
2112 if (*(ctxt->space) == 1)
2113 return(0);
2114
2115 /*
2116 * Check that the string is made of blanks
2117 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002118 if (blank_chars == 0) {
2119 for (i = 0;i < len;i++)
2120 if (!(IS_BLANK_CH(str[i]))) return(0);
2121 }
Owen Taylor3473f882001-02-23 17:55:21 +00002122
2123 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002124 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002125 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002126 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002127 if (ctxt->myDoc != NULL) {
2128 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2129 if (ret == 0) return(1);
2130 if (ret == 1) return(0);
2131 }
2132
2133 /*
2134 * Otherwise, heuristic :-\
2135 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002136 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002137 if ((ctxt->node->children == NULL) &&
2138 (RAW == '<') && (NXT(1) == '/')) return(0);
2139
2140 lastChild = xmlGetLastChild(ctxt->node);
2141 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002142 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2143 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002144 } else if (xmlNodeIsText(lastChild))
2145 return(0);
2146 else if ((ctxt->node->children != NULL) &&
2147 (xmlNodeIsText(ctxt->node->children)))
2148 return(0);
2149 return(1);
2150}
2151
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/
2158
2159/**
2160 * xmlSplitQName:
2161 * @ctxt: an XML parser context
2162 * @name: an XML parser context
2163 * @prefix: a xmlChar **
2164 *
2165 * parse an UTF8 encoded XML qualified name string
2166 *
2167 * [NS 5] QName ::= (Prefix ':')? LocalPart
2168 *
2169 * [NS 6] Prefix ::= NCName
2170 *
2171 * [NS 7] LocalPart ::= NCName
2172 *
2173 * Returns the local part, and prefix is updated
2174 * to get the Prefix if any.
2175 */
2176
2177xmlChar *
2178xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2179 xmlChar buf[XML_MAX_NAMELEN + 5];
2180 xmlChar *buffer = NULL;
2181 int len = 0;
2182 int max = XML_MAX_NAMELEN;
2183 xmlChar *ret = NULL;
2184 const xmlChar *cur = name;
2185 int c;
2186
2187 *prefix = NULL;
2188
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002189 if (cur == NULL) return(NULL);
2190
Owen Taylor3473f882001-02-23 17:55:21 +00002191#ifndef XML_XML_NAMESPACE
2192 /* xml: prefix is not really a namespace */
2193 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2194 (cur[2] == 'l') && (cur[3] == ':'))
2195 return(xmlStrdup(name));
2196#endif
2197
Daniel Veillard597bc482003-07-24 16:08:28 +00002198 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002199 if (cur[0] == ':')
2200 return(xmlStrdup(name));
2201
2202 c = *cur++;
2203 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2204 buf[len++] = c;
2205 c = *cur++;
2206 }
2207 if (len >= max) {
2208 /*
2209 * Okay someone managed to make a huge name, so he's ready to pay
2210 * for the processing speed.
2211 */
2212 max = len * 2;
2213
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002214 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002215 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002216 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002217 return(NULL);
2218 }
2219 memcpy(buffer, buf, len);
2220 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2221 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002222 xmlChar *tmp;
2223
Owen Taylor3473f882001-02-23 17:55:21 +00002224 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002225 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002226 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002227 if (tmp == NULL) {
2228 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002229 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002230 return(NULL);
2231 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002232 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002233 }
2234 buffer[len++] = c;
2235 c = *cur++;
2236 }
2237 buffer[len] = 0;
2238 }
2239
Daniel Veillard597bc482003-07-24 16:08:28 +00002240 /* nasty but well=formed
2241 if ((c == ':') && (*cur == 0)) {
2242 return(xmlStrdup(name));
2243 } */
2244
Owen Taylor3473f882001-02-23 17:55:21 +00002245 if (buffer == NULL)
2246 ret = xmlStrndup(buf, len);
2247 else {
2248 ret = buffer;
2249 buffer = NULL;
2250 max = XML_MAX_NAMELEN;
2251 }
2252
2253
2254 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002255 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002256 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002257 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002258 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002259 }
Owen Taylor3473f882001-02-23 17:55:21 +00002260 len = 0;
2261
Daniel Veillardbb284f42002-10-16 18:02:47 +00002262 /*
2263 * Check that the first character is proper to start
2264 * a new name
2265 */
2266 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2267 ((c >= 0x41) && (c <= 0x5A)) ||
2268 (c == '_') || (c == ':'))) {
2269 int l;
2270 int first = CUR_SCHAR(cur, l);
2271
2272 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002273 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002274 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002275 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002276 }
2277 }
2278 cur++;
2279
Owen Taylor3473f882001-02-23 17:55:21 +00002280 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2281 buf[len++] = c;
2282 c = *cur++;
2283 }
2284 if (len >= max) {
2285 /*
2286 * Okay someone managed to make a huge name, so he's ready to pay
2287 * for the processing speed.
2288 */
2289 max = len * 2;
2290
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002291 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002292 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002293 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002294 return(NULL);
2295 }
2296 memcpy(buffer, buf, len);
2297 while (c != 0) { /* tested bigname2.xml */
2298 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002299 xmlChar *tmp;
2300
Owen Taylor3473f882001-02-23 17:55:21 +00002301 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002302 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002303 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002304 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002305 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002306 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 return(NULL);
2308 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002309 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002310 }
2311 buffer[len++] = c;
2312 c = *cur++;
2313 }
2314 buffer[len] = 0;
2315 }
2316
2317 if (buffer == NULL)
2318 ret = xmlStrndup(buf, len);
2319 else {
2320 ret = buffer;
2321 }
2322 }
2323
2324 return(ret);
2325}
2326
2327/************************************************************************
2328 * *
2329 * The parser itself *
2330 * Relates to http://www.w3.org/TR/REC-xml *
2331 * *
2332 ************************************************************************/
2333
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002334static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002335static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002336 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002337
Owen Taylor3473f882001-02-23 17:55:21 +00002338/**
2339 * xmlParseName:
2340 * @ctxt: an XML parser context
2341 *
2342 * parse an XML name.
2343 *
2344 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2345 * CombiningChar | Extender
2346 *
2347 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2348 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002349 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002350 *
2351 * Returns the Name parsed or NULL
2352 */
2353
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002354const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002355xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002356 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002357 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002358 int count = 0;
2359
2360 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002361
2362 /*
2363 * Accelerator for simple ASCII names
2364 */
2365 in = ctxt->input->cur;
2366 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2367 ((*in >= 0x41) && (*in <= 0x5A)) ||
2368 (*in == '_') || (*in == ':')) {
2369 in++;
2370 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2371 ((*in >= 0x41) && (*in <= 0x5A)) ||
2372 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002373 (*in == '_') || (*in == '-') ||
2374 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002375 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002376 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002377 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002378 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002379 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002380 ctxt->nbChars += count;
2381 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002382 if (ret == NULL)
2383 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002384 return(ret);
2385 }
2386 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002387 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002388}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002389
Daniel Veillard46de64e2002-05-29 08:21:33 +00002390/**
2391 * xmlParseNameAndCompare:
2392 * @ctxt: an XML parser context
2393 *
2394 * parse an XML name and compares for match
2395 * (specialized for endtag parsing)
2396 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002397 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2398 * and the name for mismatch
2399 */
2400
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002401static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002402xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002403 register const xmlChar *cmp = other;
2404 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002405 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002406
2407 GROW;
2408
2409 in = ctxt->input->cur;
2410 while (*in != 0 && *in == *cmp) {
2411 ++in;
2412 ++cmp;
2413 }
William M. Brack76e95df2003-10-18 16:20:14 +00002414 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002415 /* success */
2416 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002417 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002418 }
2419 /* failure (or end of input buffer), check with full function */
2420 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002421 /* strings coming from the dictionnary direct compare possible */
2422 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002423 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002424 }
2425 return ret;
2426}
2427
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002429xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002430 int len = 0, l;
2431 int c;
2432 int count = 0;
2433
2434 /*
2435 * Handler for more complex cases
2436 */
2437 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002438 c = CUR_CHAR(l);
2439 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2440 (!IS_LETTER(c) && (c != '_') &&
2441 (c != ':'))) {
2442 return(NULL);
2443 }
2444
2445 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002446 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002447 (c == '.') || (c == '-') ||
2448 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002449 (IS_COMBINING(c)) ||
2450 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (count++ > 100) {
2452 count = 0;
2453 GROW;
2454 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002455 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002456 NEXTL(l);
2457 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002458 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002459 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002460}
2461
2462/**
2463 * xmlParseStringName:
2464 * @ctxt: an XML parser context
2465 * @str: a pointer to the string pointer (IN/OUT)
2466 *
2467 * parse an XML name.
2468 *
2469 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2470 * CombiningChar | Extender
2471 *
2472 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2473 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002474 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002475 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002476 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002477 * is updated to the current location in the string.
2478 */
2479
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002480static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002481xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2482 xmlChar buf[XML_MAX_NAMELEN + 5];
2483 const xmlChar *cur = *str;
2484 int len = 0, l;
2485 int c;
2486
2487 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002488 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002489 (c != ':')) {
2490 return(NULL);
2491 }
2492
William M. Brack871611b2003-10-18 04:53:14 +00002493 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002494 (c == '.') || (c == '-') ||
2495 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002496 (IS_COMBINING(c)) ||
2497 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002498 COPY_BUF(l,buf,len,c);
2499 cur += l;
2500 c = CUR_SCHAR(cur, l);
2501 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2502 /*
2503 * Okay someone managed to make a huge name, so he's ready to pay
2504 * for the processing speed.
2505 */
2506 xmlChar *buffer;
2507 int max = len * 2;
2508
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002509 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002510 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002511 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002512 return(NULL);
2513 }
2514 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002515 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002516 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002517 (c == '.') || (c == '-') ||
2518 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002519 (IS_COMBINING(c)) ||
2520 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002521 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002522 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 COPY_BUF(l,buffer,len,c);
2534 cur += l;
2535 c = CUR_SCHAR(cur, l);
2536 }
2537 buffer[len] = 0;
2538 *str = cur;
2539 return(buffer);
2540 }
2541 }
2542 *str = cur;
2543 return(xmlStrndup(buf, len));
2544}
2545
2546/**
2547 * xmlParseNmtoken:
2548 * @ctxt: an XML parser context
2549 *
2550 * parse an XML Nmtoken.
2551 *
2552 * [7] Nmtoken ::= (NameChar)+
2553 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002554 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002555 *
2556 * Returns the Nmtoken parsed or NULL
2557 */
2558
2559xmlChar *
2560xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2561 xmlChar buf[XML_MAX_NAMELEN + 5];
2562 int len = 0, l;
2563 int c;
2564 int count = 0;
2565
2566 GROW;
2567 c = CUR_CHAR(l);
2568
William M. Brack871611b2003-10-18 04:53:14 +00002569 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002570 (c == '.') || (c == '-') ||
2571 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002572 (IS_COMBINING(c)) ||
2573 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002574 if (count++ > 100) {
2575 count = 0;
2576 GROW;
2577 }
2578 COPY_BUF(l,buf,len,c);
2579 NEXTL(l);
2580 c = CUR_CHAR(l);
2581 if (len >= XML_MAX_NAMELEN) {
2582 /*
2583 * Okay someone managed to make a huge token, so he's ready to pay
2584 * for the processing speed.
2585 */
2586 xmlChar *buffer;
2587 int max = len * 2;
2588
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002589 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002590 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002591 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002592 return(NULL);
2593 }
2594 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002595 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002596 (c == '.') || (c == '-') ||
2597 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002598 (IS_COMBINING(c)) ||
2599 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002600 if (count++ > 100) {
2601 count = 0;
2602 GROW;
2603 }
2604 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002605 xmlChar *tmp;
2606
Owen Taylor3473f882001-02-23 17:55:21 +00002607 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002608 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002609 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002610 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002611 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002612 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002613 return(NULL);
2614 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002615 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 COPY_BUF(l,buffer,len,c);
2618 NEXTL(l);
2619 c = CUR_CHAR(l);
2620 }
2621 buffer[len] = 0;
2622 return(buffer);
2623 }
2624 }
2625 if (len == 0)
2626 return(NULL);
2627 return(xmlStrndup(buf, len));
2628}
2629
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *	               "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The literal is first copied verbatim into a growable buffer, then
 * every '%' or '&' (other than "&#") found in it is checked to be
 * followed by a name and a ';', and finally the buffer is passed through
 * xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF.
 *
 * When the literal is properly terminated and @orig is non-NULL, *@orig
 * receives the raw buffer instead of it being freed here. *@orig is not
 * written on any failure path.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* The opening quote decides which quote terminates the literal. */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* Remember the input we started in, see the NOTE below. */
    input = ctxt->input;
    GROW;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* Keep a few spare bytes for the next character and the final 0. */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* A zero here may only mean the buffer ran dry: retry once. */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;

            cur++;
            /* On success this moves cur past the name. */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* Do not step over the terminating 0 with the cur++ below. */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* Unterminated literal: the buffer is dropped, NULL is returned. */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        /* Hand the raw text over through @orig, or release it. */
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
2769
2770/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002771 * xmlParseAttValueComplex:
2772 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002773 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002774 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002775 *
2776 * parse a value for an attribute, this is the fallback function
2777 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002778 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002779 *
2780 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2781 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002782static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002783xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002784 xmlChar limit = 0;
2785 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002786 int len = 0;
2787 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002788 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002789 xmlChar *current = NULL;
2790 xmlEntityPtr ent;
2791
Owen Taylor3473f882001-02-23 17:55:21 +00002792 if (NXT(0) == '"') {
2793 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2794 limit = '"';
2795 NEXT;
2796 } else if (NXT(0) == '\'') {
2797 limit = '\'';
2798 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2799 NEXT;
2800 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002801 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002802 return(NULL);
2803 }
2804
2805 /*
2806 * allocate a translation buffer.
2807 */
2808 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002809 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002810 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002811
2812 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002813 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002814 */
2815 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002816 while ((NXT(0) != limit) && /* checked */
2817 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002818 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002819 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002820 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002821 if (NXT(1) == '#') {
2822 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002823
Owen Taylor3473f882001-02-23 17:55:21 +00002824 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002825 if (ctxt->replaceEntities) {
2826 if (len > buf_size - 10) {
2827 growBuffer(buf);
2828 }
2829 buf[len++] = '&';
2830 } else {
2831 /*
2832 * The reparsing will be done in xmlStringGetNodeList()
2833 * called by the attribute() function in SAX.c
2834 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002835 if (len > buf_size - 10) {
2836 growBuffer(buf);
2837 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002838 buf[len++] = '&';
2839 buf[len++] = '#';
2840 buf[len++] = '3';
2841 buf[len++] = '8';
2842 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002843 }
2844 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002845 if (len > buf_size - 10) {
2846 growBuffer(buf);
2847 }
Owen Taylor3473f882001-02-23 17:55:21 +00002848 len += xmlCopyChar(0, &buf[len], val);
2849 }
2850 } else {
2851 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002852 if ((ent != NULL) &&
2853 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2854 if (len > buf_size - 10) {
2855 growBuffer(buf);
2856 }
2857 if ((ctxt->replaceEntities == 0) &&
2858 (ent->content[0] == '&')) {
2859 buf[len++] = '&';
2860 buf[len++] = '#';
2861 buf[len++] = '3';
2862 buf[len++] = '8';
2863 buf[len++] = ';';
2864 } else {
2865 buf[len++] = ent->content[0];
2866 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002867 } else if ((ent != NULL) &&
2868 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002869 xmlChar *rep;
2870
2871 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2872 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002873 XML_SUBSTITUTE_REF,
2874 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (rep != NULL) {
2876 current = rep;
2877 while (*current != 0) { /* non input consuming */
2878 buf[len++] = *current++;
2879 if (len > buf_size - 10) {
2880 growBuffer(buf);
2881 }
2882 }
2883 xmlFree(rep);
2884 }
2885 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002886 if (len > buf_size - 10) {
2887 growBuffer(buf);
2888 }
Owen Taylor3473f882001-02-23 17:55:21 +00002889 if (ent->content != NULL)
2890 buf[len++] = ent->content[0];
2891 }
2892 } else if (ent != NULL) {
2893 int i = xmlStrlen(ent->name);
2894 const xmlChar *cur = ent->name;
2895
2896 /*
2897 * This may look absurd but is needed to detect
2898 * entities problems
2899 */
2900 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2901 (ent->content != NULL)) {
2902 xmlChar *rep;
2903 rep = xmlStringDecodeEntities(ctxt, ent->content,
2904 XML_SUBSTITUTE_REF, 0, 0, 0);
2905 if (rep != NULL)
2906 xmlFree(rep);
2907 }
2908
2909 /*
2910 * Just output the reference
2911 */
2912 buf[len++] = '&';
2913 if (len > buf_size - i - 10) {
2914 growBuffer(buf);
2915 }
2916 for (;i > 0;i--)
2917 buf[len++] = *cur++;
2918 buf[len++] = ';';
2919 }
2920 }
2921 } else {
2922 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002923 if ((len != 0) || (!normalize)) {
2924 if ((!normalize) || (!in_space)) {
2925 COPY_BUF(l,buf,len,0x20);
2926 if (len > buf_size - 10) {
2927 growBuffer(buf);
2928 }
2929 }
2930 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002931 }
2932 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002933 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002934 COPY_BUF(l,buf,len,c);
2935 if (len > buf_size - 10) {
2936 growBuffer(buf);
2937 }
2938 }
2939 NEXTL(l);
2940 }
2941 GROW;
2942 c = CUR_CHAR(l);
2943 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002944 if ((in_space) && (normalize)) {
2945 while (buf[len - 1] == 0x20) len--;
2946 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002947 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002948 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002949 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002950 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002951 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2952 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002953 } else
2954 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002955 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002956 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002957
2958mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002959 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002960 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961}
2962
/**
 * xmlParseAttValue:
 * @ctxt:  an XML parser context
 *
 * parse a value for an attribute
 * Note: the parser won't do substitution of entities here, this
 * will be handled later in xmlStringGetNodeList
 *
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
 *                   "'" ([^<&'] | Reference)* "'"
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows:
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
 *   appending #x20 to the normalized value, except that only a single
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
 *   parsed entity or the literal entity value of an internal parsed entity
 * - other characters are processed by appending them to the normalized value
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * This is a thin wrapper around xmlParseAttValueInternal() that asks for
 * neither the length nor the allocation flag and disables normalization.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */


xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
    /* len = NULL, alloc = NULL, normalize = 0 */
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
}
3001
3002/**
Owen Taylor3473f882001-02-23 17:55:21 +00003003 * xmlParseSystemLiteral:
3004 * @ctxt: an XML parser context
3005 *
3006 * parse an XML Literal
3007 *
3008 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3009 *
3010 * Returns the SystemLiteral parsed or NULL
3011 */
3012
3013xmlChar *
3014xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3015 xmlChar *buf = NULL;
3016 int len = 0;
3017 int size = XML_PARSER_BUFFER_SIZE;
3018 int cur, l;
3019 xmlChar stop;
3020 int state = ctxt->instate;
3021 int count = 0;
3022
3023 SHRINK;
3024 if (RAW == '"') {
3025 NEXT;
3026 stop = '"';
3027 } else if (RAW == '\'') {
3028 NEXT;
3029 stop = '\'';
3030 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003031 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003032 return(NULL);
3033 }
3034
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003035 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003036 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 return(NULL);
3039 }
3040 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3041 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003042 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003044 xmlChar *tmp;
3045
Owen Taylor3473f882001-02-23 17:55:21 +00003046 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003047 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3048 if (tmp == NULL) {
3049 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003050 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003051 ctxt->instate = (xmlParserInputState) state;
3052 return(NULL);
3053 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003054 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003055 }
3056 count++;
3057 if (count > 50) {
3058 GROW;
3059 count = 0;
3060 }
3061 COPY_BUF(l,buf,len,cur);
3062 NEXTL(l);
3063 cur = CUR_CHAR(l);
3064 if (cur == 0) {
3065 GROW;
3066 SHRINK;
3067 cur = CUR_CHAR(l);
3068 }
3069 }
3070 buf[len] = 0;
3071 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003072 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003073 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003074 } else {
3075 NEXT;
3076 }
3077 return(buf);
3078}
3079
3080/**
3081 * xmlParsePubidLiteral:
3082 * @ctxt: an XML parser context
3083 *
3084 * parse an XML public literal
3085 *
3086 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3087 *
3088 * Returns the PubidLiteral parsed or NULL.
3089 */
3090
3091xmlChar *
3092xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3093 xmlChar *buf = NULL;
3094 int len = 0;
3095 int size = XML_PARSER_BUFFER_SIZE;
3096 xmlChar cur;
3097 xmlChar stop;
3098 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003099 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003100
3101 SHRINK;
3102 if (RAW == '"') {
3103 NEXT;
3104 stop = '"';
3105 } else if (RAW == '\'') {
3106 NEXT;
3107 stop = '\'';
3108 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003109 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003110 return(NULL);
3111 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003112 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003113 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003114 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003115 return(NULL);
3116 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003117 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003118 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003119 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003120 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003121 xmlChar *tmp;
3122
Owen Taylor3473f882001-02-23 17:55:21 +00003123 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003124 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3125 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003126 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003127 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003128 return(NULL);
3129 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003130 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003131 }
3132 buf[len++] = cur;
3133 count++;
3134 if (count > 50) {
3135 GROW;
3136 count = 0;
3137 }
3138 NEXT;
3139 cur = CUR;
3140 if (cur == 0) {
3141 GROW;
3142 SHRINK;
3143 cur = CUR;
3144 }
3145 }
3146 buf[len] = 0;
3147 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003148 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003149 } else {
3150 NEXT;
3151 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003152 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003153 return(buf);
3154}
3155
Daniel Veillard48b2f892001-02-25 16:11:03 +00003156void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003157/**
3158 * xmlParseCharData:
3159 * @ctxt: an XML parser context
3160 * @cdata: int indicating whether we are within a CDATA section
3161 *
3162 * parse a CharData section.
3163 * if we are within a CDATA section ']]>' marks an end of section.
3164 *
3165 * The right angle bracket (>) may be represented using the string "&gt;",
3166 * and must, for compatibility, be escaped using "&gt;" or a character
3167 * reference when it appears in the string "]]>" in content, when that
3168 * string is not marking the end of a CDATA section.
3169 *
3170 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3171 */
3172
3173void
3174xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003175 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003176 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003177 int line = ctxt->input->line;
3178 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003179
3180 SHRINK;
3181 GROW;
3182 /*
3183 * Accelerated common case where input don't need to be
3184 * modified before passing it to the handler.
3185 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003186 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003187 in = ctxt->input->cur;
3188 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003189get_more_space:
3190 while (*in == 0x20) in++;
3191 if (*in == 0xA) {
3192 ctxt->input->line++;
3193 in++;
3194 while (*in == 0xA) {
3195 ctxt->input->line++;
3196 in++;
3197 }
3198 goto get_more_space;
3199 }
3200 if (*in == '<') {
3201 nbchar = in - ctxt->input->cur;
3202 if (nbchar > 0) {
3203 const xmlChar *tmp = ctxt->input->cur;
3204 ctxt->input->cur = in;
3205
Daniel Veillard34099b42004-11-04 17:34:35 +00003206 if ((ctxt->sax != NULL) &&
3207 (ctxt->sax->ignorableWhitespace !=
3208 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003209 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3210 ctxt->sax->ignorableWhitespace(ctxt->userData,
3211 tmp, nbchar);
3212 } else if (ctxt->sax->characters != NULL)
3213 ctxt->sax->characters(ctxt->userData,
3214 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003215 } else if ((ctxt->sax != NULL) &&
3216 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003217 ctxt->sax->characters(ctxt->userData,
3218 tmp, nbchar);
3219 }
3220 }
3221 return;
3222 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003223get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003224 while (((*in > ']') && (*in <= 0x7F)) ||
3225 ((*in > '&') && (*in < '<')) ||
3226 ((*in > '<') && (*in < ']')) ||
3227 ((*in >= 0x20) && (*in < '&')) ||
3228 (*in == 0x09))
3229 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003230 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003231 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003232 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003233 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003234 ctxt->input->line++;
3235 in++;
3236 }
3237 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003238 }
3239 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003240 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003241 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003242 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003243 return;
3244 }
3245 in++;
3246 goto get_more;
3247 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003248 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003249 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003250 if ((ctxt->sax != NULL) &&
3251 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003252 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003253 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003254 const xmlChar *tmp = ctxt->input->cur;
3255 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003256
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003257 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003258 ctxt->sax->ignorableWhitespace(ctxt->userData,
3259 tmp, nbchar);
3260 } else if (ctxt->sax->characters != NULL)
3261 ctxt->sax->characters(ctxt->userData,
3262 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003263 line = ctxt->input->line;
3264 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003265 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003266 if (ctxt->sax->characters != NULL)
3267 ctxt->sax->characters(ctxt->userData,
3268 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003269 line = ctxt->input->line;
3270 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003271 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003272 }
3273 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003274 if (*in == 0xD) {
3275 in++;
3276 if (*in == 0xA) {
3277 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003278 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003279 ctxt->input->line++;
3280 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003281 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003282 in--;
3283 }
3284 if (*in == '<') {
3285 return;
3286 }
3287 if (*in == '&') {
3288 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003289 }
3290 SHRINK;
3291 GROW;
3292 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003293 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003294 nbchar = 0;
3295 }
Daniel Veillard50582112001-03-26 22:52:16 +00003296 ctxt->input->line = line;
3297 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003298 xmlParseCharDataComplex(ctxt, cdata);
3299}
3300
Daniel Veillard01c13b52002-12-10 15:19:08 +00003301/**
3302 * xmlParseCharDataComplex:
3303 * @ctxt: an XML parser context
3304 * @cdata: int indicating whether we are within a CDATA section
3305 *
3306 * parse a CharData section.this is the fallback function
3307 * of xmlParseCharData() when the parsing requires handling
3308 * of non-ASCII characters.
3309 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310void
3311xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003312 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3313 int nbchar = 0;
3314 int cur, l;
3315 int count = 0;
3316
3317 SHRINK;
3318 GROW;
3319 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003320 while ((cur != '<') && /* checked */
3321 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003322 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003323 if ((cur == ']') && (NXT(1) == ']') &&
3324 (NXT(2) == '>')) {
3325 if (cdata) break;
3326 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003327 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003328 }
3329 }
3330 COPY_BUF(l,buf,nbchar,cur);
3331 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003332 buf[nbchar] = 0;
3333
Owen Taylor3473f882001-02-23 17:55:21 +00003334 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003335 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003336 */
3337 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003338 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if (ctxt->sax->ignorableWhitespace != NULL)
3340 ctxt->sax->ignorableWhitespace(ctxt->userData,
3341 buf, nbchar);
3342 } else {
3343 if (ctxt->sax->characters != NULL)
3344 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3345 }
3346 }
3347 nbchar = 0;
3348 }
3349 count++;
3350 if (count > 50) {
3351 GROW;
3352 count = 0;
3353 }
3354 NEXTL(l);
3355 cur = CUR_CHAR(l);
3356 }
3357 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003358 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003359 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003360 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003361 */
3362 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003363 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003364 if (ctxt->sax->ignorableWhitespace != NULL)
3365 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3366 } else {
3367 if (ctxt->sax->characters != NULL)
3368 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3369 }
3370 }
3371 }
3372}
3373
3374/**
3375 * xmlParseExternalID:
3376 * @ctxt: an XML parser context
3377 * @publicID: a xmlChar** receiving PubidLiteral
3378 * @strict: indicate whether we should restrict parsing to only
3379 * production [75], see NOTE below
3380 *
3381 * Parse an External ID or a Public ID
3382 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003383 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003384 * 'PUBLIC' S PubidLiteral S SystemLiteral
3385 *
3386 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3387 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3388 *
3389 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3390 *
3391 * Returns the function returns SystemLiteral and in the second
3392 * case publicID receives PubidLiteral, is strict is off
3393 * it is possible to return NULL and have publicID set.
3394 */
3395
3396xmlChar *
3397xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3398 xmlChar *URI = NULL;
3399
3400 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003401
3402 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003403 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003404 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003405 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003406 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3407 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003408 }
3409 SKIP_BLANKS;
3410 URI = xmlParseSystemLiteral(ctxt);
3411 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003412 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003413 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003414 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003415 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003416 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003418 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003419 }
3420 SKIP_BLANKS;
3421 *publicID = xmlParsePubidLiteral(ctxt);
3422 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003423 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 }
3425 if (strict) {
3426 /*
3427 * We don't handle [83] so "S SystemLiteral" is required.
3428 */
William M. Brack76e95df2003-10-18 16:20:14 +00003429 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003431 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003432 }
3433 } else {
3434 /*
3435 * We handle [83] so we return immediately, if
3436 * "S SystemLiteral" is not detected. From a purely parsing
3437 * point of view that's a nice mess.
3438 */
3439 const xmlChar *ptr;
3440 GROW;
3441
3442 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003443 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003444
William M. Brack76e95df2003-10-18 16:20:14 +00003445 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003446 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3447 }
3448 SKIP_BLANKS;
3449 URI = xmlParseSystemLiteral(ctxt);
3450 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003451 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003452 }
3453 }
3454 return(URI);
3455}
3456
3457/**
3458 * xmlParseComment:
3459 * @ctxt: an XML parser context
3460 *
3461 * Skip an XML (SGML) comment <!-- .... -->
3462 * The spec says that "For compatibility, the string "--" (double-hyphen)
3463 * must not occur within comments. "
3464 *
3465 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3466 */
3467void
3468xmlParseComment(xmlParserCtxtPtr ctxt) {
3469 xmlChar *buf = NULL;
3470 int len;
3471 int size = XML_PARSER_BUFFER_SIZE;
3472 int q, ql;
3473 int r, rl;
3474 int cur, l;
3475 xmlParserInputState state;
3476 xmlParserInputPtr input = ctxt->input;
3477 int count = 0;
3478
3479 /*
3480 * Check that there is a comment right here.
3481 */
3482 if ((RAW != '<') || (NXT(1) != '!') ||
3483 (NXT(2) != '-') || (NXT(3) != '-')) return;
3484
3485 state = ctxt->instate;
3486 ctxt->instate = XML_PARSER_COMMENT;
3487 SHRINK;
3488 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003489 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003490 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003491 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003492 ctxt->instate = state;
3493 return;
3494 }
3495 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003496 if (q == 0)
3497 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003498 NEXTL(ql);
3499 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003500 if (r == 0)
3501 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003502 NEXTL(rl);
3503 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003504 if (cur == 0)
3505 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003506 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003507 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003508 ((cur != '>') ||
3509 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003510 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003511 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003512 }
3513 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003514 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003515 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003516 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3517 if (new_buf == NULL) {
3518 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003519 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003520 ctxt->instate = state;
3521 return;
3522 }
William M. Bracka3215c72004-07-31 16:24:01 +00003523 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003524 }
3525 COPY_BUF(ql,buf,len,q);
3526 q = r;
3527 ql = rl;
3528 r = cur;
3529 rl = l;
3530
3531 count++;
3532 if (count > 50) {
3533 GROW;
3534 count = 0;
3535 }
3536 NEXTL(l);
3537 cur = CUR_CHAR(l);
3538 if (cur == 0) {
3539 SHRINK;
3540 GROW;
3541 cur = CUR_CHAR(l);
3542 }
3543 }
3544 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003545 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003546 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003547 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003548 xmlFree(buf);
3549 } else {
3550 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003551 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3552 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003553 }
3554 NEXT;
3555 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3556 (!ctxt->disableSAX))
3557 ctxt->sax->comment(ctxt->userData, buf);
3558 xmlFree(buf);
3559 }
3560 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003561 return;
3562not_terminated:
3563 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3564 "Comment not terminated\n", NULL);
3565 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003566}
3567
3568/**
3569 * xmlParsePITarget:
3570 * @ctxt: an XML parser context
3571 *
3572 * parse the name of a PI
3573 *
3574 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3575 *
3576 * Returns the PITarget name or NULL
3577 */
3578
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003579const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003580xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003581 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003582
3583 name = xmlParseName(ctxt);
3584 if ((name != NULL) &&
3585 ((name[0] == 'x') || (name[0] == 'X')) &&
3586 ((name[1] == 'm') || (name[1] == 'M')) &&
3587 ((name[2] == 'l') || (name[2] == 'L'))) {
3588 int i;
3589 if ((name[0] == 'x') && (name[1] == 'm') &&
3590 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003591 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003592 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003593 return(name);
3594 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003595 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003596 return(name);
3597 }
3598 for (i = 0;;i++) {
3599 if (xmlW3CPIs[i] == NULL) break;
3600 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3601 return(name);
3602 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003603 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3604 "xmlParsePITarget: invalid name prefix 'xml'\n",
3605 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003606 }
3607 return(name);
3608}
3609
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must read: optional blanks, "catalog", optional blanks, '=',
 * optional blanks, a quoted URL, optional blanks.  Any other shape
 * yields an XML_WAR_CATALOG_PI warning, except a missing '=' which is
 * ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;

    /* the equal sign */
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }

    /* the opening quote, which also selects the closing one */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* the URL itself, up to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3671
Owen Taylor3473f882001-02-23 17:55:21 +00003672/**
3673 * xmlParsePI:
3674 * @ctxt: an XML parser context
3675 *
3676 * parse an XML Processing Instruction.
3677 *
3678 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3679 *
3680 * The processing is transfered to SAX once parsed.
3681 */
3682
3683void
3684xmlParsePI(xmlParserCtxtPtr ctxt) {
3685 xmlChar *buf = NULL;
3686 int len = 0;
3687 int size = XML_PARSER_BUFFER_SIZE;
3688 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003689 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003690 xmlParserInputState state;
3691 int count = 0;
3692
3693 if ((RAW == '<') && (NXT(1) == '?')) {
3694 xmlParserInputPtr input = ctxt->input;
3695 state = ctxt->instate;
3696 ctxt->instate = XML_PARSER_PI;
3697 /*
3698 * this is a Processing Instruction.
3699 */
3700 SKIP(2);
3701 SHRINK;
3702
3703 /*
3704 * Parse the target name and check for special support like
3705 * namespace.
3706 */
3707 target = xmlParsePITarget(ctxt);
3708 if (target != NULL) {
3709 if ((RAW == '?') && (NXT(1) == '>')) {
3710 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003711 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3712 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
3714 SKIP(2);
3715
3716 /*
3717 * SAX: PI detected.
3718 */
3719 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3720 (ctxt->sax->processingInstruction != NULL))
3721 ctxt->sax->processingInstruction(ctxt->userData,
3722 target, NULL);
3723 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003724 return;
3725 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003726 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003727 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003728 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003729 ctxt->instate = state;
3730 return;
3731 }
3732 cur = CUR;
3733 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003734 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3735 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
3737 SKIP_BLANKS;
3738 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003739 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003740 ((cur != '?') || (NXT(1) != '>'))) {
3741 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003742 xmlChar *tmp;
3743
Owen Taylor3473f882001-02-23 17:55:21 +00003744 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003745 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3746 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003747 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003748 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003749 ctxt->instate = state;
3750 return;
3751 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003752 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003753 }
3754 count++;
3755 if (count > 50) {
3756 GROW;
3757 count = 0;
3758 }
3759 COPY_BUF(l,buf,len,cur);
3760 NEXTL(l);
3761 cur = CUR_CHAR(l);
3762 if (cur == 0) {
3763 SHRINK;
3764 GROW;
3765 cur = CUR_CHAR(l);
3766 }
3767 }
3768 buf[len] = 0;
3769 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003770 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3771 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003772 } else {
3773 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003774 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3775 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003776 }
3777 SKIP(2);
3778
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003779#ifdef LIBXML_CATALOG_ENABLED
3780 if (((state == XML_PARSER_MISC) ||
3781 (state == XML_PARSER_START)) &&
3782 (xmlStrEqual(target, XML_CATALOG_PI))) {
3783 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3784 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3785 (allow == XML_CATA_ALLOW_ALL))
3786 xmlParseCatalogPI(ctxt, buf);
3787 }
3788#endif
3789
3790
Owen Taylor3473f882001-02-23 17:55:21 +00003791 /*
3792 * SAX: PI detected.
3793 */
3794 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3795 (ctxt->sax->processingInstruction != NULL))
3796 ctxt->sax->processingInstruction(ctxt->userData,
3797 target, buf);
3798 }
3799 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003800 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003801 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802 }
3803 ctxt->instate = state;
3804 }
3805}
3806
3807/**
3808 * xmlParseNotationDecl:
3809 * @ctxt: an XML parser context
3810 *
3811 * parse a notation declaration
3812 *
3813 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3814 *
3815 * Hence there is actually 3 choices:
3816 * 'PUBLIC' S PubidLiteral
3817 * 'PUBLIC' S PubidLiteral S SystemLiteral
3818 * and 'SYSTEM' S SystemLiteral
3819 *
3820 * See the NOTE on xmlParseExternalID().
3821 */
3822
3823void
3824xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003825 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003826 xmlChar *Pubid;
3827 xmlChar *Systemid;
3828
Daniel Veillarda07050d2003-10-19 14:46:32 +00003829 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003830 xmlParserInputPtr input = ctxt->input;
3831 SHRINK;
3832 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003833 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003834 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3835 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003836 return;
3837 }
3838 SKIP_BLANKS;
3839
Daniel Veillard76d66f42001-05-16 21:05:17 +00003840 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003841 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003842 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003843 return;
3844 }
William M. Brack76e95df2003-10-18 16:20:14 +00003845 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003846 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003847 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003848 return;
3849 }
3850 SKIP_BLANKS;
3851
3852 /*
3853 * Parse the IDs.
3854 */
3855 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3856 SKIP_BLANKS;
3857
3858 if (RAW == '>') {
3859 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003860 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3861 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003862 }
3863 NEXT;
3864 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3865 (ctxt->sax->notationDecl != NULL))
3866 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3867 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003868 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003869 }
Owen Taylor3473f882001-02-23 17:55:21 +00003870 if (Systemid != NULL) xmlFree(Systemid);
3871 if (Pubid != NULL) xmlFree(Pubid);
3872 }
3873}
3874
3875/**
3876 * xmlParseEntityDecl:
3877 * @ctxt: an XML parser context
3878 *
3879 * parse <!ENTITY declarations
3880 *
3881 * [70] EntityDecl ::= GEDecl | PEDecl
3882 *
3883 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3884 *
3885 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3886 *
3887 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3888 *
3889 * [74] PEDef ::= EntityValue | ExternalID
3890 *
3891 * [76] NDataDecl ::= S 'NDATA' S Name
3892 *
3893 * [ VC: Notation Declared ]
3894 * The Name must match the declared name of a notation.
3895 */
3896
3897void
3898xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003899 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003900 xmlChar *value = NULL;
3901 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003902 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003903 int isParameter = 0;
3904 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003905 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003906
3907 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003908 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003909 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003910 SHRINK;
3911 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003912 skipped = SKIP_BLANKS;
3913 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3915 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003916 }
Owen Taylor3473f882001-02-23 17:55:21 +00003917
3918 if (RAW == '%') {
3919 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003920 skipped = SKIP_BLANKS;
3921 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003922 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3923 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003924 }
Owen Taylor3473f882001-02-23 17:55:21 +00003925 isParameter = 1;
3926 }
3927
Daniel Veillard76d66f42001-05-16 21:05:17 +00003928 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003929 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003930 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3931 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003932 return;
3933 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003934 skipped = SKIP_BLANKS;
3935 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003936 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3937 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003938 }
Owen Taylor3473f882001-02-23 17:55:21 +00003939
Daniel Veillardf5582f12002-06-11 10:08:16 +00003940 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003941 /*
3942 * handle the various case of definitions...
3943 */
3944 if (isParameter) {
3945 if ((RAW == '"') || (RAW == '\'')) {
3946 value = xmlParseEntityValue(ctxt, &orig);
3947 if (value) {
3948 if ((ctxt->sax != NULL) &&
3949 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3950 ctxt->sax->entityDecl(ctxt->userData, name,
3951 XML_INTERNAL_PARAMETER_ENTITY,
3952 NULL, NULL, value);
3953 }
3954 } else {
3955 URI = xmlParseExternalID(ctxt, &literal, 1);
3956 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003957 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003958 }
3959 if (URI) {
3960 xmlURIPtr uri;
3961
3962 uri = xmlParseURI((const char *) URI);
3963 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003964 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3965 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003966 /*
3967 * This really ought to be a well formedness error
3968 * but the XML Core WG decided otherwise c.f. issue
3969 * E26 of the XML erratas.
3970 */
Owen Taylor3473f882001-02-23 17:55:21 +00003971 } else {
3972 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003973 /*
3974 * Okay this is foolish to block those but not
3975 * invalid URIs.
3976 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003977 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003978 } else {
3979 if ((ctxt->sax != NULL) &&
3980 (!ctxt->disableSAX) &&
3981 (ctxt->sax->entityDecl != NULL))
3982 ctxt->sax->entityDecl(ctxt->userData, name,
3983 XML_EXTERNAL_PARAMETER_ENTITY,
3984 literal, URI, NULL);
3985 }
3986 xmlFreeURI(uri);
3987 }
3988 }
3989 }
3990 } else {
3991 if ((RAW == '"') || (RAW == '\'')) {
3992 value = xmlParseEntityValue(ctxt, &orig);
3993 if ((ctxt->sax != NULL) &&
3994 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3995 ctxt->sax->entityDecl(ctxt->userData, name,
3996 XML_INTERNAL_GENERAL_ENTITY,
3997 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003998 /*
3999 * For expat compatibility in SAX mode.
4000 */
4001 if ((ctxt->myDoc == NULL) ||
4002 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4003 if (ctxt->myDoc == NULL) {
4004 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4005 }
4006 if (ctxt->myDoc->intSubset == NULL)
4007 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4008 BAD_CAST "fake", NULL, NULL);
4009
Daniel Veillard1af9a412003-08-20 22:54:39 +00004010 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4011 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004012 }
Owen Taylor3473f882001-02-23 17:55:21 +00004013 } else {
4014 URI = xmlParseExternalID(ctxt, &literal, 1);
4015 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004016 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004017 }
4018 if (URI) {
4019 xmlURIPtr uri;
4020
4021 uri = xmlParseURI((const char *)URI);
4022 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004023 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4024 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004025 /*
4026 * This really ought to be a well formedness error
4027 * but the XML Core WG decided otherwise c.f. issue
4028 * E26 of the XML erratas.
4029 */
Owen Taylor3473f882001-02-23 17:55:21 +00004030 } else {
4031 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004032 /*
4033 * Okay this is foolish to block those but not
4034 * invalid URIs.
4035 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004036 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004037 }
4038 xmlFreeURI(uri);
4039 }
4040 }
William M. Brack76e95df2003-10-18 16:20:14 +00004041 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004042 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4043 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004044 }
4045 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004046 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004047 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004048 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004049 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4050 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004051 }
4052 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004053 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004054 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4055 (ctxt->sax->unparsedEntityDecl != NULL))
4056 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4057 literal, URI, ndata);
4058 } else {
4059 if ((ctxt->sax != NULL) &&
4060 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4061 ctxt->sax->entityDecl(ctxt->userData, name,
4062 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4063 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004064 /*
4065 * For expat compatibility in SAX mode.
4066 * assuming the entity repalcement was asked for
4067 */
4068 if ((ctxt->replaceEntities != 0) &&
4069 ((ctxt->myDoc == NULL) ||
4070 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4071 if (ctxt->myDoc == NULL) {
4072 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4073 }
4074
4075 if (ctxt->myDoc->intSubset == NULL)
4076 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4077 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004078 xmlSAX2EntityDecl(ctxt, name,
4079 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4080 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004081 }
Owen Taylor3473f882001-02-23 17:55:21 +00004082 }
4083 }
4084 }
4085 SKIP_BLANKS;
4086 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004087 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004088 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004089 } else {
4090 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004091 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4092 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004093 }
4094 NEXT;
4095 }
4096 if (orig != NULL) {
4097 /*
4098 * Ugly mechanism to save the raw entity value.
4099 */
4100 xmlEntityPtr cur = NULL;
4101
4102 if (isParameter) {
4103 if ((ctxt->sax != NULL) &&
4104 (ctxt->sax->getParameterEntity != NULL))
4105 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4106 } else {
4107 if ((ctxt->sax != NULL) &&
4108 (ctxt->sax->getEntity != NULL))
4109 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004110 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004111 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004112 }
Owen Taylor3473f882001-02-23 17:55:21 +00004113 }
4114 if (cur != NULL) {
4115 if (cur->orig != NULL)
4116 xmlFree(orig);
4117 else
4118 cur->orig = orig;
4119 } else
4120 xmlFree(orig);
4121 }
Owen Taylor3473f882001-02-23 17:55:21 +00004122 if (value != NULL) xmlFree(value);
4123 if (URI != NULL) xmlFree(URI);
4124 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004125 }
4126}
4127
4128/**
4129 * xmlParseDefaultDecl:
4130 * @ctxt: an XML parser context
4131 * @value: Receive a possible fixed default value for the attribute
4132 *
4133 * Parse an attribute default declaration
4134 *
4135 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4136 *
4137 * [ VC: Required Attribute ]
4138 * if the default declaration is the keyword #REQUIRED, then the
4139 * attribute must be specified for all elements of the type in the
4140 * attribute-list declaration.
4141 *
4142 * [ VC: Attribute Default Legal ]
4143 * The declared default value must meet the lexical constraints of
4144 * the declared attribute type c.f. xmlValidateAttributeDecl()
4145 *
4146 * [ VC: Fixed Attribute Default ]
4147 * if an attribute has a default value declared with the #FIXED
4148 * keyword, instances of that attribute must match the default value.
4149 *
4150 * [ WFC: No < in Attribute Values ]
4151 * handled in xmlParseAttValue()
4152 *
4153 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4154 * or XML_ATTRIBUTE_FIXED.
4155 */
4156
4157int
4158xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4159 int val;
4160 xmlChar *ret;
4161
4162 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004163 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004164 SKIP(9);
4165 return(XML_ATTRIBUTE_REQUIRED);
4166 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004167 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004168 SKIP(8);
4169 return(XML_ATTRIBUTE_IMPLIED);
4170 }
4171 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004172 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004173 SKIP(6);
4174 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004175 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004176 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4177 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004178 }
4179 SKIP_BLANKS;
4180 }
4181 ret = xmlParseAttValue(ctxt);
4182 ctxt->instate = XML_PARSER_DTD;
4183 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004184 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004185 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004186 } else
4187 *value = ret;
4188 return(val);
4189}
4190
4191/**
4192 * xmlParseNotationType:
4193 * @ctxt: an XML parser context
4194 *
4195 * parse an Notation attribute type.
4196 *
4197 * Note: the leading 'NOTATION' S part has already being parsed...
4198 *
4199 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4200 *
4201 * [ VC: Notation Attributes ]
4202 * Values of this type must match one of the notation names included
4203 * in the declaration; all notation names in the declaration must be declared.
4204 *
4205 * Returns: the notation attribute tree built while parsing
4206 */
4207
4208xmlEnumerationPtr
4209xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004210 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004211 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4212
4213 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004214 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004215 return(NULL);
4216 }
4217 SHRINK;
4218 do {
4219 NEXT;
4220 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004221 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004222 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004223 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4224 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004225 return(ret);
4226 }
4227 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004228 if (cur == NULL) return(ret);
4229 if (last == NULL) ret = last = cur;
4230 else {
4231 last->next = cur;
4232 last = cur;
4233 }
4234 SKIP_BLANKS;
4235 } while (RAW == '|');
4236 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004237 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004238 if ((last != NULL) && (last != ret))
4239 xmlFreeEnumeration(last);
4240 return(ret);
4241 }
4242 NEXT;
4243 return(ret);
4244}
4245
4246/**
4247 * xmlParseEnumerationType:
4248 * @ctxt: an XML parser context
4249 *
4250 * parse an Enumeration attribute type.
4251 *
4252 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4253 *
4254 * [ VC: Enumeration ]
4255 * Values of this type must match one of the Nmtoken tokens in
4256 * the declaration
4257 *
4258 * Returns: the enumeration attribute tree built while parsing
4259 */
4260
4261xmlEnumerationPtr
4262xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4263 xmlChar *name;
4264 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4265
4266 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004267 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004268 return(NULL);
4269 }
4270 SHRINK;
4271 do {
4272 NEXT;
4273 SKIP_BLANKS;
4274 name = xmlParseNmtoken(ctxt);
4275 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004276 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004277 return(ret);
4278 }
4279 cur = xmlCreateEnumeration(name);
4280 xmlFree(name);
4281 if (cur == NULL) return(ret);
4282 if (last == NULL) ret = last = cur;
4283 else {
4284 last->next = cur;
4285 last = cur;
4286 }
4287 SKIP_BLANKS;
4288 } while (RAW == '|');
4289 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004290 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 return(ret);
4292 }
4293 NEXT;
4294 return(ret);
4295}
4296
4297/**
4298 * xmlParseEnumeratedType:
4299 * @ctxt: an XML parser context
4300 * @tree: the enumeration tree built while parsing
4301 *
4302 * parse an Enumerated attribute type.
4303 *
4304 * [57] EnumeratedType ::= NotationType | Enumeration
4305 *
4306 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4307 *
4308 *
4309 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4310 */
4311
4312int
4313xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004314 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004315 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004316 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4318 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004319 return(0);
4320 }
4321 SKIP_BLANKS;
4322 *tree = xmlParseNotationType(ctxt);
4323 if (*tree == NULL) return(0);
4324 return(XML_ATTRIBUTE_NOTATION);
4325 }
4326 *tree = xmlParseEnumerationType(ctxt);
4327 if (*tree == NULL) return(0);
4328 return(XML_ATTRIBUTE_ENUMERATION);
4329}
4330
4331/**
4332 * xmlParseAttributeType:
4333 * @ctxt: an XML parser context
4334 * @tree: the enumeration tree built while parsing
4335 *
4336 * parse the Attribute list def for an element
4337 *
4338 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4339 *
4340 * [55] StringType ::= 'CDATA'
4341 *
4342 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4343 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4344 *
4345 * Validity constraints for attribute values syntax are checked in
4346 * xmlValidateAttributeValue()
4347 *
4348 * [ VC: ID ]
4349 * Values of type ID must match the Name production. A name must not
4350 * appear more than once in an XML document as a value of this type;
4351 * i.e., ID values must uniquely identify the elements which bear them.
4352 *
4353 * [ VC: One ID per Element Type ]
4354 * No element type may have more than one ID attribute specified.
4355 *
4356 * [ VC: ID Attribute Default ]
4357 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4358 *
4359 * [ VC: IDREF ]
4360 * Values of type IDREF must match the Name production, and values
4361 * of type IDREFS must match Names; each IDREF Name must match the value
4362 * of an ID attribute on some element in the XML document; i.e. IDREF
4363 * values must match the value of some ID attribute.
4364 *
4365 * [ VC: Entity Name ]
4366 * Values of type ENTITY must match the Name production, values
4367 * of type ENTITIES must match Names; each Entity Name must match the
4368 * name of an unparsed entity declared in the DTD.
4369 *
4370 * [ VC: Name Token ]
4371 * Values of type NMTOKEN must match the Nmtoken production; values
4372 * of type NMTOKENS must match Nmtokens.
4373 *
4374 * Returns the attribute type
4375 */
4376int
4377xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4378 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004379 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004380 SKIP(5);
4381 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004382 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004383 SKIP(6);
4384 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004385 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004386 SKIP(5);
4387 return(XML_ATTRIBUTE_IDREF);
4388 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4389 SKIP(2);
4390 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004391 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004392 SKIP(6);
4393 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004394 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004395 SKIP(8);
4396 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004397 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004398 SKIP(8);
4399 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004400 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004401 SKIP(7);
4402 return(XML_ATTRIBUTE_NMTOKEN);
4403 }
4404 return(xmlParseEnumeratedType(ctxt, tree));
4405}
4406
4407/**
4408 * xmlParseAttributeListDecl:
4409 * @ctxt: an XML parser context
4410 *
4411 * : parse the Attribute list def for an element
4412 *
4413 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4414 *
4415 * [53] AttDef ::= S Name S AttType S DefaultDecl
4416 *
4417 */
4418void
4419xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004420 const xmlChar *elemName;
4421 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004422 xmlEnumerationPtr tree;
4423
Daniel Veillarda07050d2003-10-19 14:46:32 +00004424 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004425 xmlParserInputPtr input = ctxt->input;
4426
4427 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004428 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004429 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004430 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004431 }
4432 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004433 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004434 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004435 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4436 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004437 return;
4438 }
4439 SKIP_BLANKS;
4440 GROW;
4441 while (RAW != '>') {
4442 const xmlChar *check = CUR_PTR;
4443 int type;
4444 int def;
4445 xmlChar *defaultValue = NULL;
4446
4447 GROW;
4448 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004449 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004450 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4452 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004453 break;
4454 }
4455 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004456 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004457 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004458 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004459 if (defaultValue != NULL)
4460 xmlFree(defaultValue);
4461 break;
4462 }
4463 SKIP_BLANKS;
4464
4465 type = xmlParseAttributeType(ctxt, &tree);
4466 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004467 if (defaultValue != NULL)
4468 xmlFree(defaultValue);
4469 break;
4470 }
4471
4472 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004473 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004474 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4475 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004476 if (defaultValue != NULL)
4477 xmlFree(defaultValue);
4478 if (tree != NULL)
4479 xmlFreeEnumeration(tree);
4480 break;
4481 }
4482 SKIP_BLANKS;
4483
4484 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4485 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004486 if (defaultValue != NULL)
4487 xmlFree(defaultValue);
4488 if (tree != NULL)
4489 xmlFreeEnumeration(tree);
4490 break;
4491 }
4492
4493 GROW;
4494 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004495 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004497 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004498 if (defaultValue != NULL)
4499 xmlFree(defaultValue);
4500 if (tree != NULL)
4501 xmlFreeEnumeration(tree);
4502 break;
4503 }
4504 SKIP_BLANKS;
4505 }
4506 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004507 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4508 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004509 if (defaultValue != NULL)
4510 xmlFree(defaultValue);
4511 if (tree != NULL)
4512 xmlFreeEnumeration(tree);
4513 break;
4514 }
4515 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4516 (ctxt->sax->attributeDecl != NULL))
4517 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4518 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004519 else if (tree != NULL)
4520 xmlFreeEnumeration(tree);
4521
4522 if ((ctxt->sax2) && (defaultValue != NULL) &&
4523 (def != XML_ATTRIBUTE_IMPLIED) &&
4524 (def != XML_ATTRIBUTE_REQUIRED)) {
4525 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4526 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004527 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4528 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4529 }
Owen Taylor3473f882001-02-23 17:55:21 +00004530 if (defaultValue != NULL)
4531 xmlFree(defaultValue);
4532 GROW;
4533 }
4534 if (RAW == '>') {
4535 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004536 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4537 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004538 }
4539 NEXT;
4540 }
Owen Taylor3473f882001-02-23 17:55:21 +00004541 }
4542}
4543
4544/**
4545 * xmlParseElementMixedContentDecl:
4546 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004547 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004548 *
4549 * parse the declaration for a Mixed Element content
4550 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4551 *
4552 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4553 * '(' S? '#PCDATA' S? ')'
4554 *
4555 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4556 *
4557 * [ VC: No Duplicate Types ]
4558 * The same name must not appear more than once in a single
4559 * mixed-content declaration.
4560 *
4561 * returns: the list of the xmlElementContentPtr describing the element choices
4562 */
4563xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004564xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004565 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004566 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004567
4568 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004569 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004570 SKIP(7);
4571 SKIP_BLANKS;
4572 SHRINK;
4573 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004574 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004575 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4576"Element content declaration doesn't start and stop in the same entity\n",
4577 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004578 }
Owen Taylor3473f882001-02-23 17:55:21 +00004579 NEXT;
4580 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4581 if (RAW == '*') {
4582 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4583 NEXT;
4584 }
4585 return(ret);
4586 }
4587 if ((RAW == '(') || (RAW == '|')) {
4588 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4589 if (ret == NULL) return(NULL);
4590 }
4591 while (RAW == '|') {
4592 NEXT;
4593 if (elem == NULL) {
4594 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4595 if (ret == NULL) return(NULL);
4596 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004597 if (cur != NULL)
4598 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004599 cur = ret;
4600 } else {
4601 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4602 if (n == NULL) return(NULL);
4603 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004604 if (n->c1 != NULL)
4605 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004606 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004607 if (n != NULL)
4608 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004609 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004610 }
4611 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004612 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004613 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004614 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004615 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004616 xmlFreeElementContent(cur);
4617 return(NULL);
4618 }
4619 SKIP_BLANKS;
4620 GROW;
4621 }
4622 if ((RAW == ')') && (NXT(1) == '*')) {
4623 if (elem != NULL) {
4624 cur->c2 = xmlNewElementContent(elem,
4625 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004626 if (cur->c2 != NULL)
4627 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004628 }
4629 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004630 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004631 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4632"Element content declaration doesn't start and stop in the same entity\n",
4633 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004634 }
Owen Taylor3473f882001-02-23 17:55:21 +00004635 SKIP(2);
4636 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004637 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004638 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004639 return(NULL);
4640 }
4641
4642 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004643 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004644 }
4645 return(ret);
4646}
4647
4648/**
4649 * xmlParseElementChildrenContentDecl:
4650 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004651 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004652 *
 * parse the declaration for an Element children content model
4654 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4655 *
4656 *
4657 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4658 *
4659 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4660 *
4661 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4662 *
4663 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4664 *
4665 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4666 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004667 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004668 * opening or closing parentheses in a choice, seq, or Mixed
4669 * construct is contained in the replacement text for a parameter
4670 * entity, both must be contained in the same replacement text. For
4671 * interoperability, if a parameter-entity reference appears in a
4672 * choice, seq, or Mixed construct, its replacement text should not
4673 * be empty, and neither the first nor last non-blank character of
4674 * the replacement text should be a connector (| or ,).
4675 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004676 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004677 * hierarchy.
4678 */
4679xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004680xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004681 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004682 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004683 xmlChar type = 0;
4684
4685 SKIP_BLANKS;
4686 GROW;
4687 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004688 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004689
Owen Taylor3473f882001-02-23 17:55:21 +00004690 /* Recurse on first child */
4691 NEXT;
4692 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004693 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004694 SKIP_BLANKS;
4695 GROW;
4696 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004697 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004698 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004699 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004700 return(NULL);
4701 }
4702 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004703 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004704 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004705 return(NULL);
4706 }
Owen Taylor3473f882001-02-23 17:55:21 +00004707 GROW;
4708 if (RAW == '?') {
4709 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4710 NEXT;
4711 } else if (RAW == '*') {
4712 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4713 NEXT;
4714 } else if (RAW == '+') {
4715 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4716 NEXT;
4717 } else {
4718 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4719 }
Owen Taylor3473f882001-02-23 17:55:21 +00004720 GROW;
4721 }
4722 SKIP_BLANKS;
4723 SHRINK;
4724 while (RAW != ')') {
4725 /*
4726 * Each loop we parse one separator and one element.
4727 */
4728 if (RAW == ',') {
4729 if (type == 0) type = CUR;
4730
4731 /*
4732 * Detect "Name | Name , Name" error
4733 */
4734 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004735 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004736 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004737 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004738 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004739 xmlFreeElementContent(last);
4740 if (ret != NULL)
4741 xmlFreeElementContent(ret);
4742 return(NULL);
4743 }
4744 NEXT;
4745
4746 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4747 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004748 if ((last != NULL) && (last != ret))
4749 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004750 xmlFreeElementContent(ret);
4751 return(NULL);
4752 }
4753 if (last == NULL) {
4754 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004755 if (ret != NULL)
4756 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004757 ret = cur = op;
4758 } else {
4759 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004760 if (op != NULL)
4761 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004762 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004763 if (last != NULL)
4764 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004765 cur =op;
4766 last = NULL;
4767 }
4768 } else if (RAW == '|') {
4769 if (type == 0) type = CUR;
4770
4771 /*
4772 * Detect "Name , Name | Name" error
4773 */
4774 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004775 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004776 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004777 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004778 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004779 xmlFreeElementContent(last);
4780 if (ret != NULL)
4781 xmlFreeElementContent(ret);
4782 return(NULL);
4783 }
4784 NEXT;
4785
4786 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4787 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004788 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004789 xmlFreeElementContent(last);
4790 if (ret != NULL)
4791 xmlFreeElementContent(ret);
4792 return(NULL);
4793 }
4794 if (last == NULL) {
4795 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004796 if (ret != NULL)
4797 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004798 ret = cur = op;
4799 } else {
4800 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004801 if (op != NULL)
4802 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004803 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004804 if (last != NULL)
4805 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004806 cur =op;
4807 last = NULL;
4808 }
4809 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004810 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004811 if (ret != NULL)
4812 xmlFreeElementContent(ret);
4813 return(NULL);
4814 }
4815 GROW;
4816 SKIP_BLANKS;
4817 GROW;
4818 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004819 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004820 /* Recurse on second child */
4821 NEXT;
4822 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004823 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004824 SKIP_BLANKS;
4825 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004826 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004827 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004828 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004829 if (ret != NULL)
4830 xmlFreeElementContent(ret);
4831 return(NULL);
4832 }
4833 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004834 if (RAW == '?') {
4835 last->ocur = XML_ELEMENT_CONTENT_OPT;
4836 NEXT;
4837 } else if (RAW == '*') {
4838 last->ocur = XML_ELEMENT_CONTENT_MULT;
4839 NEXT;
4840 } else if (RAW == '+') {
4841 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4842 NEXT;
4843 } else {
4844 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4845 }
4846 }
4847 SKIP_BLANKS;
4848 GROW;
4849 }
4850 if ((cur != NULL) && (last != NULL)) {
4851 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004852 if (last != NULL)
4853 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004854 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004856 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4857"Element content declaration doesn't start and stop in the same entity\n",
4858 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004859 }
Owen Taylor3473f882001-02-23 17:55:21 +00004860 NEXT;
4861 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004862 if (ret != NULL) {
4863 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4864 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4865 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4866 else
4867 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4868 }
Owen Taylor3473f882001-02-23 17:55:21 +00004869 NEXT;
4870 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004871 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004872 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004873 cur = ret;
4874 /*
4875 * Some normalization:
4876 * (a | b* | c?)* == (a | b | c)*
4877 */
4878 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4879 if ((cur->c1 != NULL) &&
4880 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4881 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4882 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4883 if ((cur->c2 != NULL) &&
4884 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4885 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4886 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4887 cur = cur->c2;
4888 }
4889 }
Owen Taylor3473f882001-02-23 17:55:21 +00004890 NEXT;
4891 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004892 if (ret != NULL) {
4893 int found = 0;
4894
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004895 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4896 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4897 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004898 else
4899 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004900 /*
4901 * Some normalization:
4902 * (a | b*)+ == (a | b)*
4903 * (a | b?)+ == (a | b)*
4904 */
4905 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4906 if ((cur->c1 != NULL) &&
4907 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4908 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4909 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4910 found = 1;
4911 }
4912 if ((cur->c2 != NULL) &&
4913 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4914 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4915 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4916 found = 1;
4917 }
4918 cur = cur->c2;
4919 }
4920 if (found)
4921 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4922 }
Owen Taylor3473f882001-02-23 17:55:21 +00004923 NEXT;
4924 }
4925 return(ret);
4926}
4927
4928/**
4929 * xmlParseElementContentDecl:
4930 * @ctxt: an XML parser context
4931 * @name: the name of the element being defined.
4932 * @result: the Element Content pointer will be stored here if any
4933 *
4934 * parse the declaration for an Element content either Mixed or Children,
4935 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4936 *
4937 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4938 *
4939 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4940 */
4941
4942int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004943xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004944 xmlElementContentPtr *result) {
4945
4946 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004947 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004948 int res;
4949
4950 *result = NULL;
4951
4952 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004953 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004954 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 return(-1);
4956 }
4957 NEXT;
4958 GROW;
4959 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004960 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004961 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004962 res = XML_ELEMENT_TYPE_MIXED;
4963 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004964 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004965 res = XML_ELEMENT_TYPE_ELEMENT;
4966 }
Owen Taylor3473f882001-02-23 17:55:21 +00004967 SKIP_BLANKS;
4968 *result = tree;
4969 return(res);
4970}
4971
4972/**
4973 * xmlParseElementDecl:
4974 * @ctxt: an XML parser context
4975 *
4976 * parse an Element declaration.
4977 *
4978 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4979 *
4980 * [ VC: Unique Element Type Declaration ]
4981 * No element type may be declared more than once
4982 *
4983 * Returns the type of the element, or -1 in case of error
4984 */
4985int
4986xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004987 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004988 int ret = -1;
4989 xmlElementContentPtr content = NULL;
4990
4991 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004992 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004993 xmlParserInputPtr input = ctxt->input;
4994
4995 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004996 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4998 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004999 }
5000 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005001 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005002 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005003 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5004 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005005 return(-1);
5006 }
5007 while ((RAW == 0) && (ctxt->inputNr > 1))
5008 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005009 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5011 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005012 }
5013 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005014 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005015 SKIP(5);
5016 /*
5017 * Element must always be empty.
5018 */
5019 ret = XML_ELEMENT_TYPE_EMPTY;
5020 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5021 (NXT(2) == 'Y')) {
5022 SKIP(3);
5023 /*
5024 * Element is a generic container.
5025 */
5026 ret = XML_ELEMENT_TYPE_ANY;
5027 } else if (RAW == '(') {
5028 ret = xmlParseElementContentDecl(ctxt, name, &content);
5029 } else {
5030 /*
5031 * [ WFC: PEs in Internal Subset ] error handling.
5032 */
5033 if ((RAW == '%') && (ctxt->external == 0) &&
5034 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005035 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005036 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005037 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005038 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005039 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5040 }
Owen Taylor3473f882001-02-23 17:55:21 +00005041 return(-1);
5042 }
5043
5044 SKIP_BLANKS;
5045 /*
5046 * Pop-up of finished entities.
5047 */
5048 while ((RAW == 0) && (ctxt->inputNr > 1))
5049 xmlPopInput(ctxt);
5050 SKIP_BLANKS;
5051
5052 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005053 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005054 } else {
5055 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005056 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5057 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005058 }
5059
5060 NEXT;
5061 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5062 (ctxt->sax->elementDecl != NULL))
5063 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5064 content);
5065 }
5066 if (content != NULL) {
5067 xmlFreeElementContent(content);
5068 }
Owen Taylor3473f882001-02-23 17:55:21 +00005069 }
5070 return(ret);
5071}
5072
5073/**
Owen Taylor3473f882001-02-23 17:55:21 +00005074 * xmlParseConditionalSections
5075 * @ctxt: an XML parser context
5076 *
5077 * [61] conditionalSect ::= includeSect | ignoreSect
5078 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5079 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5080 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5081 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5082 */
5083
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005084static void
Owen Taylor3473f882001-02-23 17:55:21 +00005085xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5086 SKIP(3);
5087 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005088 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005089 SKIP(7);
5090 SKIP_BLANKS;
5091 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005092 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005093 } else {
5094 NEXT;
5095 }
5096 if (xmlParserDebugEntities) {
5097 if ((ctxt->input != NULL) && (ctxt->input->filename))
5098 xmlGenericError(xmlGenericErrorContext,
5099 "%s(%d): ", ctxt->input->filename,
5100 ctxt->input->line);
5101 xmlGenericError(xmlGenericErrorContext,
5102 "Entering INCLUDE Conditional Section\n");
5103 }
5104
5105 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5106 (NXT(2) != '>'))) {
5107 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005108 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005109
5110 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5111 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005112 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005113 NEXT;
5114 } else if (RAW == '%') {
5115 xmlParsePEReference(ctxt);
5116 } else
5117 xmlParseMarkupDecl(ctxt);
5118
5119 /*
5120 * Pop-up of finished entities.
5121 */
5122 while ((RAW == 0) && (ctxt->inputNr > 1))
5123 xmlPopInput(ctxt);
5124
Daniel Veillardfdc91562002-07-01 21:52:03 +00005125 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005126 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 break;
5128 }
5129 }
5130 if (xmlParserDebugEntities) {
5131 if ((ctxt->input != NULL) && (ctxt->input->filename))
5132 xmlGenericError(xmlGenericErrorContext,
5133 "%s(%d): ", ctxt->input->filename,
5134 ctxt->input->line);
5135 xmlGenericError(xmlGenericErrorContext,
5136 "Leaving INCLUDE Conditional Section\n");
5137 }
5138
Daniel Veillarda07050d2003-10-19 14:46:32 +00005139 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005140 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005141 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005142 int depth = 0;
5143
5144 SKIP(6);
5145 SKIP_BLANKS;
5146 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005147 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005148 } else {
5149 NEXT;
5150 }
5151 if (xmlParserDebugEntities) {
5152 if ((ctxt->input != NULL) && (ctxt->input->filename))
5153 xmlGenericError(xmlGenericErrorContext,
5154 "%s(%d): ", ctxt->input->filename,
5155 ctxt->input->line);
5156 xmlGenericError(xmlGenericErrorContext,
5157 "Entering IGNORE Conditional Section\n");
5158 }
5159
5160 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005161 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005162 * But disable SAX event generating DTD building in the meantime
5163 */
5164 state = ctxt->disableSAX;
5165 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005166 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005167 ctxt->instate = XML_PARSER_IGNORE;
5168
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005169 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005170 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5171 depth++;
5172 SKIP(3);
5173 continue;
5174 }
5175 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5176 if (--depth >= 0) SKIP(3);
5177 continue;
5178 }
5179 NEXT;
5180 continue;
5181 }
5182
5183 ctxt->disableSAX = state;
5184 ctxt->instate = instate;
5185
5186 if (xmlParserDebugEntities) {
5187 if ((ctxt->input != NULL) && (ctxt->input->filename))
5188 xmlGenericError(xmlGenericErrorContext,
5189 "%s(%d): ", ctxt->input->filename,
5190 ctxt->input->line);
5191 xmlGenericError(xmlGenericErrorContext,
5192 "Leaving IGNORE Conditional Section\n");
5193 }
5194
5195 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005196 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 }
5198
5199 if (RAW == 0)
5200 SHRINK;
5201
5202 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005203 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005204 } else {
5205 SKIP(3);
5206 }
5207}
5208
5209/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005210 * xmlParseMarkupDecl:
5211 * @ctxt: an XML parser context
5212 *
5213 * parse Markup declarations
5214 *
5215 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5216 * NotationDecl | PI | Comment
5217 *
5218 * [ VC: Proper Declaration/PE Nesting ]
5219 * Parameter-entity replacement text must be properly nested with
5220 * markup declarations. That is to say, if either the first character
5221 * or the last character of a markup declaration (markupdecl above) is
5222 * contained in the replacement text for a parameter-entity reference,
5223 * both must be contained in the same replacement text.
5224 *
5225 * [ WFC: PEs in Internal Subset ]
5226 * In the internal DTD subset, parameter-entity references can occur
5227 * only where markup declarations can occur, not within markup declarations.
5228 * (This does not apply to references that occur in external parameter
5229 * entities or to the external subset.)
5230 */
5231void
5232xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5233 GROW;
5234 xmlParseElementDecl(ctxt);
5235 xmlParseAttributeListDecl(ctxt);
5236 xmlParseEntityDecl(ctxt);
5237 xmlParseNotationDecl(ctxt);
5238 xmlParsePI(ctxt);
5239 xmlParseComment(ctxt);
5240 /*
5241 * This is only for internal subset. On external entities,
5242 * the replacement is done before parsing stage
5243 */
5244 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5245 xmlParsePEReference(ctxt);
5246
5247 /*
5248 * Conditional sections are allowed from entities included
5249 * by PE References in the internal subset.
5250 */
5251 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5252 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5253 xmlParseConditionalSections(ctxt);
5254 }
5255 }
5256
5257 ctxt->instate = XML_PARSER_DTD;
5258}
5259
5260/**
5261 * xmlParseTextDecl:
5262 * @ctxt: an XML parser context
5263 *
5264 * parse an XML declaration header for external entities
5265 *
5266 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5267 *
5268 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5269 */
5270
5271void
5272xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5273 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005274 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005275
5276 /*
5277 * We know that '<?xml' is here.
5278 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005279 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005280 SKIP(5);
5281 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005282 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005283 return;
5284 }
5285
William M. Brack76e95df2003-10-18 16:20:14 +00005286 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005287 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5288 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005289 }
5290 SKIP_BLANKS;
5291
5292 /*
5293 * We may have the VersionInfo here.
5294 */
5295 version = xmlParseVersionInfo(ctxt);
5296 if (version == NULL)
5297 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005298 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005299 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005300 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5301 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005302 }
5303 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005304 ctxt->input->version = version;
5305
5306 /*
5307 * We must have the encoding declaration
5308 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005309 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005310 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5311 /*
5312 * The XML REC instructs us to stop parsing right here
5313 */
5314 return;
5315 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005316 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5317 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5318 "Missing encoding in text declaration\n");
5319 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005320
5321 SKIP_BLANKS;
5322 if ((RAW == '?') && (NXT(1) == '>')) {
5323 SKIP(2);
5324 } else if (RAW == '>') {
5325 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005326 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005327 NEXT;
5328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005329 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005330 MOVETO_ENDTAG(CUR_PTR);
5331 NEXT;
5332 }
5333}
5334
5335/**
Owen Taylor3473f882001-02-23 17:55:21 +00005336 * xmlParseExternalSubset:
5337 * @ctxt: an XML parser context
5338 * @ExternalID: the external identifier
5339 * @SystemID: the system identifier (or URL)
5340 *
5341 * parse Markup declarations from an external subset
5342 *
5343 * [30] extSubset ::= textDecl? extSubsetDecl
5344 *
5345 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5346 */
5347void
5348xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5349 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005350 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005351 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005352 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005353 xmlParseTextDecl(ctxt);
5354 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5355 /*
5356 * The XML REC instructs us to stop parsing right here
5357 */
5358 ctxt->instate = XML_PARSER_EOF;
5359 return;
5360 }
5361 }
5362 if (ctxt->myDoc == NULL) {
5363 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5364 }
5365 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5366 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5367
5368 ctxt->instate = XML_PARSER_DTD;
5369 ctxt->external = 1;
5370 while (((RAW == '<') && (NXT(1) == '?')) ||
5371 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005372 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005373 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005374 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005375
5376 GROW;
5377 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5378 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005379 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005380 NEXT;
5381 } else if (RAW == '%') {
5382 xmlParsePEReference(ctxt);
5383 } else
5384 xmlParseMarkupDecl(ctxt);
5385
5386 /*
5387 * Pop-up of finished entities.
5388 */
5389 while ((RAW == 0) && (ctxt->inputNr > 1))
5390 xmlPopInput(ctxt);
5391
Daniel Veillardfdc91562002-07-01 21:52:03 +00005392 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005393 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005394 break;
5395 }
5396 }
5397
5398 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005399 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005400 }
5401
5402}
5403
5404/**
5405 * xmlParseReference:
5406 * @ctxt: an XML parser context
5407 *
5408 * parse and handle entity references in content, depending on the SAX
5409 * interface, this may end-up in a call to character() if this is a
5410 * CharRef, a predefined entity, if there is no reference() callback.
5411 * or if the parser was asked to switch to that mode.
5412 *
5413 * [67] Reference ::= EntityRef | CharRef
5414 */
5415void
5416xmlParseReference(xmlParserCtxtPtr ctxt) {
5417 xmlEntityPtr ent;
5418 xmlChar *val;
5419 if (RAW != '&') return;
5420
5421 if (NXT(1) == '#') {
5422 int i = 0;
5423 xmlChar out[10];
5424 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005425 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005426
5427 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5428 /*
5429 * So we are using non-UTF-8 buffers
5430 * Check that the char fit on 8bits, if not
5431 * generate a CharRef.
5432 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005433 if (value <= 0xFF) {
5434 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005435 out[1] = 0;
5436 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5437 (!ctxt->disableSAX))
5438 ctxt->sax->characters(ctxt->userData, out, 1);
5439 } else {
5440 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005441 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005442 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005443 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005444 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5445 (!ctxt->disableSAX))
5446 ctxt->sax->reference(ctxt->userData, out);
5447 }
5448 } else {
5449 /*
5450 * Just encode the value in UTF-8
5451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005452 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005453 out[i] = 0;
5454 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5455 (!ctxt->disableSAX))
5456 ctxt->sax->characters(ctxt->userData, out, i);
5457 }
5458 } else {
5459 ent = xmlParseEntityRef(ctxt);
5460 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005461 if (!ctxt->wellFormed)
5462 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005463 if ((ent->name != NULL) &&
5464 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5465 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005466 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005467
5468
5469 /*
5470 * The first reference to the entity trigger a parsing phase
5471 * where the ent->children is filled with the result from
5472 * the parsing.
5473 */
5474 if (ent->children == NULL) {
5475 xmlChar *value;
5476 value = ent->content;
5477
5478 /*
5479 * Check that this entity is well formed
5480 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005481 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005482 (value[1] == 0) && (value[0] == '<') &&
5483 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5484 /*
5485 * DONE: get definite answer on this !!!
5486 * Lots of entity decls are used to declare a single
5487 * char
5488 * <!ENTITY lt "<">
5489 * Which seems to be valid since
5490 * 2.4: The ampersand character (&) and the left angle
5491 * bracket (<) may appear in their literal form only
5492 * when used ... They are also legal within the literal
5493 * entity value of an internal entity declaration;i
5494 * see "4.3.2 Well-Formed Parsed Entities".
5495 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5496 * Looking at the OASIS test suite and James Clark
5497 * tests, this is broken. However the XML REC uses
5498 * it. Is the XML REC not well-formed ????
5499 * This is a hack to avoid this problem
5500 *
5501 * ANSWER: since lt gt amp .. are already defined,
5502 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005503 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005504 * is lousy but acceptable.
5505 */
5506 list = xmlNewDocText(ctxt->myDoc, value);
5507 if (list != NULL) {
5508 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5509 (ent->children == NULL)) {
5510 ent->children = list;
5511 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005512 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005513 list->parent = (xmlNodePtr) ent;
5514 } else {
5515 xmlFreeNodeList(list);
5516 }
5517 } else if (list != NULL) {
5518 xmlFreeNodeList(list);
5519 }
5520 } else {
5521 /*
5522 * 4.3.2: An internal general parsed entity is well-formed
5523 * if its replacement text matches the production labeled
5524 * content.
5525 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005526
5527 void *user_data;
5528 /*
5529 * This is a bit hackish but this seems the best
5530 * way to make sure both SAX and DOM entity support
5531 * behaves okay.
5532 */
5533 if (ctxt->userData == ctxt)
5534 user_data = NULL;
5535 else
5536 user_data = ctxt->userData;
5537
Owen Taylor3473f882001-02-23 17:55:21 +00005538 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5539 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005540 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5541 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 ctxt->depth--;
5543 } else if (ent->etype ==
5544 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5545 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005546 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005547 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005548 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005549 ctxt->depth--;
5550 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005551 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005552 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5553 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005554 }
5555 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005556 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005557 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005558 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005559 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5560 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005561 (ent->children == NULL)) {
5562 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005563 if (ctxt->replaceEntities) {
5564 /*
5565 * Prune it directly in the generated document
5566 * except for single text nodes.
5567 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005568 if (((list->type == XML_TEXT_NODE) &&
5569 (list->next == NULL)) ||
5570 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005571 list->parent = (xmlNodePtr) ent;
5572 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005573 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005574 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005575 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005576 while (list != NULL) {
5577 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005578 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 if (list->next == NULL)
5580 ent->last = list;
5581 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005582 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005583 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005584#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005585 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5586 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005587#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 }
5589 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005590 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005591 while (list != NULL) {
5592 list->parent = (xmlNodePtr) ent;
5593 if (list->next == NULL)
5594 ent->last = list;
5595 list = list->next;
5596 }
Owen Taylor3473f882001-02-23 17:55:21 +00005597 }
5598 } else {
5599 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005600 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005601 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005602 } else if ((ret != XML_ERR_OK) &&
5603 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005604 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005605 } else if (list != NULL) {
5606 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005607 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005608 }
5609 }
5610 }
5611 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5612 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5613 /*
5614 * Create a node.
5615 */
5616 ctxt->sax->reference(ctxt->userData, ent->name);
5617 return;
5618 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005619 /*
5620 * There is a problem on the handling of _private for entities
5621 * (bug 155816): Should we copy the content of the field from
5622 * the entity (possibly overwriting some value set by the user
5623 * when a copy is created), should we leave it alone, or should
5624 * we try to take care of different situations? The problem
5625 * is exacerbated by the usage of this field by the xmlReader.
5626 * To fix this bug, we look at _private on the created node
5627 * and, if it's NULL, we copy in whatever was in the entity.
5628 * If it's not NULL we leave it alone. This is somewhat of a
5629 * hack - maybe we should have further tests to determine
5630 * what to do.
5631 */
Owen Taylor3473f882001-02-23 17:55:21 +00005632 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5633 /*
5634 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005635 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005636 * In the first occurrence list contains the replacement.
5637 * progressive == 2 means we are operating on the Reader
5638 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005639 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005640 if (((list == NULL) && (ent->owner == 0)) ||
5641 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005642 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005643
5644 /*
5645 * when operating on a reader, the entities definitions
5646 * are always owning the entities subtree.
5647 if (ctxt->parseMode == XML_PARSE_READER)
5648 ent->owner = 1;
5649 */
5650
Daniel Veillard62f313b2001-07-04 19:49:14 +00005651 cur = ent->children;
5652 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005653 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005654 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005655 if (nw->_private == NULL)
5656 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005657 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005658 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005659 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005660 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005661 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005662 if (cur == ent->last) {
5663 /*
5664 * needed to detect some strange empty
5665 * node cases in the reader tests
5666 */
5667 if ((ctxt->parseMode == XML_PARSE_READER) &&
5668 (nw->type == XML_ELEMENT_NODE) &&
5669 (nw->children == NULL))
5670 nw->extra = 1;
5671
Daniel Veillard62f313b2001-07-04 19:49:14 +00005672 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005673 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005674 cur = cur->next;
5675 }
Daniel Veillard81273902003-09-30 00:43:48 +00005676#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005677 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005678 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005679#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005680 } else if (list == NULL) {
5681 xmlNodePtr nw = NULL, cur, next, last,
5682 firstChild = NULL;
5683 /*
5684 * Copy the entity child list and make it the new
5685 * entity child list. The goal is to make sure any
5686 * ID or REF referenced will be the one from the
5687 * document content and not the entity copy.
5688 */
5689 cur = ent->children;
5690 ent->children = NULL;
5691 last = ent->last;
5692 ent->last = NULL;
5693 while (cur != NULL) {
5694 next = cur->next;
5695 cur->next = NULL;
5696 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005697 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005698 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005699 if (nw->_private == NULL)
5700 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005701 if (firstChild == NULL){
5702 firstChild = cur;
5703 }
5704 xmlAddChild((xmlNodePtr) ent, nw);
5705 xmlAddChild(ctxt->node, cur);
5706 }
5707 if (cur == last)
5708 break;
5709 cur = next;
5710 }
5711 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005712#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005713 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5714 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005715#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005716 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005717 const xmlChar *nbktext;
5718
Daniel Veillard62f313b2001-07-04 19:49:14 +00005719 /*
5720 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005721 * node with a possible previous text one which
5722 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005723 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005724 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5725 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005726 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005727 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005728 if ((ent->last != ent->children) &&
5729 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005730 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005731 xmlAddChildList(ctxt->node, ent->children);
5732 }
5733
Owen Taylor3473f882001-02-23 17:55:21 +00005734 /*
5735 * This is to avoid a nasty side effect, see
5736 * characters() in SAX.c
5737 */
5738 ctxt->nodemem = 0;
5739 ctxt->nodelen = 0;
5740 return;
5741 } else {
5742 /*
5743 * Probably running in SAX mode
5744 */
5745 xmlParserInputPtr input;
5746
5747 input = xmlNewEntityInputStream(ctxt, ent);
5748 xmlPushInput(ctxt, input);
5749 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005750 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5751 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005752 xmlParseTextDecl(ctxt);
5753 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5754 /*
5755 * The XML REC instructs us to stop parsing right here
5756 */
5757 ctxt->instate = XML_PARSER_EOF;
5758 return;
5759 }
5760 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005761 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5762 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005763 }
5764 }
5765 return;
5766 }
5767 }
5768 } else {
5769 val = ent->content;
5770 if (val == NULL) return;
5771 /*
5772 * inline the entity.
5773 */
5774 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5775 (!ctxt->disableSAX))
5776 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5777 }
5778 }
5779}
5780
5781/**
5782 * xmlParseEntityRef:
5783 * @ctxt: an XML parser context
5784 *
5785 * parse ENTITY references declarations
5786 *
5787 * [68] EntityRef ::= '&' Name ';'
5788 *
5789 * [ WFC: Entity Declared ]
5790 * In a document without any DTD, a document with only an internal DTD
5791 * subset which contains no parameter entity references, or a document
5792 * with "standalone='yes'", the Name given in the entity reference
5793 * must match that in an entity declaration, except that well-formed
5794 * documents need not declare any of the following entities: amp, lt,
5795 * gt, apos, quot. The declaration of a parameter entity must precede
5796 * any reference to it. Similarly, the declaration of a general entity
5797 * must precede any reference to it which appears in a default value in an
5798 * attribute-list declaration. Note that if entities are declared in the
5799 * external subset or in external parameter entities, a non-validating
5800 * processor is not obligated to read and process their declarations;
5801 * for such documents, the rule that an entity must be declared is a
5802 * well-formedness constraint only if standalone='yes'.
5803 *
5804 * [ WFC: Parsed Entity ]
5805 * An entity reference must not contain the name of an unparsed entity
5806 *
5807 * Returns the xmlEntityPtr if found, or NULL otherwise.
5808 */
5809xmlEntityPtr
5810xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005811 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005812 xmlEntityPtr ent = NULL;
5813
5814 GROW;
5815
5816 if (RAW == '&') {
5817 NEXT;
5818 name = xmlParseName(ctxt);
5819 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5821 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005822 } else {
5823 if (RAW == ';') {
5824 NEXT;
5825 /*
5826 * Ask first SAX for entity resolution, otherwise try the
5827 * predefined set.
5828 */
5829 if (ctxt->sax != NULL) {
5830 if (ctxt->sax->getEntity != NULL)
5831 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005832 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005833 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005834 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5835 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005836 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005837 }
Owen Taylor3473f882001-02-23 17:55:21 +00005838 }
5839 /*
5840 * [ WFC: Entity Declared ]
5841 * In a document without any DTD, a document with only an
5842 * internal DTD subset which contains no parameter entity
5843 * references, or a document with "standalone='yes'", the
5844 * Name given in the entity reference must match that in an
5845 * entity declaration, except that well-formed documents
5846 * need not declare any of the following entities: amp, lt,
5847 * gt, apos, quot.
5848 * The declaration of a parameter entity must precede any
5849 * reference to it.
5850 * Similarly, the declaration of a general entity must
5851 * precede any reference to it which appears in a default
5852 * value in an attribute-list declaration. Note that if
5853 * entities are declared in the external subset or in
5854 * external parameter entities, a non-validating processor
5855 * is not obligated to read and process their declarations;
5856 * for such documents, the rule that an entity must be
5857 * declared is a well-formedness constraint only if
5858 * standalone='yes'.
5859 */
5860 if (ent == NULL) {
5861 if ((ctxt->standalone == 1) ||
5862 ((ctxt->hasExternalSubset == 0) &&
5863 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005864 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005865 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005866 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005867 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005868 "Entity '%s' not defined\n", name);
5869 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005870 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005871 }
5872
5873 /*
5874 * [ WFC: Parsed Entity ]
5875 * An entity reference must not contain the name of an
5876 * unparsed entity
5877 */
5878 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005879 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005880 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005881 }
5882
5883 /*
5884 * [ WFC: No External Entity References ]
5885 * Attribute values cannot contain direct or indirect
5886 * entity references to external entities.
5887 */
5888 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5889 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005890 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5891 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 }
5893 /*
5894 * [ WFC: No < in Attribute Values ]
5895 * The replacement text of any entity referred to directly or
5896 * indirectly in an attribute value (other than "&lt;") must
5897 * not contain a <.
5898 */
5899 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5900 (ent != NULL) &&
5901 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5902 (ent->content != NULL) &&
5903 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005904 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005905 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005906 }
5907
5908 /*
5909 * Internal check, no parameter entities here ...
5910 */
5911 else {
5912 switch (ent->etype) {
5913 case XML_INTERNAL_PARAMETER_ENTITY:
5914 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005915 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5916 "Attempt to reference the parameter entity '%s'\n",
5917 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 break;
5919 default:
5920 break;
5921 }
5922 }
5923
5924 /*
5925 * [ WFC: No Recursion ]
5926 * A parsed entity must not contain a recursive reference
5927 * to itself, either directly or indirectly.
5928 * Done somewhere else
5929 */
5930
5931 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005932 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 }
Owen Taylor3473f882001-02-23 17:55:21 +00005934 }
5935 }
5936 return(ent);
5937}
5938
5939/**
5940 * xmlParseStringEntityRef:
5941 * @ctxt: an XML parser context
5942 * @str: a pointer to an index in the string
5943 *
5944 * parse ENTITY references declarations, but this version parses it from
5945 * a string value.
5946 *
5947 * [68] EntityRef ::= '&' Name ';'
5948 *
5949 * [ WFC: Entity Declared ]
5950 * In a document without any DTD, a document with only an internal DTD
5951 * subset which contains no parameter entity references, or a document
5952 * with "standalone='yes'", the Name given in the entity reference
5953 * must match that in an entity declaration, except that well-formed
5954 * documents need not declare any of the following entities: amp, lt,
5955 * gt, apos, quot. The declaration of a parameter entity must precede
5956 * any reference to it. Similarly, the declaration of a general entity
5957 * must precede any reference to it which appears in a default value in an
5958 * attribute-list declaration. Note that if entities are declared in the
5959 * external subset or in external parameter entities, a non-validating
5960 * processor is not obligated to read and process their declarations;
5961 * for such documents, the rule that an entity must be declared is a
5962 * well-formedness constraint only if standalone='yes'.
5963 *
5964 * [ WFC: Parsed Entity ]
5965 * An entity reference must not contain the name of an unparsed entity
5966 *
5967 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5968 * is updated to the current location in the string.
5969 */
5970xmlEntityPtr
5971xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5972 xmlChar *name;
5973 const xmlChar *ptr;
5974 xmlChar cur;
5975 xmlEntityPtr ent = NULL;
5976
5977 if ((str == NULL) || (*str == NULL))
5978 return(NULL);
5979 ptr = *str;
5980 cur = *ptr;
5981 if (cur == '&') {
5982 ptr++;
5983 cur = *ptr;
5984 name = xmlParseStringName(ctxt, &ptr);
5985 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005986 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5987 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005988 } else {
5989 if (*ptr == ';') {
5990 ptr++;
5991 /*
5992 * Ask first SAX for entity resolution, otherwise try the
5993 * predefined set.
5994 */
5995 if (ctxt->sax != NULL) {
5996 if (ctxt->sax->getEntity != NULL)
5997 ent = ctxt->sax->getEntity(ctxt->userData, name);
5998 if (ent == NULL)
5999 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006000 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006001 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006002 }
Owen Taylor3473f882001-02-23 17:55:21 +00006003 }
6004 /*
6005 * [ WFC: Entity Declared ]
6006 * In a document without any DTD, a document with only an
6007 * internal DTD subset which contains no parameter entity
6008 * references, or a document with "standalone='yes'", the
6009 * Name given in the entity reference must match that in an
6010 * entity declaration, except that well-formed documents
6011 * need not declare any of the following entities: amp, lt,
6012 * gt, apos, quot.
6013 * The declaration of a parameter entity must precede any
6014 * reference to it.
6015 * Similarly, the declaration of a general entity must
6016 * precede any reference to it which appears in a default
6017 * value in an attribute-list declaration. Note that if
6018 * entities are declared in the external subset or in
6019 * external parameter entities, a non-validating processor
6020 * is not obligated to read and process their declarations;
6021 * for such documents, the rule that an entity must be
6022 * declared is a well-formedness constraint only if
6023 * standalone='yes'.
6024 */
6025 if (ent == NULL) {
6026 if ((ctxt->standalone == 1) ||
6027 ((ctxt->hasExternalSubset == 0) &&
6028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006030 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006031 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006032 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006033 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006034 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006035 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006036 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006037 }
6038
6039 /*
6040 * [ WFC: Parsed Entity ]
6041 * An entity reference must not contain the name of an
6042 * unparsed entity
6043 */
6044 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006045 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006046 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006047 }
6048
6049 /*
6050 * [ WFC: No External Entity References ]
6051 * Attribute values cannot contain direct or indirect
6052 * entity references to external entities.
6053 */
6054 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6055 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006056 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006057 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006058 }
6059 /*
6060 * [ WFC: No < in Attribute Values ]
6061 * The replacement text of any entity referred to directly or
6062 * indirectly in an attribute value (other than "&lt;") must
6063 * not contain a <.
6064 */
6065 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6066 (ent != NULL) &&
6067 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6068 (ent->content != NULL) &&
6069 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006070 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6071 "'<' in entity '%s' is not allowed in attributes values\n",
6072 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006073 }
6074
6075 /*
6076 * Internal check, no parameter entities here ...
6077 */
6078 else {
6079 switch (ent->etype) {
6080 case XML_INTERNAL_PARAMETER_ENTITY:
6081 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006082 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6083 "Attempt to reference the parameter entity '%s'\n",
6084 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006085 break;
6086 default:
6087 break;
6088 }
6089 }
6090
6091 /*
6092 * [ WFC: No Recursion ]
6093 * A parsed entity must not contain a recursive reference
6094 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006095 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006096 */
6097
6098 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006099 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 }
6101 xmlFree(name);
6102 }
6103 }
6104 *str = ptr;
6105 return(ent);
6106}
6107
6108/**
6109 * xmlParsePEReference:
6110 * @ctxt: an XML parser context
6111 *
6112 * parse PEReference declarations
6113 * The entity content is handled directly by pushing it's content as
6114 * a new input stream.
6115 *
6116 * [69] PEReference ::= '%' Name ';'
6117 *
6118 * [ WFC: No Recursion ]
6119 * A parsed entity must not contain a recursive
6120 * reference to itself, either directly or indirectly.
6121 *
6122 * [ WFC: Entity Declared ]
6123 * In a document without any DTD, a document with only an internal DTD
6124 * subset which contains no parameter entity references, or a document
6125 * with "standalone='yes'", ... ... The declaration of a parameter
6126 * entity must precede any reference to it...
6127 *
6128 * [ VC: Entity Declared ]
6129 * In a document with an external subset or external parameter entities
6130 * with "standalone='no'", ... ... The declaration of a parameter entity
6131 * must precede any reference to it...
6132 *
6133 * [ WFC: In DTD ]
6134 * Parameter-entity references may only appear in the DTD.
6135 * NOTE: misleading but this is handled.
6136 */
6137void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006138xmlParsePEReference(xmlParserCtxtPtr ctxt)
6139{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006140 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006141 xmlEntityPtr entity = NULL;
6142 xmlParserInputPtr input;
6143
6144 if (RAW == '%') {
6145 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006146 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006147 if (name == NULL) {
6148 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6149 "xmlParsePEReference: no name\n");
6150 } else {
6151 if (RAW == ';') {
6152 NEXT;
6153 if ((ctxt->sax != NULL) &&
6154 (ctxt->sax->getParameterEntity != NULL))
6155 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6156 name);
6157 if (entity == NULL) {
6158 /*
6159 * [ WFC: Entity Declared ]
6160 * In a document without any DTD, a document with only an
6161 * internal DTD subset which contains no parameter entity
6162 * references, or a document with "standalone='yes'", ...
6163 * ... The declaration of a parameter entity must precede
6164 * any reference to it...
6165 */
6166 if ((ctxt->standalone == 1) ||
6167 ((ctxt->hasExternalSubset == 0) &&
6168 (ctxt->hasPErefs == 0))) {
6169 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6170 "PEReference: %%%s; not found\n",
6171 name);
6172 } else {
6173 /*
6174 * [ VC: Entity Declared ]
6175 * In a document with an external subset or external
6176 * parameter entities with "standalone='no'", ...
6177 * ... The declaration of a parameter entity must
6178 * precede any reference to it...
6179 */
6180 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6181 "PEReference: %%%s; not found\n",
6182 name, NULL);
6183 ctxt->valid = 0;
6184 }
6185 } else {
6186 /*
6187 * Internal checking in case the entity quest barfed
6188 */
6189 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6190 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6191 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6192 "Internal: %%%s; is not a parameter entity\n",
6193 name, NULL);
6194 } else if (ctxt->input->free != deallocblankswrapper) {
6195 input =
6196 xmlNewBlanksWrapperInputStream(ctxt, entity);
6197 xmlPushInput(ctxt, input);
6198 } else {
6199 /*
6200 * TODO !!!
6201 * handle the extra spaces added before and after
6202 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6203 */
6204 input = xmlNewEntityInputStream(ctxt, entity);
6205 xmlPushInput(ctxt, input);
6206 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006207 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006208 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006209 xmlParseTextDecl(ctxt);
6210 if (ctxt->errNo ==
6211 XML_ERR_UNSUPPORTED_ENCODING) {
6212 /*
6213 * The XML REC instructs us to stop parsing
6214 * right here
6215 */
6216 ctxt->instate = XML_PARSER_EOF;
6217 return;
6218 }
6219 }
6220 }
6221 }
6222 ctxt->hasPErefs = 1;
6223 } else {
6224 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6225 }
6226 }
Owen Taylor3473f882001-02-23 17:55:21 +00006227 }
6228}
6229
6230/**
6231 * xmlParseStringPEReference:
6232 * @ctxt: an XML parser context
6233 * @str: a pointer to an index in the string
6234 *
6235 * parse PEReference declarations
6236 *
6237 * [69] PEReference ::= '%' Name ';'
6238 *
6239 * [ WFC: No Recursion ]
6240 * A parsed entity must not contain a recursive
6241 * reference to itself, either directly or indirectly.
6242 *
6243 * [ WFC: Entity Declared ]
6244 * In a document without any DTD, a document with only an internal DTD
6245 * subset which contains no parameter entity references, or a document
6246 * with "standalone='yes'", ... ... The declaration of a parameter
6247 * entity must precede any reference to it...
6248 *
6249 * [ VC: Entity Declared ]
6250 * In a document with an external subset or external parameter entities
6251 * with "standalone='no'", ... ... The declaration of a parameter entity
6252 * must precede any reference to it...
6253 *
6254 * [ WFC: In DTD ]
6255 * Parameter-entity references may only appear in the DTD.
6256 * NOTE: misleading but this is handled.
6257 *
6258 * Returns the string of the entity content.
6259 * str is updated to the current value of the index
6260 */
6261xmlEntityPtr
6262xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6263 const xmlChar *ptr;
6264 xmlChar cur;
6265 xmlChar *name;
6266 xmlEntityPtr entity = NULL;
6267
6268 if ((str == NULL) || (*str == NULL)) return(NULL);
6269 ptr = *str;
6270 cur = *ptr;
6271 if (cur == '%') {
6272 ptr++;
6273 cur = *ptr;
6274 name = xmlParseStringName(ctxt, &ptr);
6275 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006276 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6277 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006278 } else {
6279 cur = *ptr;
6280 if (cur == ';') {
6281 ptr++;
6282 cur = *ptr;
6283 if ((ctxt->sax != NULL) &&
6284 (ctxt->sax->getParameterEntity != NULL))
6285 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6286 name);
6287 if (entity == NULL) {
6288 /*
6289 * [ WFC: Entity Declared ]
6290 * In a document without any DTD, a document with only an
6291 * internal DTD subset which contains no parameter entity
6292 * references, or a document with "standalone='yes'", ...
6293 * ... The declaration of a parameter entity must precede
6294 * any reference to it...
6295 */
6296 if ((ctxt->standalone == 1) ||
6297 ((ctxt->hasExternalSubset == 0) &&
6298 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006299 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006300 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006301 } else {
6302 /*
6303 * [ VC: Entity Declared ]
6304 * In a document with an external subset or external
6305 * parameter entities with "standalone='no'", ...
6306 * ... The declaration of a parameter entity must
6307 * precede any reference to it...
6308 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006309 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6310 "PEReference: %%%s; not found\n",
6311 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006312 ctxt->valid = 0;
6313 }
6314 } else {
6315 /*
6316 * Internal checking in case the entity quest barfed
6317 */
6318 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6319 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006320 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6321 "%%%s; is not a parameter entity\n",
6322 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006323 }
6324 }
6325 ctxt->hasPErefs = 1;
6326 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006327 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006328 }
6329 xmlFree(name);
6330 }
6331 }
6332 *str = ptr;
6333 return(entity);
6334}
6335
6336/**
6337 * xmlParseDocTypeDecl:
6338 * @ctxt: an XML parser context
6339 *
6340 * parse a DOCTYPE declaration
6341 *
6342 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6343 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6344 *
6345 * [ VC: Root Element Type ]
6346 * The Name in the document type declaration must match the element
6347 * type of the root element.
6348 */
6349
6350void
6351xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006352 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006353 xmlChar *ExternalID = NULL;
6354 xmlChar *URI = NULL;
6355
6356 /*
6357 * We know that '<!DOCTYPE' has been detected.
6358 */
6359 SKIP(9);
6360
6361 SKIP_BLANKS;
6362
6363 /*
6364 * Parse the DOCTYPE name.
6365 */
6366 name = xmlParseName(ctxt);
6367 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006368 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6369 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006370 }
6371 ctxt->intSubName = name;
6372
6373 SKIP_BLANKS;
6374
6375 /*
6376 * Check for SystemID and ExternalID
6377 */
6378 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6379
6380 if ((URI != NULL) || (ExternalID != NULL)) {
6381 ctxt->hasExternalSubset = 1;
6382 }
6383 ctxt->extSubURI = URI;
6384 ctxt->extSubSystem = ExternalID;
6385
6386 SKIP_BLANKS;
6387
6388 /*
6389 * Create and update the internal subset.
6390 */
6391 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6392 (!ctxt->disableSAX))
6393 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6394
6395 /*
6396 * Is there any internal subset declarations ?
6397 * they are handled separately in xmlParseInternalSubset()
6398 */
6399 if (RAW == '[')
6400 return;
6401
6402 /*
6403 * We should be at the end of the DOCTYPE declaration.
6404 */
6405 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006406 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006407 }
6408 NEXT;
6409}
6410
6411/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006412 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006413 * @ctxt: an XML parser context
6414 *
6415 * parse the internal subset declaration
6416 *
6417 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6418 */
6419
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006420static void
Owen Taylor3473f882001-02-23 17:55:21 +00006421xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6422 /*
6423 * Is there any DTD definition ?
6424 */
6425 if (RAW == '[') {
6426 ctxt->instate = XML_PARSER_DTD;
6427 NEXT;
6428 /*
6429 * Parse the succession of Markup declarations and
6430 * PEReferences.
6431 * Subsequence (markupdecl | PEReference | S)*
6432 */
6433 while (RAW != ']') {
6434 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006435 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006436
6437 SKIP_BLANKS;
6438 xmlParseMarkupDecl(ctxt);
6439 xmlParsePEReference(ctxt);
6440
6441 /*
6442 * Pop-up of finished entities.
6443 */
6444 while ((RAW == 0) && (ctxt->inputNr > 1))
6445 xmlPopInput(ctxt);
6446
6447 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006448 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006449 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006450 break;
6451 }
6452 }
6453 if (RAW == ']') {
6454 NEXT;
6455 SKIP_BLANKS;
6456 }
6457 }
6458
6459 /*
6460 * We should be at the end of the DOCTYPE declaration.
6461 */
6462 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006463 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006464 }
6465 NEXT;
6466}
6467
Daniel Veillard81273902003-09-30 00:43:48 +00006468#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006469/**
6470 * xmlParseAttribute:
6471 * @ctxt: an XML parser context
6472 * @value: a xmlChar ** used to store the value of the attribute
6473 *
6474 * parse an attribute
6475 *
6476 * [41] Attribute ::= Name Eq AttValue
6477 *
6478 * [ WFC: No External Entity References ]
6479 * Attribute values cannot contain direct or indirect entity references
6480 * to external entities.
6481 *
6482 * [ WFC: No < in Attribute Values ]
6483 * The replacement text of any entity referred to directly or indirectly in
6484 * an attribute value (other than "&lt;") must not contain a <.
6485 *
6486 * [ VC: Attribute Value Type ]
6487 * The attribute must have been declared; the value must be of the type
6488 * declared for it.
6489 *
6490 * [25] Eq ::= S? '=' S?
6491 *
6492 * With namespace:
6493 *
6494 * [NS 11] Attribute ::= QName Eq AttValue
6495 *
6496 * Also the case QName == xmlns:??? is handled independently as a namespace
6497 * definition.
6498 *
6499 * Returns the attribute name, and the value in *value.
6500 */
6501
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006502const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006503xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006504 const xmlChar *name;
6505 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006506
6507 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006508 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006509 name = xmlParseName(ctxt);
6510 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006511 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006512 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006513 return(NULL);
6514 }
6515
6516 /*
6517 * read the value
6518 */
6519 SKIP_BLANKS;
6520 if (RAW == '=') {
6521 NEXT;
6522 SKIP_BLANKS;
6523 val = xmlParseAttValue(ctxt);
6524 ctxt->instate = XML_PARSER_CONTENT;
6525 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006526 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006527 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006528 return(NULL);
6529 }
6530
6531 /*
6532 * Check that xml:lang conforms to the specification
6533 * No more registered as an error, just generate a warning now
6534 * since this was deprecated in XML second edition
6535 */
6536 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6537 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006538 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6539 "Malformed value for xml:lang : %s\n",
6540 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006541 }
6542 }
6543
6544 /*
6545 * Check that xml:space conforms to the specification
6546 */
6547 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6548 if (xmlStrEqual(val, BAD_CAST "default"))
6549 *(ctxt->space) = 0;
6550 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6551 *(ctxt->space) = 1;
6552 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006553 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006554"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006555 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006556 }
6557 }
6558
6559 *value = val;
6560 return(name);
6561}
6562
6563/**
6564 * xmlParseStartTag:
6565 * @ctxt: an XML parser context
6566 *
6567 * parse a start of tag either for rule element or
6568 * EmptyElement. In both case we don't parse the tag closing chars.
6569 *
6570 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6571 *
6572 * [ WFC: Unique Att Spec ]
6573 * No attribute name may appear more than once in the same start-tag or
6574 * empty-element tag.
6575 *
6576 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6577 *
6578 * [ WFC: Unique Att Spec ]
6579 * No attribute name may appear more than once in the same start-tag or
6580 * empty-element tag.
6581 *
6582 * With namespace:
6583 *
6584 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6585 *
6586 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6587 *
6588 * Returns the element name parsed
6589 */
6590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006591const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006592xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006593 const xmlChar *name;
6594 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006595 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006596 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006597 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006598 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006599 int i;
6600
6601 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006602 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006603
6604 name = xmlParseName(ctxt);
6605 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006606 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006607 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006608 return(NULL);
6609 }
6610
6611 /*
6612 * Now parse the attributes, it ends up with the ending
6613 *
6614 * (S Attribute)* S?
6615 */
6616 SKIP_BLANKS;
6617 GROW;
6618
Daniel Veillard21a0f912001-02-25 19:54:14 +00006619 while ((RAW != '>') &&
6620 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006621 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006622 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006623 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006624
6625 attname = xmlParseAttribute(ctxt, &attvalue);
6626 if ((attname != NULL) && (attvalue != NULL)) {
6627 /*
6628 * [ WFC: Unique Att Spec ]
6629 * No attribute name may appear more than once in the same
6630 * start-tag or empty-element tag.
6631 */
6632 for (i = 0; i < nbatts;i += 2) {
6633 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006634 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006635 xmlFree(attvalue);
6636 goto failed;
6637 }
6638 }
Owen Taylor3473f882001-02-23 17:55:21 +00006639 /*
6640 * Add the pair to atts
6641 */
6642 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006643 maxatts = 22; /* allow for 10 attrs by default */
6644 atts = (const xmlChar **)
6645 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006646 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006647 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006648 if (attvalue != NULL)
6649 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006650 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006651 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006652 ctxt->atts = atts;
6653 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006654 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006655 const xmlChar **n;
6656
Owen Taylor3473f882001-02-23 17:55:21 +00006657 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006658 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006659 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006660 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006661 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006662 if (attvalue != NULL)
6663 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006664 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006665 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006666 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006667 ctxt->atts = atts;
6668 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006669 }
6670 atts[nbatts++] = attname;
6671 atts[nbatts++] = attvalue;
6672 atts[nbatts] = NULL;
6673 atts[nbatts + 1] = NULL;
6674 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006675 if (attvalue != NULL)
6676 xmlFree(attvalue);
6677 }
6678
6679failed:
6680
Daniel Veillard3772de32002-12-17 10:31:45 +00006681 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006682 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6683 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006684 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6686 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006687 }
6688 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006689 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6690 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006691 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6692 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006693 break;
6694 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006695 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006696 GROW;
6697 }
6698
6699 /*
6700 * SAX: Start of Element !
6701 */
6702 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006703 (!ctxt->disableSAX)) {
6704 if (nbatts > 0)
6705 ctxt->sax->startElement(ctxt->userData, name, atts);
6706 else
6707 ctxt->sax->startElement(ctxt->userData, name, NULL);
6708 }
Owen Taylor3473f882001-02-23 17:55:21 +00006709
6710 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006711 /* Free only the content strings */
6712 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006713 if (atts[i] != NULL)
6714 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006715 }
6716 return(name);
6717}
6718
6719/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006720 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006721 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006722 * @line: line of the start tag
6723 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006724 *
6725 * parse an end of tag
6726 *
6727 * [42] ETag ::= '</' Name S? '>'
6728 *
6729 * With namespace
6730 *
6731 * [NS 9] ETag ::= '</' QName S? '>'
6732 */
6733
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006734static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006735xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006736 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006737
6738 GROW;
6739 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006740 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006741 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006742 return;
6743 }
6744 SKIP(2);
6745
Daniel Veillard46de64e2002-05-29 08:21:33 +00006746 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006747
6748 /*
6749 * We should definitely be at the ending "S? '>'" part
6750 */
6751 GROW;
6752 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006753 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006754 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006755 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006756 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006757
6758 /*
6759 * [ WFC: Element Type Match ]
6760 * The Name in an element's end-tag must match the element type in the
6761 * start-tag.
6762 *
6763 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006764 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006765 if (name == NULL) name = BAD_CAST "unparseable";
6766 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006767 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006768 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006769 }
6770
6771 /*
6772 * SAX: End of Tag
6773 */
6774 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6775 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006776 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006777
Daniel Veillarde57ec792003-09-10 10:50:59 +00006778 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006779 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006780 return;
6781}
6782
6783/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006784 * xmlParseEndTag:
6785 * @ctxt: an XML parser context
6786 *
6787 * parse an end of tag
6788 *
6789 * [42] ETag ::= '</' Name S? '>'
6790 *
6791 * With namespace
6792 *
6793 * [NS 9] ETag ::= '</' QName S? '>'
6794 */
6795
6796void
6797xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006798 xmlParseEndTag1(ctxt, 0);
6799}
Daniel Veillard81273902003-09-30 00:43:48 +00006800#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006801
6802/************************************************************************
6803 * *
6804 * SAX 2 specific operations *
6805 * *
6806 ************************************************************************/
6807
6808static const xmlChar *
6809xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6810 int len = 0, l;
6811 int c;
6812 int count = 0;
6813
6814 /*
6815 * Handler for more complex cases
6816 */
6817 GROW;
6818 c = CUR_CHAR(l);
6819 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006820 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006821 return(NULL);
6822 }
6823
6824 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006825 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006826 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006827 (IS_COMBINING(c)) ||
6828 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006829 if (count++ > 100) {
6830 count = 0;
6831 GROW;
6832 }
6833 len += l;
6834 NEXTL(l);
6835 c = CUR_CHAR(l);
6836 }
6837 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6838}
6839
6840/*
6841 * xmlGetNamespace:
6842 * @ctxt: an XML parser context
6843 * @prefix: the prefix to lookup
6844 *
6845 * Lookup the namespace name for the @prefix (which ca be NULL)
6846 * The prefix must come from the @ctxt->dict dictionnary
6847 *
6848 * Returns the namespace name or NULL if not bound
6849 */
6850static const xmlChar *
6851xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6852 int i;
6853
Daniel Veillarde57ec792003-09-10 10:50:59 +00006854 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006855 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006856 if (ctxt->nsTab[i] == prefix) {
6857 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6858 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006859 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006860 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006861 return(NULL);
6862}
6863
6864/**
6865 * xmlParseNCName:
6866 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006867 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006868 *
6869 * parse an XML name.
6870 *
6871 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6872 * CombiningChar | Extender
6873 *
6874 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6875 *
6876 * Returns the Name parsed or NULL
6877 */
6878
6879static const xmlChar *
6880xmlParseNCName(xmlParserCtxtPtr ctxt) {
6881 const xmlChar *in;
6882 const xmlChar *ret;
6883 int count = 0;
6884
6885 /*
6886 * Accelerator for simple ASCII names
6887 */
6888 in = ctxt->input->cur;
6889 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6890 ((*in >= 0x41) && (*in <= 0x5A)) ||
6891 (*in == '_')) {
6892 in++;
6893 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6894 ((*in >= 0x41) && (*in <= 0x5A)) ||
6895 ((*in >= 0x30) && (*in <= 0x39)) ||
6896 (*in == '_') || (*in == '-') ||
6897 (*in == '.'))
6898 in++;
6899 if ((*in > 0) && (*in < 0x80)) {
6900 count = in - ctxt->input->cur;
6901 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6902 ctxt->input->cur = in;
6903 ctxt->nbChars += count;
6904 ctxt->input->col += count;
6905 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006906 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006907 }
6908 return(ret);
6909 }
6910 }
6911 return(xmlParseNCNameComplex(ctxt));
6912}
6913
6914/**
6915 * xmlParseQName:
6916 * @ctxt: an XML parser context
6917 * @prefix: pointer to store the prefix part
6918 *
6919 * parse an XML Namespace QName
6920 *
6921 * [6] QName ::= (Prefix ':')? LocalPart
6922 * [7] Prefix ::= NCName
6923 * [8] LocalPart ::= NCName
6924 *
6925 * Returns the Name parsed or NULL
6926 */
6927
6928static const xmlChar *
6929xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6930 const xmlChar *l, *p;
6931
6932 GROW;
6933
6934 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006935 if (l == NULL) {
6936 if (CUR == ':') {
6937 l = xmlParseName(ctxt);
6938 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006939 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6940 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006941 *prefix = NULL;
6942 return(l);
6943 }
6944 }
6945 return(NULL);
6946 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006947 if (CUR == ':') {
6948 NEXT;
6949 p = l;
6950 l = xmlParseNCName(ctxt);
6951 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006952 xmlChar *tmp;
6953
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006954 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6955 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006956 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6957 p = xmlDictLookup(ctxt->dict, tmp, -1);
6958 if (tmp != NULL) xmlFree(tmp);
6959 *prefix = NULL;
6960 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006961 }
6962 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006963 xmlChar *tmp;
6964
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006965 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6966 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006967 NEXT;
6968 tmp = (xmlChar *) xmlParseName(ctxt);
6969 if (tmp != NULL) {
6970 tmp = xmlBuildQName(tmp, l, NULL, 0);
6971 l = xmlDictLookup(ctxt->dict, tmp, -1);
6972 if (tmp != NULL) xmlFree(tmp);
6973 *prefix = p;
6974 return(l);
6975 }
6976 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6977 l = xmlDictLookup(ctxt->dict, tmp, -1);
6978 if (tmp != NULL) xmlFree(tmp);
6979 *prefix = p;
6980 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006981 }
6982 *prefix = p;
6983 } else
6984 *prefix = NULL;
6985 return(l);
6986}
6987
6988/**
6989 * xmlParseQNameAndCompare:
6990 * @ctxt: an XML parser context
6991 * @name: the localname
6992 * @prefix: the prefix, if any.
6993 *
6994 * parse an XML name and compares for match
6995 * (specialized for endtag parsing)
6996 *
6997 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6998 * and the name for mismatch
6999 */
7000
7001static const xmlChar *
7002xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7003 xmlChar const *prefix) {
7004 const xmlChar *cmp = name;
7005 const xmlChar *in;
7006 const xmlChar *ret;
7007 const xmlChar *prefix2;
7008
7009 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7010
7011 GROW;
7012 in = ctxt->input->cur;
7013
7014 cmp = prefix;
7015 while (*in != 0 && *in == *cmp) {
7016 ++in;
7017 ++cmp;
7018 }
7019 if ((*cmp == 0) && (*in == ':')) {
7020 in++;
7021 cmp = name;
7022 while (*in != 0 && *in == *cmp) {
7023 ++in;
7024 ++cmp;
7025 }
William M. Brack76e95df2003-10-18 16:20:14 +00007026 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007027 /* success */
7028 ctxt->input->cur = in;
7029 return((const xmlChar*) 1);
7030 }
7031 }
7032 /*
7033 * all strings coms from the dictionary, equality can be done directly
7034 */
7035 ret = xmlParseQName (ctxt, &prefix2);
7036 if ((ret == name) && (prefix == prefix2))
7037 return((const xmlChar*) 1);
7038 return ret;
7039}
7040
7041/**
7042 * xmlParseAttValueInternal:
7043 * @ctxt: an XML parser context
7044 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007045 * @alloc: whether the attribute was reallocated as a new string
7046 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007047 *
7048 * parse a value for an attribute.
7049 * NOTE: if no normalization is needed, the routine will return pointers
7050 * directly from the data buffer.
7051 *
7052 * 3.3.3 Attribute-Value Normalization:
7053 * Before the value of an attribute is passed to the application or
7054 * checked for validity, the XML processor must normalize it as follows:
7055 * - a character reference is processed by appending the referenced
7056 * character to the attribute value
7057 * - an entity reference is processed by recursively processing the
7058 * replacement text of the entity
7059 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7060 * appending #x20 to the normalized value, except that only a single
7061 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7062 * parsed entity or the literal entity value of an internal parsed entity
7063 * - other characters are processed by appending them to the normalized value
7064 * If the declared value is not CDATA, then the XML processor must further
7065 * process the normalized attribute value by discarding any leading and
7066 * trailing space (#x20) characters, and by replacing sequences of space
7067 * (#x20) characters by a single space (#x20) character.
7068 * All attributes for which no declaration has been read should be treated
7069 * by a non-validating parser as if declared CDATA.
7070 *
7071 * Returns the AttValue parsed or NULL. The value has to be freed by the
7072 * caller if it was copied, this can be detected by val[*len] == 0.
7073 */
7074
7075static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007076xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7077 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007078{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007079 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007080 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007081 xmlChar *ret = NULL;
7082
7083 GROW;
7084 in = (xmlChar *) CUR_PTR;
7085 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007086 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007087 return (NULL);
7088 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007089 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007090
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007091 /*
7092 * try to handle in this routine the most common case where no
7093 * allocation of a new string is required and where content is
7094 * pure ASCII.
7095 */
7096 limit = *in++;
7097 end = ctxt->input->end;
7098 start = in;
7099 if (in >= end) {
7100 const xmlChar *oldbase = ctxt->input->base;
7101 GROW;
7102 if (oldbase != ctxt->input->base) {
7103 long delta = ctxt->input->base - oldbase;
7104 start = start + delta;
7105 in = in + delta;
7106 }
7107 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007108 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007109 if (normalize) {
7110 /*
7111 * Skip any leading spaces
7112 */
7113 while ((in < end) && (*in != limit) &&
7114 ((*in == 0x20) || (*in == 0x9) ||
7115 (*in == 0xA) || (*in == 0xD))) {
7116 in++;
7117 start = in;
7118 if (in >= end) {
7119 const xmlChar *oldbase = ctxt->input->base;
7120 GROW;
7121 if (oldbase != ctxt->input->base) {
7122 long delta = ctxt->input->base - oldbase;
7123 start = start + delta;
7124 in = in + delta;
7125 }
7126 end = ctxt->input->end;
7127 }
7128 }
7129 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7130 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7131 if ((*in++ == 0x20) && (*in == 0x20)) break;
7132 if (in >= end) {
7133 const xmlChar *oldbase = ctxt->input->base;
7134 GROW;
7135 if (oldbase != ctxt->input->base) {
7136 long delta = ctxt->input->base - oldbase;
7137 start = start + delta;
7138 in = in + delta;
7139 }
7140 end = ctxt->input->end;
7141 }
7142 }
7143 last = in;
7144 /*
7145 * skip the trailing blanks
7146 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007147 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007148 while ((in < end) && (*in != limit) &&
7149 ((*in == 0x20) || (*in == 0x9) ||
7150 (*in == 0xA) || (*in == 0xD))) {
7151 in++;
7152 if (in >= end) {
7153 const xmlChar *oldbase = ctxt->input->base;
7154 GROW;
7155 if (oldbase != ctxt->input->base) {
7156 long delta = ctxt->input->base - oldbase;
7157 start = start + delta;
7158 in = in + delta;
7159 last = last + delta;
7160 }
7161 end = ctxt->input->end;
7162 }
7163 }
7164 if (*in != limit) goto need_complex;
7165 } else {
7166 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7167 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7168 in++;
7169 if (in >= end) {
7170 const xmlChar *oldbase = ctxt->input->base;
7171 GROW;
7172 if (oldbase != ctxt->input->base) {
7173 long delta = ctxt->input->base - oldbase;
7174 start = start + delta;
7175 in = in + delta;
7176 }
7177 end = ctxt->input->end;
7178 }
7179 }
7180 last = in;
7181 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007182 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007183 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007184 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007185 *len = last - start;
7186 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007187 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007188 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007189 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007190 }
7191 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007192 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007193 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007194need_complex:
7195 if (alloc) *alloc = 1;
7196 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007197}
7198
7199/**
7200 * xmlParseAttribute2:
7201 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007202 * @pref: the element prefix
7203 * @elem: the element name
7204 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007205 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007206 * @len: an int * to save the length of the attribute
7207 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007208 *
7209 * parse an attribute in the new SAX2 framework.
7210 *
7211 * Returns the attribute name, and the value in *value, .
7212 */
7213
7214static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007215xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7216 const xmlChar *pref, const xmlChar *elem,
7217 const xmlChar **prefix, xmlChar **value,
7218 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007219 const xmlChar *name;
7220 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007221 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007222
7223 *value = NULL;
7224 GROW;
7225 name = xmlParseQName(ctxt, prefix);
7226 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007227 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7228 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007229 return(NULL);
7230 }
7231
7232 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007233 * get the type if needed
7234 */
7235 if (ctxt->attsSpecial != NULL) {
7236 int type;
7237
7238 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7239 pref, elem, *prefix, name);
7240 if (type != 0) normalize = 1;
7241 }
7242
7243 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007244 * read the value
7245 */
7246 SKIP_BLANKS;
7247 if (RAW == '=') {
7248 NEXT;
7249 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007250 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007251 ctxt->instate = XML_PARSER_CONTENT;
7252 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007253 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007254 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007255 return(NULL);
7256 }
7257
7258 /*
7259 * Check that xml:lang conforms to the specification
7260 * No more registered as an error, just generate a warning now
7261 * since this was deprecated in XML second edition
7262 */
7263 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7264 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007265 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7266 "Malformed value for xml:lang : %s\n",
7267 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007268 }
7269 }
7270
7271 /*
7272 * Check that xml:space conforms to the specification
7273 */
7274 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7275 if (xmlStrEqual(val, BAD_CAST "default"))
7276 *(ctxt->space) = 0;
7277 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7278 *(ctxt->space) = 1;
7279 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007280 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007281"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7282 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007283 }
7284 }
7285
7286 *value = val;
7287 return(name);
7288}
7289
7290/**
7291 * xmlParseStartTag2:
7292 * @ctxt: an XML parser context
7293 *
7294 * parse a start of tag either for rule element or
7295 * EmptyElement. In both case we don't parse the tag closing chars.
7296 * This routine is called when running SAX2 parsing
7297 *
7298 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7299 *
7300 * [ WFC: Unique Att Spec ]
7301 * No attribute name may appear more than once in the same start-tag or
7302 * empty-element tag.
7303 *
7304 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7305 *
7306 * [ WFC: Unique Att Spec ]
7307 * No attribute name may appear more than once in the same start-tag or
7308 * empty-element tag.
7309 *
7310 * With namespace:
7311 *
7312 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7313 *
7314 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7315 *
7316 * Returns the element name parsed
7317 */
7318
7319static const xmlChar *
7320xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007321 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007322 const xmlChar *localname;
7323 const xmlChar *prefix;
7324 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007325 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007326 const xmlChar *nsname;
7327 xmlChar *attvalue;
7328 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007329 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007330 int nratts, nbatts, nbdef;
7331 int i, j, nbNs, attval;
7332 const xmlChar *base;
7333 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334
7335 if (RAW != '<') return(NULL);
7336 NEXT1;
7337
7338 /*
7339 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7340 * point since the attribute values may be stored as pointers to
7341 * the buffer and calling SHRINK would destroy them !
7342 * The Shrinking is only possible once the full set of attribute
7343 * callbacks have been done.
7344 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007345reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007346 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007347 base = ctxt->input->base;
7348 cur = ctxt->input->cur - ctxt->input->base;
7349 nbatts = 0;
7350 nratts = 0;
7351 nbdef = 0;
7352 nbNs = 0;
7353 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007354
7355 localname = xmlParseQName(ctxt, &prefix);
7356 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007357 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7358 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007359 return(NULL);
7360 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007361 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007362
7363 /*
7364 * Now parse the attributes, it ends up with the ending
7365 *
7366 * (S Attribute)* S?
7367 */
7368 SKIP_BLANKS;
7369 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007370 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007371
7372 while ((RAW != '>') &&
7373 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007374 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007375 const xmlChar *q = CUR_PTR;
7376 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007377 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007378
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007379 attname = xmlParseAttribute2(ctxt, prefix, localname,
7380 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007381 if ((attname != NULL) && (attvalue != NULL)) {
7382 if (len < 0) len = xmlStrlen(attvalue);
7383 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007384 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7385 xmlURIPtr uri;
7386
7387 if (*URL != 0) {
7388 uri = xmlParseURI((const char *) URL);
7389 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007390 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7391 "xmlns: %s not a valid URI\n",
7392 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007393 } else {
7394 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007395 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7396 "xmlns: URI %s is not absolute\n",
7397 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007398 }
7399 xmlFreeURI(uri);
7400 }
7401 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007402 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007403 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007404 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007405 for (j = 1;j <= nbNs;j++)
7406 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7407 break;
7408 if (j <= nbNs)
7409 xmlErrAttributeDup(ctxt, NULL, attname);
7410 else
7411 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007412 if (alloc != 0) xmlFree(attvalue);
7413 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007414 continue;
7415 }
7416 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007417 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7418 xmlURIPtr uri;
7419
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007420 if (attname == ctxt->str_xml) {
7421 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007422 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7423 "xml namespace prefix mapped to wrong URI\n",
7424 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007425 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007426 /*
7427 * Do not keep a namespace definition node
7428 */
7429 if (alloc != 0) xmlFree(attvalue);
7430 SKIP_BLANKS;
7431 continue;
7432 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007433 uri = xmlParseURI((const char *) URL);
7434 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007435 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7436 "xmlns:%s: '%s' is not a valid URI\n",
7437 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007438 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007439 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007440 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7441 "xmlns:%s: URI %s is not absolute\n",
7442 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007443 }
7444 xmlFreeURI(uri);
7445 }
7446
Daniel Veillard0fb18932003-09-07 09:14:37 +00007447 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007448 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007449 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007450 for (j = 1;j <= nbNs;j++)
7451 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7452 break;
7453 if (j <= nbNs)
7454 xmlErrAttributeDup(ctxt, aprefix, attname);
7455 else
7456 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007457 if (alloc != 0) xmlFree(attvalue);
7458 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007459 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007460 continue;
7461 }
7462
7463 /*
7464 * Add the pair to atts
7465 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007466 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7467 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007468 if (attvalue[len] == 0)
7469 xmlFree(attvalue);
7470 goto failed;
7471 }
7472 maxatts = ctxt->maxatts;
7473 atts = ctxt->atts;
7474 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007475 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476 atts[nbatts++] = attname;
7477 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007478 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007479 atts[nbatts++] = attvalue;
7480 attvalue += len;
7481 atts[nbatts++] = attvalue;
7482 /*
7483 * tag if some deallocation is needed
7484 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007485 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007486 } else {
7487 if ((attvalue != NULL) && (attvalue[len] == 0))
7488 xmlFree(attvalue);
7489 }
7490
7491failed:
7492
7493 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007494 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007495 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7496 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007497 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007498 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7499 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007500 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007501 }
7502 SKIP_BLANKS;
7503 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7504 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007505 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007507 break;
7508 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007509 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007510 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007511 }
7512
Daniel Veillard0fb18932003-09-07 09:14:37 +00007513 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007514 * The attributes defaulting
7515 */
7516 if (ctxt->attsDefault != NULL) {
7517 xmlDefAttrsPtr defaults;
7518
7519 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7520 if (defaults != NULL) {
7521 for (i = 0;i < defaults->nbAttrs;i++) {
7522 attname = defaults->values[4 * i];
7523 aprefix = defaults->values[4 * i + 1];
7524
7525 /*
7526 * special work for namespaces defaulted defs
7527 */
7528 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7529 /*
7530 * check that it's not a defined namespace
7531 */
7532 for (j = 1;j <= nbNs;j++)
7533 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7534 break;
7535 if (j <= nbNs) continue;
7536
7537 nsname = xmlGetNamespace(ctxt, NULL);
7538 if (nsname != defaults->values[4 * i + 2]) {
7539 if (nsPush(ctxt, NULL,
7540 defaults->values[4 * i + 2]) > 0)
7541 nbNs++;
7542 }
7543 } else if (aprefix == ctxt->str_xmlns) {
7544 /*
7545 * check that it's not a defined namespace
7546 */
7547 for (j = 1;j <= nbNs;j++)
7548 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7549 break;
7550 if (j <= nbNs) continue;
7551
7552 nsname = xmlGetNamespace(ctxt, attname);
7553 if (nsname != defaults->values[2]) {
7554 if (nsPush(ctxt, attname,
7555 defaults->values[4 * i + 2]) > 0)
7556 nbNs++;
7557 }
7558 } else {
7559 /*
7560 * check that it's not a defined attribute
7561 */
7562 for (j = 0;j < nbatts;j+=5) {
7563 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7564 break;
7565 }
7566 if (j < nbatts) continue;
7567
7568 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7569 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007570 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007571 }
7572 maxatts = ctxt->maxatts;
7573 atts = ctxt->atts;
7574 }
7575 atts[nbatts++] = attname;
7576 atts[nbatts++] = aprefix;
7577 if (aprefix == NULL)
7578 atts[nbatts++] = NULL;
7579 else
7580 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7581 atts[nbatts++] = defaults->values[4 * i + 2];
7582 atts[nbatts++] = defaults->values[4 * i + 3];
7583 nbdef++;
7584 }
7585 }
7586 }
7587 }
7588
Daniel Veillarde70c8772003-11-25 07:21:18 +00007589 /*
7590 * The attributes checkings
7591 */
7592 for (i = 0; i < nbatts;i += 5) {
7593 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7594 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7595 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7596 "Namespace prefix %s for %s on %s is not defined\n",
7597 atts[i + 1], atts[i], localname);
7598 }
7599 atts[i + 2] = nsname;
7600 /*
7601 * [ WFC: Unique Att Spec ]
7602 * No attribute name may appear more than once in the same
7603 * start-tag or empty-element tag.
7604 * As extended by the Namespace in XML REC.
7605 */
7606 for (j = 0; j < i;j += 5) {
7607 if (atts[i] == atts[j]) {
7608 if (atts[i+1] == atts[j+1]) {
7609 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7610 break;
7611 }
7612 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7613 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7614 "Namespaced Attribute %s in '%s' redefined\n",
7615 atts[i], nsname, NULL);
7616 break;
7617 }
7618 }
7619 }
7620 }
7621
Daniel Veillarde57ec792003-09-10 10:50:59 +00007622 nsname = xmlGetNamespace(ctxt, prefix);
7623 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007624 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7625 "Namespace prefix %s on %s is not defined\n",
7626 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007627 }
7628 *pref = prefix;
7629 *URI = nsname;
7630
7631 /*
7632 * SAX: Start of Element !
7633 */
7634 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7635 (!ctxt->disableSAX)) {
7636 if (nbNs > 0)
7637 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7638 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7639 nbatts / 5, nbdef, atts);
7640 else
7641 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7642 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7643 }
7644
7645 /*
7646 * Free up attribute allocated strings if needed
7647 */
7648 if (attval != 0) {
7649 for (i = 3,j = 0; j < nratts;i += 5,j++)
7650 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7651 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007652 }
7653
7654 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007655
7656base_changed:
7657 /*
7658 * the attribute strings are valid iif the base didn't changed
7659 */
7660 if (attval != 0) {
7661 for (i = 3,j = 0; j < nratts;i += 5,j++)
7662 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7663 xmlFree((xmlChar *) atts[i]);
7664 }
7665 ctxt->input->cur = ctxt->input->base + cur;
7666 if (ctxt->wellFormed == 1) {
7667 goto reparse;
7668 }
7669 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007670}
7671
7672/**
7673 * xmlParseEndTag2:
7674 * @ctxt: an XML parser context
7675 * @line: line of the start tag
7676 * @nsNr: number of namespaces on the start tag
7677 *
7678 * parse an end of tag
7679 *
7680 * [42] ETag ::= '</' Name S? '>'
7681 *
7682 * With namespace
7683 *
7684 * [NS 9] ETag ::= '</' QName S? '>'
7685 */
7686
7687static void
7688xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007689 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007690 const xmlChar *name;
7691
7692 GROW;
7693 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007694 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007695 return;
7696 }
7697 SKIP(2);
7698
William M. Brack13dfa872004-09-18 04:52:08 +00007699 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007700 if (ctxt->input->cur[tlen] == '>') {
7701 ctxt->input->cur += tlen + 1;
7702 goto done;
7703 }
7704 ctxt->input->cur += tlen;
7705 name = (xmlChar*)1;
7706 } else {
7707 if (prefix == NULL)
7708 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7709 else
7710 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7711 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712
7713 /*
7714 * We should definitely be at the ending "S? '>'" part
7715 */
7716 GROW;
7717 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007718 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007719 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007720 } else
7721 NEXT1;
7722
7723 /*
7724 * [ WFC: Element Type Match ]
7725 * The Name in an element's end-tag must match the element type in the
7726 * start-tag.
7727 *
7728 */
7729 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007730 if (name == NULL) name = BAD_CAST "unparseable";
7731 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007732 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007733 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007734 }
7735
7736 /*
7737 * SAX: End of Tag
7738 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007739done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007740 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7741 (!ctxt->disableSAX))
7742 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7743
Daniel Veillard0fb18932003-09-07 09:14:37 +00007744 spacePop(ctxt);
7745 if (nsNr != 0)
7746 nsPop(ctxt, nsNr);
7747 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007748}
7749
7750/**
Owen Taylor3473f882001-02-23 17:55:21 +00007751 * xmlParseCDSect:
7752 * @ctxt: an XML parser context
7753 *
7754 * Parse escaped pure raw content.
7755 *
7756 * [18] CDSect ::= CDStart CData CDEnd
7757 *
7758 * [19] CDStart ::= '<![CDATA['
7759 *
7760 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7761 *
7762 * [21] CDEnd ::= ']]>'
7763 */
7764void
7765xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7766 xmlChar *buf = NULL;
7767 int len = 0;
7768 int size = XML_PARSER_BUFFER_SIZE;
7769 int r, rl;
7770 int s, sl;
7771 int cur, l;
7772 int count = 0;
7773
Daniel Veillard8f597c32003-10-06 08:19:27 +00007774 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007775 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007776 SKIP(9);
7777 } else
7778 return;
7779
7780 ctxt->instate = XML_PARSER_CDATA_SECTION;
7781 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007782 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007783 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007784 ctxt->instate = XML_PARSER_CONTENT;
7785 return;
7786 }
7787 NEXTL(rl);
7788 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007789 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007790 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007791 ctxt->instate = XML_PARSER_CONTENT;
7792 return;
7793 }
7794 NEXTL(sl);
7795 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007796 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007797 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007798 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007799 return;
7800 }
William M. Brack871611b2003-10-18 04:53:14 +00007801 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007802 ((r != ']') || (s != ']') || (cur != '>'))) {
7803 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00007804 xmlChar *tmp;
7805
Owen Taylor3473f882001-02-23 17:55:21 +00007806 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00007807 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7808 if (tmp == NULL) {
7809 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007811 return;
7812 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00007813 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00007814 }
7815 COPY_BUF(rl,buf,len,r);
7816 r = s;
7817 rl = sl;
7818 s = cur;
7819 sl = l;
7820 count++;
7821 if (count > 50) {
7822 GROW;
7823 count = 0;
7824 }
7825 NEXTL(l);
7826 cur = CUR_CHAR(l);
7827 }
7828 buf[len] = 0;
7829 ctxt->instate = XML_PARSER_CONTENT;
7830 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007831 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007832 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007833 xmlFree(buf);
7834 return;
7835 }
7836 NEXTL(l);
7837
7838 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007839 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007840 */
7841 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7842 if (ctxt->sax->cdataBlock != NULL)
7843 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007844 else if (ctxt->sax->characters != NULL)
7845 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007846 }
7847 xmlFree(buf);
7848}
7849
7850/**
7851 * xmlParseContent:
7852 * @ctxt: an XML parser context
7853 *
7854 * Parse a content:
7855 *
7856 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7857 */
7858
7859void
7860xmlParseContent(xmlParserCtxtPtr ctxt) {
7861 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007862 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007863 ((RAW != '<') || (NXT(1) != '/'))) {
7864 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007865 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007866 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007867
7868 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007869 * First case : a Processing Instruction.
7870 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007871 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007872 xmlParsePI(ctxt);
7873 }
7874
7875 /*
7876 * Second case : a CDSection
7877 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007878 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007879 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007880 xmlParseCDSect(ctxt);
7881 }
7882
7883 /*
7884 * Third case : a comment
7885 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007886 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007887 (NXT(2) == '-') && (NXT(3) == '-')) {
7888 xmlParseComment(ctxt);
7889 ctxt->instate = XML_PARSER_CONTENT;
7890 }
7891
7892 /*
7893 * Fourth case : a sub-element.
7894 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007895 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007896 xmlParseElement(ctxt);
7897 }
7898
7899 /*
7900 * Fifth case : a reference. If if has not been resolved,
7901 * parsing returns it's Name, create the node
7902 */
7903
Daniel Veillard21a0f912001-02-25 19:54:14 +00007904 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007905 xmlParseReference(ctxt);
7906 }
7907
7908 /*
7909 * Last case, text. Note that References are handled directly.
7910 */
7911 else {
7912 xmlParseCharData(ctxt, 0);
7913 }
7914
7915 GROW;
7916 /*
7917 * Pop-up of finished entities.
7918 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007919 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007920 xmlPopInput(ctxt);
7921 SHRINK;
7922
Daniel Veillardfdc91562002-07-01 21:52:03 +00007923 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007924 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7925 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007926 ctxt->instate = XML_PARSER_EOF;
7927 break;
7928 }
7929 }
7930}
7931
7932/**
7933 * xmlParseElement:
7934 * @ctxt: an XML parser context
7935 *
7936 * parse an XML element, this is highly recursive
7937 *
7938 * [39] element ::= EmptyElemTag | STag content ETag
7939 *
7940 * [ WFC: Element Type Match ]
7941 * The Name in an element's end-tag must match the element type in the
7942 * start-tag.
7943 *
Owen Taylor3473f882001-02-23 17:55:21 +00007944 */
7945
7946void
7947xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007948 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007949 const xmlChar *prefix;
7950 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007951 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007952 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007953 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007954 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007955
7956 /* Capture start position */
7957 if (ctxt->record_info) {
7958 node_info.begin_pos = ctxt->input->consumed +
7959 (CUR_PTR - ctxt->input->base);
7960 node_info.begin_line = ctxt->input->line;
7961 }
7962
7963 if (ctxt->spaceNr == 0)
7964 spacePush(ctxt, -1);
7965 else
7966 spacePush(ctxt, *ctxt->space);
7967
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007968 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007969#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007971#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007972 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007973#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007974 else
7975 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007976#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007977 if (name == NULL) {
7978 spacePop(ctxt);
7979 return;
7980 }
7981 namePush(ctxt, name);
7982 ret = ctxt->node;
7983
Daniel Veillard4432df22003-09-28 18:58:27 +00007984#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007985 /*
7986 * [ VC: Root Element Type ]
7987 * The Name in the document type declaration must match the element
7988 * type of the root element.
7989 */
7990 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7991 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7992 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007993#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007994
7995 /*
7996 * Check for an Empty Element.
7997 */
7998 if ((RAW == '/') && (NXT(1) == '>')) {
7999 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008000 if (ctxt->sax2) {
8001 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8002 (!ctxt->disableSAX))
8003 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008004#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008005 } else {
8006 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8007 (!ctxt->disableSAX))
8008 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008009#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008010 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008011 namePop(ctxt);
8012 spacePop(ctxt);
8013 if (nsNr != ctxt->nsNr)
8014 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008015 if ( ret != NULL && ctxt->record_info ) {
8016 node_info.end_pos = ctxt->input->consumed +
8017 (CUR_PTR - ctxt->input->base);
8018 node_info.end_line = ctxt->input->line;
8019 node_info.node = ret;
8020 xmlParserAddNodeInfo(ctxt, &node_info);
8021 }
8022 return;
8023 }
8024 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008025 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008026 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008027 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8028 "Couldn't find end of Start Tag %s line %d\n",
8029 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008030
8031 /*
8032 * end of parsing of this node.
8033 */
8034 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008035 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008036 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008037 if (nsNr != ctxt->nsNr)
8038 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008039
8040 /*
8041 * Capture end position and add node
8042 */
8043 if ( ret != NULL && ctxt->record_info ) {
8044 node_info.end_pos = ctxt->input->consumed +
8045 (CUR_PTR - ctxt->input->base);
8046 node_info.end_line = ctxt->input->line;
8047 node_info.node = ret;
8048 xmlParserAddNodeInfo(ctxt, &node_info);
8049 }
8050 return;
8051 }
8052
8053 /*
8054 * Parse the content of the element:
8055 */
8056 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008057 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008058 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008059 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008060 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008061
8062 /*
8063 * end of parsing of this node.
8064 */
8065 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008066 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008067 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008068 if (nsNr != ctxt->nsNr)
8069 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008070 return;
8071 }
8072
8073 /*
8074 * parse the end of tag: '</' should be here.
8075 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008076 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008077 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008078 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008079 }
8080#ifdef LIBXML_SAX1_ENABLED
8081 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008082 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008083#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008084
8085 /*
8086 * Capture end position and add node
8087 */
8088 if ( ret != NULL && ctxt->record_info ) {
8089 node_info.end_pos = ctxt->input->consumed +
8090 (CUR_PTR - ctxt->input->base);
8091 node_info.end_line = ctxt->input->line;
8092 node_info.node = ret;
8093 xmlParserAddNodeInfo(ctxt, &node_info);
8094 }
8095}
8096
8097/**
8098 * xmlParseVersionNum:
8099 * @ctxt: an XML parser context
8100 *
8101 * parse the XML version value.
8102 *
8103 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8104 *
8105 * Returns the string giving the XML version number, or NULL
8106 */
8107xmlChar *
8108xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8109 xmlChar *buf = NULL;
8110 int len = 0;
8111 int size = 10;
8112 xmlChar cur;
8113
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008114 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008115 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008116 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008117 return(NULL);
8118 }
8119 cur = CUR;
8120 while (((cur >= 'a') && (cur <= 'z')) ||
8121 ((cur >= 'A') && (cur <= 'Z')) ||
8122 ((cur >= '0') && (cur <= '9')) ||
8123 (cur == '_') || (cur == '.') ||
8124 (cur == ':') || (cur == '-')) {
8125 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008126 xmlChar *tmp;
8127
Owen Taylor3473f882001-02-23 17:55:21 +00008128 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008129 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8130 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008131 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008132 return(NULL);
8133 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008134 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008135 }
8136 buf[len++] = cur;
8137 NEXT;
8138 cur=CUR;
8139 }
8140 buf[len] = 0;
8141 return(buf);
8142}
8143
8144/**
8145 * xmlParseVersionInfo:
8146 * @ctxt: an XML parser context
8147 *
8148 * parse the XML version.
8149 *
8150 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8151 *
8152 * [25] Eq ::= S? '=' S?
8153 *
8154 * Returns the version string, e.g. "1.0"
8155 */
8156
8157xmlChar *
8158xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8159 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008160
Daniel Veillarda07050d2003-10-19 14:46:32 +00008161 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008162 SKIP(7);
8163 SKIP_BLANKS;
8164 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008165 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008166 return(NULL);
8167 }
8168 NEXT;
8169 SKIP_BLANKS;
8170 if (RAW == '"') {
8171 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008172 version = xmlParseVersionNum(ctxt);
8173 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008174 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008175 } else
8176 NEXT;
8177 } else if (RAW == '\''){
8178 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008179 version = xmlParseVersionNum(ctxt);
8180 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008181 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008182 } else
8183 NEXT;
8184 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008185 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008186 }
8187 }
8188 return(version);
8189}
8190
8191/**
8192 * xmlParseEncName:
8193 * @ctxt: an XML parser context
8194 *
8195 * parse the XML encoding name
8196 *
8197 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8198 *
8199 * Returns the encoding name value or NULL
8200 */
8201xmlChar *
8202xmlParseEncName(xmlParserCtxtPtr ctxt) {
8203 xmlChar *buf = NULL;
8204 int len = 0;
8205 int size = 10;
8206 xmlChar cur;
8207
8208 cur = CUR;
8209 if (((cur >= 'a') && (cur <= 'z')) ||
8210 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008211 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008212 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008213 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 return(NULL);
8215 }
8216
8217 buf[len++] = cur;
8218 NEXT;
8219 cur = CUR;
8220 while (((cur >= 'a') && (cur <= 'z')) ||
8221 ((cur >= 'A') && (cur <= 'Z')) ||
8222 ((cur >= '0') && (cur <= '9')) ||
8223 (cur == '.') || (cur == '_') ||
8224 (cur == '-')) {
8225 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008226 xmlChar *tmp;
8227
Owen Taylor3473f882001-02-23 17:55:21 +00008228 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008229 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8230 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008231 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008232 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008233 return(NULL);
8234 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008235 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008236 }
8237 buf[len++] = cur;
8238 NEXT;
8239 cur = CUR;
8240 if (cur == 0) {
8241 SHRINK;
8242 GROW;
8243 cur = CUR;
8244 }
8245 }
8246 buf[len] = 0;
8247 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008248 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008249 }
8250 return(buf);
8251}
8252
8253/**
8254 * xmlParseEncodingDecl:
8255 * @ctxt: an XML parser context
8256 *
8257 * parse the XML encoding declaration
8258 *
8259 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8260 *
8261 * this setups the conversion filters.
8262 *
8263 * Returns the encoding value or NULL
8264 */
8265
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008266const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008267xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8268 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008269
8270 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008271 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008272 SKIP(8);
8273 SKIP_BLANKS;
8274 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008275 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008276 return(NULL);
8277 }
8278 NEXT;
8279 SKIP_BLANKS;
8280 if (RAW == '"') {
8281 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008282 encoding = xmlParseEncName(ctxt);
8283 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008284 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008285 } else
8286 NEXT;
8287 } else if (RAW == '\''){
8288 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008289 encoding = xmlParseEncName(ctxt);
8290 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008291 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008292 } else
8293 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008294 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008295 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008296 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008297 /*
8298 * UTF-16 encoding stwich has already taken place at this stage,
8299 * more over the little-endian/big-endian selection is already done
8300 */
8301 if ((encoding != NULL) &&
8302 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8303 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008304 if (ctxt->encoding != NULL)
8305 xmlFree((xmlChar *) ctxt->encoding);
8306 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008307 }
8308 /*
8309 * UTF-8 encoding is handled natively
8310 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008311 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008312 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8313 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008314 if (ctxt->encoding != NULL)
8315 xmlFree((xmlChar *) ctxt->encoding);
8316 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008317 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008318 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008319 xmlCharEncodingHandlerPtr handler;
8320
8321 if (ctxt->input->encoding != NULL)
8322 xmlFree((xmlChar *) ctxt->input->encoding);
8323 ctxt->input->encoding = encoding;
8324
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008325 handler = xmlFindCharEncodingHandler((const char *) encoding);
8326 if (handler != NULL) {
8327 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008328 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008329 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008330 "Unsupported encoding %s\n", encoding);
8331 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008332 }
8333 }
8334 }
8335 return(encoding);
8336}
8337
8338/**
8339 * xmlParseSDDecl:
8340 * @ctxt: an XML parser context
8341 *
8342 * parse the XML standalone declaration
8343 *
8344 * [32] SDDecl ::= S 'standalone' Eq
8345 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8346 *
8347 * [ VC: Standalone Document Declaration ]
8348 * TODO The standalone document declaration must have the value "no"
8349 * if any external markup declarations contain declarations of:
8350 * - attributes with default values, if elements to which these
8351 * attributes apply appear in the document without specifications
8352 * of values for these attributes, or
8353 * - entities (other than amp, lt, gt, apos, quot), if references
8354 * to those entities appear in the document, or
8355 * - attributes with values subject to normalization, where the
8356 * attribute appears in the document with a value which will change
8357 * as a result of normalization, or
8358 * - element types with element content, if white space occurs directly
8359 * within any instance of those types.
8360 *
8361 * Returns 1 if standalone, 0 otherwise
8362 */
8363
8364int
8365xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8366 int standalone = -1;
8367
8368 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008369 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008370 SKIP(10);
8371 SKIP_BLANKS;
8372 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008373 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008374 return(standalone);
8375 }
8376 NEXT;
8377 SKIP_BLANKS;
8378 if (RAW == '\''){
8379 NEXT;
8380 if ((RAW == 'n') && (NXT(1) == 'o')) {
8381 standalone = 0;
8382 SKIP(2);
8383 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8384 (NXT(2) == 's')) {
8385 standalone = 1;
8386 SKIP(3);
8387 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008388 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008389 }
8390 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008391 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008392 } else
8393 NEXT;
8394 } else if (RAW == '"'){
8395 NEXT;
8396 if ((RAW == 'n') && (NXT(1) == 'o')) {
8397 standalone = 0;
8398 SKIP(2);
8399 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8400 (NXT(2) == 's')) {
8401 standalone = 1;
8402 SKIP(3);
8403 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008404 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008405 }
8406 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008407 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008408 } else
8409 NEXT;
8410 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008411 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008412 }
8413 }
8414 return(standalone);
8415}
8416
8417/**
8418 * xmlParseXMLDecl:
8419 * @ctxt: an XML parser context
8420 *
8421 * parse an XML declaration header
8422 *
8423 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8424 */
8425
8426void
8427xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8428 xmlChar *version;
8429
8430 /*
8431 * We know that '<?xml' is here.
8432 */
8433 SKIP(5);
8434
William M. Brack76e95df2003-10-18 16:20:14 +00008435 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8437 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008438 }
8439 SKIP_BLANKS;
8440
8441 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008442 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008443 */
8444 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008445 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008446 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008447 } else {
8448 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8449 /*
8450 * TODO: Blueberry should be detected here
8451 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008452 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8453 "Unsupported version '%s'\n",
8454 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008455 }
8456 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008457 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008458 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008459 }
Owen Taylor3473f882001-02-23 17:55:21 +00008460
8461 /*
8462 * We may have the encoding declaration
8463 */
William M. Brack76e95df2003-10-18 16:20:14 +00008464 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008465 if ((RAW == '?') && (NXT(1) == '>')) {
8466 SKIP(2);
8467 return;
8468 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008470 }
8471 xmlParseEncodingDecl(ctxt);
8472 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8473 /*
8474 * The XML REC instructs us to stop parsing right here
8475 */
8476 return;
8477 }
8478
8479 /*
8480 * We may have the standalone status.
8481 */
William M. Brack76e95df2003-10-18 16:20:14 +00008482 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008483 if ((RAW == '?') && (NXT(1) == '>')) {
8484 SKIP(2);
8485 return;
8486 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008488 }
8489 SKIP_BLANKS;
8490 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8491
8492 SKIP_BLANKS;
8493 if ((RAW == '?') && (NXT(1) == '>')) {
8494 SKIP(2);
8495 } else if (RAW == '>') {
8496 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008497 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008498 NEXT;
8499 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008500 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008501 MOVETO_ENDTAG(CUR_PTR);
8502 NEXT;
8503 }
8504}
8505
8506/**
8507 * xmlParseMisc:
8508 * @ctxt: an XML parser context
8509 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008510 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008511 *
8512 * [27] Misc ::= Comment | PI | S
8513 */
8514
8515void
8516xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008517 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008518 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008519 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008520 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008521 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008522 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008523 NEXT;
8524 } else
8525 xmlParseComment(ctxt);
8526 }
8527}
8528
8529/**
8530 * xmlParseDocument:
8531 * @ctxt: an XML parser context
8532 *
8533 * parse an XML document (and build a tree if using the standard SAX
8534 * interface).
8535 *
8536 * [1] document ::= prolog element Misc*
8537 *
8538 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8539 *
8540 * Returns 0, -1 in case of error. the parser context is augmented
8541 * as a result of the parsing.
8542 */
8543
8544int
8545xmlParseDocument(xmlParserCtxtPtr ctxt) {
8546 xmlChar start[4];
8547 xmlCharEncoding enc;
8548
8549 xmlInitParser();
8550
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008551 if ((ctxt == NULL) || (ctxt->input == NULL))
8552 return(-1);
8553
Owen Taylor3473f882001-02-23 17:55:21 +00008554 GROW;
8555
8556 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008557 * SAX: detecting the level.
8558 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008559 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008560
8561 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008562 * SAX: beginning of the document processing.
8563 */
8564 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8565 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8566
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008567 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8568 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008569 /*
8570 * Get the 4 first bytes and decode the charset
8571 * if enc != XML_CHAR_ENCODING_NONE
8572 * plug some encoding conversion routines.
8573 */
8574 start[0] = RAW;
8575 start[1] = NXT(1);
8576 start[2] = NXT(2);
8577 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008578 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008579 if (enc != XML_CHAR_ENCODING_NONE) {
8580 xmlSwitchEncoding(ctxt, enc);
8581 }
Owen Taylor3473f882001-02-23 17:55:21 +00008582 }
8583
8584
8585 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008586 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008587 }
8588
8589 /*
8590 * Check for the XMLDecl in the Prolog.
8591 */
8592 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008593 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008594
8595 /*
8596 * Note that we will switch encoding on the fly.
8597 */
8598 xmlParseXMLDecl(ctxt);
8599 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8600 /*
8601 * The XML REC instructs us to stop parsing right here
8602 */
8603 return(-1);
8604 }
8605 ctxt->standalone = ctxt->input->standalone;
8606 SKIP_BLANKS;
8607 } else {
8608 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8609 }
8610 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8611 ctxt->sax->startDocument(ctxt->userData);
8612
8613 /*
8614 * The Misc part of the Prolog
8615 */
8616 GROW;
8617 xmlParseMisc(ctxt);
8618
8619 /*
8620 * Then possibly doc type declaration(s) and more Misc
8621 * (doctypedecl Misc*)?
8622 */
8623 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008624 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008625
8626 ctxt->inSubset = 1;
8627 xmlParseDocTypeDecl(ctxt);
8628 if (RAW == '[') {
8629 ctxt->instate = XML_PARSER_DTD;
8630 xmlParseInternalSubset(ctxt);
8631 }
8632
8633 /*
8634 * Create and update the external subset.
8635 */
8636 ctxt->inSubset = 2;
8637 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8638 (!ctxt->disableSAX))
8639 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8640 ctxt->extSubSystem, ctxt->extSubURI);
8641 ctxt->inSubset = 0;
8642
8643
8644 ctxt->instate = XML_PARSER_PROLOG;
8645 xmlParseMisc(ctxt);
8646 }
8647
8648 /*
8649 * Time to start parsing the tree itself
8650 */
8651 GROW;
8652 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008653 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8654 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008655 } else {
8656 ctxt->instate = XML_PARSER_CONTENT;
8657 xmlParseElement(ctxt);
8658 ctxt->instate = XML_PARSER_EPILOG;
8659
8660
8661 /*
8662 * The Misc part at the end
8663 */
8664 xmlParseMisc(ctxt);
8665
Daniel Veillard561b7f82002-03-20 21:55:57 +00008666 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008667 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008668 }
8669 ctxt->instate = XML_PARSER_EOF;
8670 }
8671
8672 /*
8673 * SAX: end of the document processing.
8674 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008675 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008676 ctxt->sax->endDocument(ctxt->userData);
8677
Daniel Veillard5997aca2002-03-18 18:36:20 +00008678 /*
8679 * Remove locally kept entity definitions if the tree was not built
8680 */
8681 if ((ctxt->myDoc != NULL) &&
8682 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8683 xmlFreeDoc(ctxt->myDoc);
8684 ctxt->myDoc = NULL;
8685 }
8686
Daniel Veillardc7612992002-02-17 22:47:37 +00008687 if (! ctxt->wellFormed) {
8688 ctxt->valid = 0;
8689 return(-1);
8690 }
Owen Taylor3473f882001-02-23 17:55:21 +00008691 return(0);
8692}
8693
8694/**
8695 * xmlParseExtParsedEnt:
8696 * @ctxt: an XML parser context
8697 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008698 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008699 * An external general parsed entity is well-formed if it matches the
8700 * production labeled extParsedEnt.
8701 *
8702 * [78] extParsedEnt ::= TextDecl? content
8703 *
8704 * Returns 0, -1 in case of error. the parser context is augmented
8705 * as a result of the parsing.
8706 */
8707
8708int
8709xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8710 xmlChar start[4];
8711 xmlCharEncoding enc;
8712
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008713 if ((ctxt == NULL) || (ctxt->input == NULL))
8714 return(-1);
8715
Owen Taylor3473f882001-02-23 17:55:21 +00008716 xmlDefaultSAXHandlerInit();
8717
Daniel Veillard309f81d2003-09-23 09:02:53 +00008718 xmlDetectSAX2(ctxt);
8719
Owen Taylor3473f882001-02-23 17:55:21 +00008720 GROW;
8721
8722 /*
8723 * SAX: beginning of the document processing.
8724 */
8725 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8726 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8727
8728 /*
8729 * Get the 4 first bytes and decode the charset
8730 * if enc != XML_CHAR_ENCODING_NONE
8731 * plug some encoding conversion routines.
8732 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008733 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8734 start[0] = RAW;
8735 start[1] = NXT(1);
8736 start[2] = NXT(2);
8737 start[3] = NXT(3);
8738 enc = xmlDetectCharEncoding(start, 4);
8739 if (enc != XML_CHAR_ENCODING_NONE) {
8740 xmlSwitchEncoding(ctxt, enc);
8741 }
Owen Taylor3473f882001-02-23 17:55:21 +00008742 }
8743
8744
8745 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008746 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008747 }
8748
8749 /*
8750 * Check for the XMLDecl in the Prolog.
8751 */
8752 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008753 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008754
8755 /*
8756 * Note that we will switch encoding on the fly.
8757 */
8758 xmlParseXMLDecl(ctxt);
8759 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8760 /*
8761 * The XML REC instructs us to stop parsing right here
8762 */
8763 return(-1);
8764 }
8765 SKIP_BLANKS;
8766 } else {
8767 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8768 }
8769 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8770 ctxt->sax->startDocument(ctxt->userData);
8771
8772 /*
8773 * Doing validity checking on chunk doesn't make sense
8774 */
8775 ctxt->instate = XML_PARSER_CONTENT;
8776 ctxt->validate = 0;
8777 ctxt->loadsubset = 0;
8778 ctxt->depth = 0;
8779
8780 xmlParseContent(ctxt);
8781
8782 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008783 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008784 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008785 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008786 }
8787
8788 /*
8789 * SAX: end of the document processing.
8790 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008791 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008792 ctxt->sax->endDocument(ctxt->userData);
8793
8794 if (! ctxt->wellFormed) return(-1);
8795 return(0);
8796}
8797
Daniel Veillard73b013f2003-09-30 12:36:01 +00008798#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008799/************************************************************************
8800 * *
8801 * Progressive parsing interfaces *
8802 * *
8803 ************************************************************************/
8804
8805/**
8806 * xmlParseLookupSequence:
8807 * @ctxt: an XML parser context
8808 * @first: the first char to lookup
8809 * @next: the next char to lookup or zero
8810 * @third: the next char to lookup or zero
8811 *
8812 * Try to find if a sequence (first, next, third) or just (first next) or
8813 * (first) is available in the input stream.
8814 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8815 * to avoid rescanning sequences of bytes, it DOES change the state of the
8816 * parser, do not use liberally.
8817 *
8818 * Returns the index to the current parsing point if the full sequence
8819 * is available, -1 otherwise.
8820 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008821static int
Owen Taylor3473f882001-02-23 17:55:21 +00008822xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8823 xmlChar next, xmlChar third) {
8824 int base, len;
8825 xmlParserInputPtr in;
8826 const xmlChar *buf;
8827
8828 in = ctxt->input;
8829 if (in == NULL) return(-1);
8830 base = in->cur - in->base;
8831 if (base < 0) return(-1);
8832 if (ctxt->checkIndex > base)
8833 base = ctxt->checkIndex;
8834 if (in->buf == NULL) {
8835 buf = in->base;
8836 len = in->length;
8837 } else {
8838 buf = in->buf->buffer->content;
8839 len = in->buf->buffer->use;
8840 }
8841 /* take into account the sequence length */
8842 if (third) len -= 2;
8843 else if (next) len --;
8844 for (;base < len;base++) {
8845 if (buf[base] == first) {
8846 if (third != 0) {
8847 if ((buf[base + 1] != next) ||
8848 (buf[base + 2] != third)) continue;
8849 } else if (next != 0) {
8850 if (buf[base + 1] != next) continue;
8851 }
8852 ctxt->checkIndex = 0;
8853#ifdef DEBUG_PUSH
8854 if (next == 0)
8855 xmlGenericError(xmlGenericErrorContext,
8856 "PP: lookup '%c' found at %d\n",
8857 first, base);
8858 else if (third == 0)
8859 xmlGenericError(xmlGenericErrorContext,
8860 "PP: lookup '%c%c' found at %d\n",
8861 first, next, base);
8862 else
8863 xmlGenericError(xmlGenericErrorContext,
8864 "PP: lookup '%c%c%c' found at %d\n",
8865 first, next, third, base);
8866#endif
8867 return(base - (in->cur - in->base));
8868 }
8869 }
8870 ctxt->checkIndex = base;
8871#ifdef DEBUG_PUSH
8872 if (next == 0)
8873 xmlGenericError(xmlGenericErrorContext,
8874 "PP: lookup '%c' failed\n", first);
8875 else if (third == 0)
8876 xmlGenericError(xmlGenericErrorContext,
8877 "PP: lookup '%c%c' failed\n", first, next);
8878 else
8879 xmlGenericError(xmlGenericErrorContext,
8880 "PP: lookup '%c%c%c' failed\n", first, next, third);
8881#endif
8882 return(-1);
8883}
8884
8885/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008886 * xmlParseGetLasts:
8887 * @ctxt: an XML parser context
8888 * @lastlt: pointer to store the last '<' from the input
8889 * @lastgt: pointer to store the last '>' from the input
8890 *
8891 * Lookup the last < and > in the current chunk
8892 */
8893static void
8894xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8895 const xmlChar **lastgt) {
8896 const xmlChar *tmp;
8897
8898 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8899 xmlGenericError(xmlGenericErrorContext,
8900 "Internal error: xmlParseGetLasts\n");
8901 return;
8902 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008903 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008904 tmp = ctxt->input->end;
8905 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008906 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008907 if (tmp < ctxt->input->base) {
8908 *lastlt = NULL;
8909 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008910 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008911 *lastlt = tmp;
8912 tmp++;
8913 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8914 if (*tmp == '\'') {
8915 tmp++;
8916 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8917 if (tmp < ctxt->input->end) tmp++;
8918 } else if (*tmp == '"') {
8919 tmp++;
8920 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8921 if (tmp < ctxt->input->end) tmp++;
8922 } else
8923 tmp++;
8924 }
8925 if (tmp < ctxt->input->end)
8926 *lastgt = tmp;
8927 else {
8928 tmp = *lastlt;
8929 tmp--;
8930 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8931 if (tmp >= ctxt->input->base)
8932 *lastgt = tmp;
8933 else
8934 *lastgt = NULL;
8935 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008936 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008937 } else {
8938 *lastlt = NULL;
8939 *lastgt = NULL;
8940 }
8941}
8942/**
Owen Taylor3473f882001-02-23 17:55:21 +00008943 * xmlParseTryOrFinish:
8944 * @ctxt: an XML parser context
8945 * @terminate: last chunk indicator
8946 *
8947 * Try to progress on parsing
8948 *
8949 * Returns zero if no parsing was possible
8950 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008951static int
Owen Taylor3473f882001-02-23 17:55:21 +00008952xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8953 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008954 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008955 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008956 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008957
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008958 if (ctxt->input == NULL)
8959 return(0);
8960
Owen Taylor3473f882001-02-23 17:55:21 +00008961#ifdef DEBUG_PUSH
8962 switch (ctxt->instate) {
8963 case XML_PARSER_EOF:
8964 xmlGenericError(xmlGenericErrorContext,
8965 "PP: try EOF\n"); break;
8966 case XML_PARSER_START:
8967 xmlGenericError(xmlGenericErrorContext,
8968 "PP: try START\n"); break;
8969 case XML_PARSER_MISC:
8970 xmlGenericError(xmlGenericErrorContext,
8971 "PP: try MISC\n");break;
8972 case XML_PARSER_COMMENT:
8973 xmlGenericError(xmlGenericErrorContext,
8974 "PP: try COMMENT\n");break;
8975 case XML_PARSER_PROLOG:
8976 xmlGenericError(xmlGenericErrorContext,
8977 "PP: try PROLOG\n");break;
8978 case XML_PARSER_START_TAG:
8979 xmlGenericError(xmlGenericErrorContext,
8980 "PP: try START_TAG\n");break;
8981 case XML_PARSER_CONTENT:
8982 xmlGenericError(xmlGenericErrorContext,
8983 "PP: try CONTENT\n");break;
8984 case XML_PARSER_CDATA_SECTION:
8985 xmlGenericError(xmlGenericErrorContext,
8986 "PP: try CDATA_SECTION\n");break;
8987 case XML_PARSER_END_TAG:
8988 xmlGenericError(xmlGenericErrorContext,
8989 "PP: try END_TAG\n");break;
8990 case XML_PARSER_ENTITY_DECL:
8991 xmlGenericError(xmlGenericErrorContext,
8992 "PP: try ENTITY_DECL\n");break;
8993 case XML_PARSER_ENTITY_VALUE:
8994 xmlGenericError(xmlGenericErrorContext,
8995 "PP: try ENTITY_VALUE\n");break;
8996 case XML_PARSER_ATTRIBUTE_VALUE:
8997 xmlGenericError(xmlGenericErrorContext,
8998 "PP: try ATTRIBUTE_VALUE\n");break;
8999 case XML_PARSER_DTD:
9000 xmlGenericError(xmlGenericErrorContext,
9001 "PP: try DTD\n");break;
9002 case XML_PARSER_EPILOG:
9003 xmlGenericError(xmlGenericErrorContext,
9004 "PP: try EPILOG\n");break;
9005 case XML_PARSER_PI:
9006 xmlGenericError(xmlGenericErrorContext,
9007 "PP: try PI\n");break;
9008 case XML_PARSER_IGNORE:
9009 xmlGenericError(xmlGenericErrorContext,
9010 "PP: try IGNORE\n");break;
9011 }
9012#endif
9013
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009014 if ((ctxt->input != NULL) &&
9015 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009016 xmlSHRINK(ctxt);
9017 ctxt->checkIndex = 0;
9018 }
9019 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009020
Daniel Veillarda880b122003-04-21 21:36:41 +00009021 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009022 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9023 return(0);
9024
9025
Owen Taylor3473f882001-02-23 17:55:21 +00009026 /*
9027 * Pop-up of finished entities.
9028 */
9029 while ((RAW == 0) && (ctxt->inputNr > 1))
9030 xmlPopInput(ctxt);
9031
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009032 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009033 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009034 avail = ctxt->input->length -
9035 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009036 else {
9037 /*
9038 * If we are operating on converted input, try to flush
9039 * remainng chars to avoid them stalling in the non-converted
9040 * buffer.
9041 */
9042 if ((ctxt->input->buf->raw != NULL) &&
9043 (ctxt->input->buf->raw->use > 0)) {
9044 int base = ctxt->input->base -
9045 ctxt->input->buf->buffer->content;
9046 int current = ctxt->input->cur - ctxt->input->base;
9047
9048 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9049 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9050 ctxt->input->cur = ctxt->input->base + current;
9051 ctxt->input->end =
9052 &ctxt->input->buf->buffer->content[
9053 ctxt->input->buf->buffer->use];
9054 }
9055 avail = ctxt->input->buf->buffer->use -
9056 (ctxt->input->cur - ctxt->input->base);
9057 }
Owen Taylor3473f882001-02-23 17:55:21 +00009058 if (avail < 1)
9059 goto done;
9060 switch (ctxt->instate) {
9061 case XML_PARSER_EOF:
9062 /*
9063 * Document parsing is done !
9064 */
9065 goto done;
9066 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009067 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9068 xmlChar start[4];
9069 xmlCharEncoding enc;
9070
9071 /*
9072 * Very first chars read from the document flow.
9073 */
9074 if (avail < 4)
9075 goto done;
9076
9077 /*
9078 * Get the 4 first bytes and decode the charset
9079 * if enc != XML_CHAR_ENCODING_NONE
9080 * plug some encoding conversion routines.
9081 */
9082 start[0] = RAW;
9083 start[1] = NXT(1);
9084 start[2] = NXT(2);
9085 start[3] = NXT(3);
9086 enc = xmlDetectCharEncoding(start, 4);
9087 if (enc != XML_CHAR_ENCODING_NONE) {
9088 xmlSwitchEncoding(ctxt, enc);
9089 }
9090 break;
9091 }
Owen Taylor3473f882001-02-23 17:55:21 +00009092
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009093 if (avail < 2)
9094 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009095 cur = ctxt->input->cur[0];
9096 next = ctxt->input->cur[1];
9097 if (cur == 0) {
9098 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9099 ctxt->sax->setDocumentLocator(ctxt->userData,
9100 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009101 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009102 ctxt->instate = XML_PARSER_EOF;
9103#ifdef DEBUG_PUSH
9104 xmlGenericError(xmlGenericErrorContext,
9105 "PP: entering EOF\n");
9106#endif
9107 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9108 ctxt->sax->endDocument(ctxt->userData);
9109 goto done;
9110 }
9111 if ((cur == '<') && (next == '?')) {
9112 /* PI or XML decl */
9113 if (avail < 5) return(ret);
9114 if ((!terminate) &&
9115 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9116 return(ret);
9117 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9118 ctxt->sax->setDocumentLocator(ctxt->userData,
9119 &xmlDefaultSAXLocator);
9120 if ((ctxt->input->cur[2] == 'x') &&
9121 (ctxt->input->cur[3] == 'm') &&
9122 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009123 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009124 ret += 5;
9125#ifdef DEBUG_PUSH
9126 xmlGenericError(xmlGenericErrorContext,
9127 "PP: Parsing XML Decl\n");
9128#endif
9129 xmlParseXMLDecl(ctxt);
9130 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9131 /*
9132 * The XML REC instructs us to stop parsing right
9133 * here
9134 */
9135 ctxt->instate = XML_PARSER_EOF;
9136 return(0);
9137 }
9138 ctxt->standalone = ctxt->input->standalone;
9139 if ((ctxt->encoding == NULL) &&
9140 (ctxt->input->encoding != NULL))
9141 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9142 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9143 (!ctxt->disableSAX))
9144 ctxt->sax->startDocument(ctxt->userData);
9145 ctxt->instate = XML_PARSER_MISC;
9146#ifdef DEBUG_PUSH
9147 xmlGenericError(xmlGenericErrorContext,
9148 "PP: entering MISC\n");
9149#endif
9150 } else {
9151 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9152 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9153 (!ctxt->disableSAX))
9154 ctxt->sax->startDocument(ctxt->userData);
9155 ctxt->instate = XML_PARSER_MISC;
9156#ifdef DEBUG_PUSH
9157 xmlGenericError(xmlGenericErrorContext,
9158 "PP: entering MISC\n");
9159#endif
9160 }
9161 } else {
9162 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9163 ctxt->sax->setDocumentLocator(ctxt->userData,
9164 &xmlDefaultSAXLocator);
9165 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009166 if (ctxt->version == NULL) {
9167 xmlErrMemory(ctxt, NULL);
9168 break;
9169 }
Owen Taylor3473f882001-02-23 17:55:21 +00009170 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9171 (!ctxt->disableSAX))
9172 ctxt->sax->startDocument(ctxt->userData);
9173 ctxt->instate = XML_PARSER_MISC;
9174#ifdef DEBUG_PUSH
9175 xmlGenericError(xmlGenericErrorContext,
9176 "PP: entering MISC\n");
9177#endif
9178 }
9179 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009180 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009181 const xmlChar *name;
9182 const xmlChar *prefix;
9183 const xmlChar *URI;
9184 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009185
9186 if ((avail < 2) && (ctxt->inputNr == 1))
9187 goto done;
9188 cur = ctxt->input->cur[0];
9189 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009190 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009191 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009192 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9193 ctxt->sax->endDocument(ctxt->userData);
9194 goto done;
9195 }
9196 if (!terminate) {
9197 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009198 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009199 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009200 goto done;
9201 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9202 goto done;
9203 }
9204 }
9205 if (ctxt->spaceNr == 0)
9206 spacePush(ctxt, -1);
9207 else
9208 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009209#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009210 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009211#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009212 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009213#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009214 else
9215 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009216#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009217 if (name == NULL) {
9218 spacePop(ctxt);
9219 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009220 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9221 ctxt->sax->endDocument(ctxt->userData);
9222 goto done;
9223 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009224#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009225 /*
9226 * [ VC: Root Element Type ]
9227 * The Name in the document type declaration must match
9228 * the element type of the root element.
9229 */
9230 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9231 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9232 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009233#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009234
9235 /*
9236 * Check for an Empty Element.
9237 */
9238 if ((RAW == '/') && (NXT(1) == '>')) {
9239 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009240
9241 if (ctxt->sax2) {
9242 if ((ctxt->sax != NULL) &&
9243 (ctxt->sax->endElementNs != NULL) &&
9244 (!ctxt->disableSAX))
9245 ctxt->sax->endElementNs(ctxt->userData, name,
9246 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009247#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009248 } else {
9249 if ((ctxt->sax != NULL) &&
9250 (ctxt->sax->endElement != NULL) &&
9251 (!ctxt->disableSAX))
9252 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009253#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009254 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009255 spacePop(ctxt);
9256 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009257 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009258 } else {
9259 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009260 }
9261 break;
9262 }
9263 if (RAW == '>') {
9264 NEXT;
9265 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009266 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009267 "Couldn't find end of Start Tag %s\n",
9268 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009269 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009270 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009271 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009272 if (ctxt->sax2)
9273 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009274#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009275 else
9276 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009277#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009278
Daniel Veillarda880b122003-04-21 21:36:41 +00009279 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009280 break;
9281 }
9282 case XML_PARSER_CONTENT: {
9283 const xmlChar *test;
9284 unsigned int cons;
9285 if ((avail < 2) && (ctxt->inputNr == 1))
9286 goto done;
9287 cur = ctxt->input->cur[0];
9288 next = ctxt->input->cur[1];
9289
9290 test = CUR_PTR;
9291 cons = ctxt->input->consumed;
9292 if ((cur == '<') && (next == '/')) {
9293 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009294 break;
9295 } else if ((cur == '<') && (next == '?')) {
9296 if ((!terminate) &&
9297 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9298 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009299 xmlParsePI(ctxt);
9300 } else if ((cur == '<') && (next != '!')) {
9301 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009302 break;
9303 } else if ((cur == '<') && (next == '!') &&
9304 (ctxt->input->cur[2] == '-') &&
9305 (ctxt->input->cur[3] == '-')) {
9306 if ((!terminate) &&
9307 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9308 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009309 xmlParseComment(ctxt);
9310 ctxt->instate = XML_PARSER_CONTENT;
9311 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9312 (ctxt->input->cur[2] == '[') &&
9313 (ctxt->input->cur[3] == 'C') &&
9314 (ctxt->input->cur[4] == 'D') &&
9315 (ctxt->input->cur[5] == 'A') &&
9316 (ctxt->input->cur[6] == 'T') &&
9317 (ctxt->input->cur[7] == 'A') &&
9318 (ctxt->input->cur[8] == '[')) {
9319 SKIP(9);
9320 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009321 break;
9322 } else if ((cur == '<') && (next == '!') &&
9323 (avail < 9)) {
9324 goto done;
9325 } else if (cur == '&') {
9326 if ((!terminate) &&
9327 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9328 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009329 xmlParseReference(ctxt);
9330 } else {
9331 /* TODO Avoid the extra copy, handle directly !!! */
9332 /*
9333 * Goal of the following test is:
9334 * - minimize calls to the SAX 'character' callback
9335 * when they are mergeable
9336 * - handle an problem for isBlank when we only parse
9337 * a sequence of blank chars and the next one is
9338 * not available to check against '<' presence.
9339 * - tries to homogenize the differences in SAX
9340 * callbacks between the push and pull versions
9341 * of the parser.
9342 */
9343 if ((ctxt->inputNr == 1) &&
9344 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9345 if (!terminate) {
9346 if (ctxt->progressive) {
9347 if ((lastlt == NULL) ||
9348 (ctxt->input->cur > lastlt))
9349 goto done;
9350 } else if (xmlParseLookupSequence(ctxt,
9351 '<', 0, 0) < 0) {
9352 goto done;
9353 }
9354 }
9355 }
9356 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009357 xmlParseCharData(ctxt, 0);
9358 }
9359 /*
9360 * Pop-up of finished entities.
9361 */
9362 while ((RAW == 0) && (ctxt->inputNr > 1))
9363 xmlPopInput(ctxt);
9364 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009365 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9366 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009367 ctxt->instate = XML_PARSER_EOF;
9368 break;
9369 }
9370 break;
9371 }
9372 case XML_PARSER_END_TAG:
9373 if (avail < 2)
9374 goto done;
9375 if (!terminate) {
9376 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009377 /* > can be found unescaped in attribute values */
9378 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009379 goto done;
9380 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9381 goto done;
9382 }
9383 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009384 if (ctxt->sax2) {
9385 xmlParseEndTag2(ctxt,
9386 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9387 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009388 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009389 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009390 }
9391#ifdef LIBXML_SAX1_ENABLED
9392 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009393 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009394#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009395 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009396 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009397 } else {
9398 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009399 }
9400 break;
9401 case XML_PARSER_CDATA_SECTION: {
9402 /*
9403 * The Push mode need to have the SAX callback for
9404 * cdataBlock merge back contiguous callbacks.
9405 */
9406 int base;
9407
9408 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9409 if (base < 0) {
9410 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9411 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9412 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009413 ctxt->sax->cdataBlock(ctxt->userData,
9414 ctxt->input->cur,
9415 XML_PARSER_BIG_BUFFER_SIZE);
9416 else if (ctxt->sax->characters != NULL)
9417 ctxt->sax->characters(ctxt->userData,
9418 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009419 XML_PARSER_BIG_BUFFER_SIZE);
9420 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009421 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009422 ctxt->checkIndex = 0;
9423 }
9424 goto done;
9425 } else {
9426 if ((ctxt->sax != NULL) && (base > 0) &&
9427 (!ctxt->disableSAX)) {
9428 if (ctxt->sax->cdataBlock != NULL)
9429 ctxt->sax->cdataBlock(ctxt->userData,
9430 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009431 else if (ctxt->sax->characters != NULL)
9432 ctxt->sax->characters(ctxt->userData,
9433 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009434 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009435 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009436 ctxt->checkIndex = 0;
9437 ctxt->instate = XML_PARSER_CONTENT;
9438#ifdef DEBUG_PUSH
9439 xmlGenericError(xmlGenericErrorContext,
9440 "PP: entering CONTENT\n");
9441#endif
9442 }
9443 break;
9444 }
Owen Taylor3473f882001-02-23 17:55:21 +00009445 case XML_PARSER_MISC:
9446 SKIP_BLANKS;
9447 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009448 avail = ctxt->input->length -
9449 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009450 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009451 avail = ctxt->input->buf->buffer->use -
9452 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009453 if (avail < 2)
9454 goto done;
9455 cur = ctxt->input->cur[0];
9456 next = ctxt->input->cur[1];
9457 if ((cur == '<') && (next == '?')) {
9458 if ((!terminate) &&
9459 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9460 goto done;
9461#ifdef DEBUG_PUSH
9462 xmlGenericError(xmlGenericErrorContext,
9463 "PP: Parsing PI\n");
9464#endif
9465 xmlParsePI(ctxt);
9466 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009467 (ctxt->input->cur[2] == '-') &&
9468 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009469 if ((!terminate) &&
9470 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9471 goto done;
9472#ifdef DEBUG_PUSH
9473 xmlGenericError(xmlGenericErrorContext,
9474 "PP: Parsing Comment\n");
9475#endif
9476 xmlParseComment(ctxt);
9477 ctxt->instate = XML_PARSER_MISC;
9478 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009479 (ctxt->input->cur[2] == 'D') &&
9480 (ctxt->input->cur[3] == 'O') &&
9481 (ctxt->input->cur[4] == 'C') &&
9482 (ctxt->input->cur[5] == 'T') &&
9483 (ctxt->input->cur[6] == 'Y') &&
9484 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009485 (ctxt->input->cur[8] == 'E')) {
9486 if ((!terminate) &&
9487 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9488 goto done;
9489#ifdef DEBUG_PUSH
9490 xmlGenericError(xmlGenericErrorContext,
9491 "PP: Parsing internal subset\n");
9492#endif
9493 ctxt->inSubset = 1;
9494 xmlParseDocTypeDecl(ctxt);
9495 if (RAW == '[') {
9496 ctxt->instate = XML_PARSER_DTD;
9497#ifdef DEBUG_PUSH
9498 xmlGenericError(xmlGenericErrorContext,
9499 "PP: entering DTD\n");
9500#endif
9501 } else {
9502 /*
9503 * Create and update the external subset.
9504 */
9505 ctxt->inSubset = 2;
9506 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9507 (ctxt->sax->externalSubset != NULL))
9508 ctxt->sax->externalSubset(ctxt->userData,
9509 ctxt->intSubName, ctxt->extSubSystem,
9510 ctxt->extSubURI);
9511 ctxt->inSubset = 0;
9512 ctxt->instate = XML_PARSER_PROLOG;
9513#ifdef DEBUG_PUSH
9514 xmlGenericError(xmlGenericErrorContext,
9515 "PP: entering PROLOG\n");
9516#endif
9517 }
9518 } else if ((cur == '<') && (next == '!') &&
9519 (avail < 9)) {
9520 goto done;
9521 } else {
9522 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009523 ctxt->progressive = 1;
9524 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009525#ifdef DEBUG_PUSH
9526 xmlGenericError(xmlGenericErrorContext,
9527 "PP: entering START_TAG\n");
9528#endif
9529 }
9530 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009531 case XML_PARSER_PROLOG:
9532 SKIP_BLANKS;
9533 if (ctxt->input->buf == NULL)
9534 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9535 else
9536 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9537 if (avail < 2)
9538 goto done;
9539 cur = ctxt->input->cur[0];
9540 next = ctxt->input->cur[1];
9541 if ((cur == '<') && (next == '?')) {
9542 if ((!terminate) &&
9543 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9544 goto done;
9545#ifdef DEBUG_PUSH
9546 xmlGenericError(xmlGenericErrorContext,
9547 "PP: Parsing PI\n");
9548#endif
9549 xmlParsePI(ctxt);
9550 } else if ((cur == '<') && (next == '!') &&
9551 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9552 if ((!terminate) &&
9553 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9554 goto done;
9555#ifdef DEBUG_PUSH
9556 xmlGenericError(xmlGenericErrorContext,
9557 "PP: Parsing Comment\n");
9558#endif
9559 xmlParseComment(ctxt);
9560 ctxt->instate = XML_PARSER_PROLOG;
9561 } else if ((cur == '<') && (next == '!') &&
9562 (avail < 4)) {
9563 goto done;
9564 } else {
9565 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009566 if (ctxt->progressive == 0)
9567 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009568 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009569#ifdef DEBUG_PUSH
9570 xmlGenericError(xmlGenericErrorContext,
9571 "PP: entering START_TAG\n");
9572#endif
9573 }
9574 break;
9575 case XML_PARSER_EPILOG:
9576 SKIP_BLANKS;
9577 if (ctxt->input->buf == NULL)
9578 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9579 else
9580 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9581 if (avail < 2)
9582 goto done;
9583 cur = ctxt->input->cur[0];
9584 next = ctxt->input->cur[1];
9585 if ((cur == '<') && (next == '?')) {
9586 if ((!terminate) &&
9587 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9588 goto done;
9589#ifdef DEBUG_PUSH
9590 xmlGenericError(xmlGenericErrorContext,
9591 "PP: Parsing PI\n");
9592#endif
9593 xmlParsePI(ctxt);
9594 ctxt->instate = XML_PARSER_EPILOG;
9595 } else if ((cur == '<') && (next == '!') &&
9596 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9597 if ((!terminate) &&
9598 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9599 goto done;
9600#ifdef DEBUG_PUSH
9601 xmlGenericError(xmlGenericErrorContext,
9602 "PP: Parsing Comment\n");
9603#endif
9604 xmlParseComment(ctxt);
9605 ctxt->instate = XML_PARSER_EPILOG;
9606 } else if ((cur == '<') && (next == '!') &&
9607 (avail < 4)) {
9608 goto done;
9609 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009610 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009611 ctxt->instate = XML_PARSER_EOF;
9612#ifdef DEBUG_PUSH
9613 xmlGenericError(xmlGenericErrorContext,
9614 "PP: entering EOF\n");
9615#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009616 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009617 ctxt->sax->endDocument(ctxt->userData);
9618 goto done;
9619 }
9620 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009621 case XML_PARSER_DTD: {
9622 /*
9623 * Sorry but progressive parsing of the internal subset
9624 * is not expected to be supported. We first check that
9625 * the full content of the internal subset is available and
9626 * the parsing is launched only at that point.
9627 * Internal subset ends up with "']' S? '>'" in an unescaped
9628 * section and not in a ']]>' sequence which are conditional
9629 * sections (whoever argued to keep that crap in XML deserve
9630 * a place in hell !).
9631 */
9632 int base, i;
9633 xmlChar *buf;
9634 xmlChar quote = 0;
9635
9636 base = ctxt->input->cur - ctxt->input->base;
9637 if (base < 0) return(0);
9638 if (ctxt->checkIndex > base)
9639 base = ctxt->checkIndex;
9640 buf = ctxt->input->buf->buffer->content;
9641 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9642 base++) {
9643 if (quote != 0) {
9644 if (buf[base] == quote)
9645 quote = 0;
9646 continue;
9647 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009648 if ((quote == 0) && (buf[base] == '<')) {
9649 int found = 0;
9650 /* special handling of comments */
9651 if (((unsigned int) base + 4 <
9652 ctxt->input->buf->buffer->use) &&
9653 (buf[base + 1] == '!') &&
9654 (buf[base + 2] == '-') &&
9655 (buf[base + 3] == '-')) {
9656 for (;(unsigned int) base + 3 <
9657 ctxt->input->buf->buffer->use; base++) {
9658 if ((buf[base] == '-') &&
9659 (buf[base + 1] == '-') &&
9660 (buf[base + 2] == '>')) {
9661 found = 1;
9662 base += 2;
9663 break;
9664 }
9665 }
9666 if (!found)
9667 break;
9668 continue;
9669 }
9670 }
Owen Taylor3473f882001-02-23 17:55:21 +00009671 if (buf[base] == '"') {
9672 quote = '"';
9673 continue;
9674 }
9675 if (buf[base] == '\'') {
9676 quote = '\'';
9677 continue;
9678 }
9679 if (buf[base] == ']') {
9680 if ((unsigned int) base +1 >=
9681 ctxt->input->buf->buffer->use)
9682 break;
9683 if (buf[base + 1] == ']') {
9684 /* conditional crap, skip both ']' ! */
9685 base++;
9686 continue;
9687 }
9688 for (i = 0;
9689 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9690 i++) {
9691 if (buf[base + i] == '>')
9692 goto found_end_int_subset;
9693 }
9694 break;
9695 }
9696 }
9697 /*
9698 * We didn't found the end of the Internal subset
9699 */
9700 if (quote == 0)
9701 ctxt->checkIndex = base;
9702#ifdef DEBUG_PUSH
9703 if (next == 0)
9704 xmlGenericError(xmlGenericErrorContext,
9705 "PP: lookup of int subset end filed\n");
9706#endif
9707 goto done;
9708
9709found_end_int_subset:
9710 xmlParseInternalSubset(ctxt);
9711 ctxt->inSubset = 2;
9712 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9713 (ctxt->sax->externalSubset != NULL))
9714 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9715 ctxt->extSubSystem, ctxt->extSubURI);
9716 ctxt->inSubset = 0;
9717 ctxt->instate = XML_PARSER_PROLOG;
9718 ctxt->checkIndex = 0;
9719#ifdef DEBUG_PUSH
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: entering PROLOG\n");
9722#endif
9723 break;
9724 }
9725 case XML_PARSER_COMMENT:
9726 xmlGenericError(xmlGenericErrorContext,
9727 "PP: internal error, state == COMMENT\n");
9728 ctxt->instate = XML_PARSER_CONTENT;
9729#ifdef DEBUG_PUSH
9730 xmlGenericError(xmlGenericErrorContext,
9731 "PP: entering CONTENT\n");
9732#endif
9733 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009734 case XML_PARSER_IGNORE:
9735 xmlGenericError(xmlGenericErrorContext,
9736 "PP: internal error, state == IGNORE");
9737 ctxt->instate = XML_PARSER_DTD;
9738#ifdef DEBUG_PUSH
9739 xmlGenericError(xmlGenericErrorContext,
9740 "PP: entering DTD\n");
9741#endif
9742 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009743 case XML_PARSER_PI:
9744 xmlGenericError(xmlGenericErrorContext,
9745 "PP: internal error, state == PI\n");
9746 ctxt->instate = XML_PARSER_CONTENT;
9747#ifdef DEBUG_PUSH
9748 xmlGenericError(xmlGenericErrorContext,
9749 "PP: entering CONTENT\n");
9750#endif
9751 break;
9752 case XML_PARSER_ENTITY_DECL:
9753 xmlGenericError(xmlGenericErrorContext,
9754 "PP: internal error, state == ENTITY_DECL\n");
9755 ctxt->instate = XML_PARSER_DTD;
9756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: entering DTD\n");
9759#endif
9760 break;
9761 case XML_PARSER_ENTITY_VALUE:
9762 xmlGenericError(xmlGenericErrorContext,
9763 "PP: internal error, state == ENTITY_VALUE\n");
9764 ctxt->instate = XML_PARSER_CONTENT;
9765#ifdef DEBUG_PUSH
9766 xmlGenericError(xmlGenericErrorContext,
9767 "PP: entering DTD\n");
9768#endif
9769 break;
9770 case XML_PARSER_ATTRIBUTE_VALUE:
9771 xmlGenericError(xmlGenericErrorContext,
9772 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9773 ctxt->instate = XML_PARSER_START_TAG;
9774#ifdef DEBUG_PUSH
9775 xmlGenericError(xmlGenericErrorContext,
9776 "PP: entering START_TAG\n");
9777#endif
9778 break;
9779 case XML_PARSER_SYSTEM_LITERAL:
9780 xmlGenericError(xmlGenericErrorContext,
9781 "PP: internal error, state == SYSTEM_LITERAL\n");
9782 ctxt->instate = XML_PARSER_START_TAG;
9783#ifdef DEBUG_PUSH
9784 xmlGenericError(xmlGenericErrorContext,
9785 "PP: entering START_TAG\n");
9786#endif
9787 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009788 case XML_PARSER_PUBLIC_LITERAL:
9789 xmlGenericError(xmlGenericErrorContext,
9790 "PP: internal error, state == PUBLIC_LITERAL\n");
9791 ctxt->instate = XML_PARSER_START_TAG;
9792#ifdef DEBUG_PUSH
9793 xmlGenericError(xmlGenericErrorContext,
9794 "PP: entering START_TAG\n");
9795#endif
9796 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009797 }
9798 }
9799done:
9800#ifdef DEBUG_PUSH
9801 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9802#endif
9803 return(ret);
9804}
9805
9806/**
Owen Taylor3473f882001-02-23 17:55:21 +00009807 * xmlParseChunk:
9808 * @ctxt: an XML parser context
9809 * @chunk: an char array
9810 * @size: the size in byte of the chunk
9811 * @terminate: last chunk indicator
9812 *
9813 * Parse a Chunk of memory
9814 *
9815 * Returns zero if no error, the xmlParserErrors otherwise.
9816 */
9817int
9818xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9819 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009820 if (ctxt == NULL)
9821 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009822 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9823 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009824 if (ctxt->instate == XML_PARSER_START)
9825 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009826 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9827 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9828 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9829 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +00009830 int res;
Owen Taylor3473f882001-02-23 17:55:21 +00009831
William M. Bracka3215c72004-07-31 16:24:01 +00009832 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9833 if (res < 0) {
9834 ctxt->errNo = XML_PARSER_EOF;
9835 ctxt->disableSAX = 1;
9836 return (XML_PARSER_EOF);
9837 }
Owen Taylor3473f882001-02-23 17:55:21 +00009838 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9839 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009840 ctxt->input->end =
9841 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009842#ifdef DEBUG_PUSH
9843 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9844#endif
9845
Owen Taylor3473f882001-02-23 17:55:21 +00009846 } else if (ctxt->instate != XML_PARSER_EOF) {
9847 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9848 xmlParserInputBufferPtr in = ctxt->input->buf;
9849 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9850 (in->raw != NULL)) {
9851 int nbchars;
9852
9853 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9854 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009855 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009856 xmlGenericError(xmlGenericErrorContext,
9857 "xmlParseChunk: encoder error\n");
9858 return(XML_ERR_INVALID_ENCODING);
9859 }
9860 }
9861 }
9862 }
9863 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009864 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9865 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009866 if (terminate) {
9867 /*
9868 * Check for termination
9869 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009870 int avail = 0;
9871
9872 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009873 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009874 avail = ctxt->input->length -
9875 (ctxt->input->cur - ctxt->input->base);
9876 else
9877 avail = ctxt->input->buf->buffer->use -
9878 (ctxt->input->cur - ctxt->input->base);
9879 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009880
Owen Taylor3473f882001-02-23 17:55:21 +00009881 if ((ctxt->instate != XML_PARSER_EOF) &&
9882 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009883 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009884 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009885 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009886 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009887 }
Owen Taylor3473f882001-02-23 17:55:21 +00009888 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009889 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009890 ctxt->sax->endDocument(ctxt->userData);
9891 }
9892 ctxt->instate = XML_PARSER_EOF;
9893 }
9894 return((xmlParserErrors) ctxt->errNo);
9895}
9896
9897/************************************************************************
9898 * *
9899 * I/O front end functions to the parser *
9900 * *
9901 ************************************************************************/
9902
9903/**
9904 * xmlStopParser:
9905 * @ctxt: an XML parser context
9906 *
9907 * Blocks further parser processing
9908 */
9909void
9910xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009911 if (ctxt == NULL)
9912 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009913 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009914 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009915 if (ctxt->input != NULL)
9916 ctxt->input->cur = BAD_CAST"";
9917}
9918
9919/**
9920 * xmlCreatePushParserCtxt:
9921 * @sax: a SAX handler
9922 * @user_data: The user data returned on SAX callbacks
9923 * @chunk: a pointer to an array of chars
9924 * @size: number of chars in the array
9925 * @filename: an optional file name or URI
9926 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009927 * Create a parser context for using the XML parser in push mode.
9928 * If @buffer and @size are non-NULL, the data is used to detect
9929 * the encoding. The remaining characters will be parsed so they
9930 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009931 * To allow content encoding detection, @size should be >= 4
9932 * The value of @filename is used for fetching external entities
9933 * and error/warning reports.
9934 *
9935 * Returns the new parser context or NULL
9936 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009937
Owen Taylor3473f882001-02-23 17:55:21 +00009938xmlParserCtxtPtr
9939xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9940 const char *chunk, int size, const char *filename) {
9941 xmlParserCtxtPtr ctxt;
9942 xmlParserInputPtr inputStream;
9943 xmlParserInputBufferPtr buf;
9944 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9945
9946 /*
9947 * plug some encoding conversion routines
9948 */
9949 if ((chunk != NULL) && (size >= 4))
9950 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9951
9952 buf = xmlAllocParserInputBuffer(enc);
9953 if (buf == NULL) return(NULL);
9954
9955 ctxt = xmlNewParserCtxt();
9956 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009957 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009958 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009959 return(NULL);
9960 }
Daniel Veillard03a53c32004-10-26 16:06:51 +00009961 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009962 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9963 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009964 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009965 xmlFreeParserInputBuffer(buf);
9966 xmlFreeParserCtxt(ctxt);
9967 return(NULL);
9968 }
Owen Taylor3473f882001-02-23 17:55:21 +00009969 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009970#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009971 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009972#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009973 xmlFree(ctxt->sax);
9974 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9975 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009976 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009977 xmlFreeParserInputBuffer(buf);
9978 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009979 return(NULL);
9980 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +00009981 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
9982 if (sax->initialized == XML_SAX2_MAGIC)
9983 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9984 else
9985 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +00009986 if (user_data != NULL)
9987 ctxt->userData = user_data;
9988 }
9989 if (filename == NULL) {
9990 ctxt->directory = NULL;
9991 } else {
9992 ctxt->directory = xmlParserGetDirectory(filename);
9993 }
9994
9995 inputStream = xmlNewInputStream(ctxt);
9996 if (inputStream == NULL) {
9997 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009998 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009999 return(NULL);
10000 }
10001
10002 if (filename == NULL)
10003 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010004 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010005 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010006 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010007 if (inputStream->filename == NULL) {
10008 xmlFreeParserCtxt(ctxt);
10009 xmlFreeParserInputBuffer(buf);
10010 return(NULL);
10011 }
10012 }
Owen Taylor3473f882001-02-23 17:55:21 +000010013 inputStream->buf = buf;
10014 inputStream->base = inputStream->buf->buffer->content;
10015 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010016 inputStream->end =
10017 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010018
10019 inputPush(ctxt, inputStream);
10020
10021 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10022 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010023 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10024 int cur = ctxt->input->cur - ctxt->input->base;
10025
Owen Taylor3473f882001-02-23 17:55:21 +000010026 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010027
10028 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10029 ctxt->input->cur = ctxt->input->base + cur;
10030 ctxt->input->end =
10031 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010032#ifdef DEBUG_PUSH
10033 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10034#endif
10035 }
10036
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010037 if (enc != XML_CHAR_ENCODING_NONE) {
10038 xmlSwitchEncoding(ctxt, enc);
10039 }
10040
Owen Taylor3473f882001-02-23 17:55:21 +000010041 return(ctxt);
10042}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010043#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010044
10045/**
10046 * xmlCreateIOParserCtxt:
10047 * @sax: a SAX handler
10048 * @user_data: The user data returned on SAX callbacks
10049 * @ioread: an I/O read function
10050 * @ioclose: an I/O close function
10051 * @ioctx: an I/O handler
10052 * @enc: the charset encoding if known
10053 *
10054 * Create a parser context for using the XML parser with an existing
10055 * I/O stream
10056 *
10057 * Returns the new parser context or NULL
10058 */
10059xmlParserCtxtPtr
10060xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10061 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10062 void *ioctx, xmlCharEncoding enc) {
10063 xmlParserCtxtPtr ctxt;
10064 xmlParserInputPtr inputStream;
10065 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010066
10067 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010068
10069 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10070 if (buf == NULL) return(NULL);
10071
10072 ctxt = xmlNewParserCtxt();
10073 if (ctxt == NULL) {
10074 xmlFree(buf);
10075 return(NULL);
10076 }
10077 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010078#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010079 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010080#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010081 xmlFree(ctxt->sax);
10082 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10083 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010084 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010085 xmlFree(ctxt);
10086 return(NULL);
10087 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010088 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10089 if (sax->initialized == XML_SAX2_MAGIC)
10090 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10091 else
10092 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010093 if (user_data != NULL)
10094 ctxt->userData = user_data;
10095 }
10096
10097 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10098 if (inputStream == NULL) {
10099 xmlFreeParserCtxt(ctxt);
10100 return(NULL);
10101 }
10102 inputPush(ctxt, inputStream);
10103
10104 return(ctxt);
10105}
10106
Daniel Veillard4432df22003-09-28 18:58:27 +000010107#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010108/************************************************************************
10109 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010110 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010111 * *
10112 ************************************************************************/
10113
10114/**
10115 * xmlIOParseDTD:
10116 * @sax: the SAX handler block or NULL
10117 * @input: an Input Buffer
10118 * @enc: the charset encoding if known
10119 *
10120 * Load and parse a DTD
10121 *
10122 * Returns the resulting xmlDtdPtr or NULL in case of error.
10123 * @input will be freed at parsing end.
10124 */
10125
10126xmlDtdPtr
10127xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10128 xmlCharEncoding enc) {
10129 xmlDtdPtr ret = NULL;
10130 xmlParserCtxtPtr ctxt;
10131 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010132 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010133
10134 if (input == NULL)
10135 return(NULL);
10136
10137 ctxt = xmlNewParserCtxt();
10138 if (ctxt == NULL) {
10139 return(NULL);
10140 }
10141
10142 /*
10143 * Set-up the SAX context
10144 */
10145 if (sax != NULL) {
10146 if (ctxt->sax != NULL)
10147 xmlFree(ctxt->sax);
10148 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010149 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010150 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010151 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010152
10153 /*
10154 * generate a parser input from the I/O handler
10155 */
10156
Daniel Veillard43caefb2003-12-07 19:32:22 +000010157 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010158 if (pinput == NULL) {
10159 if (sax != NULL) ctxt->sax = NULL;
10160 xmlFreeParserCtxt(ctxt);
10161 return(NULL);
10162 }
10163
10164 /*
10165 * plug some encoding conversion routines here.
10166 */
10167 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010168 if (enc != XML_CHAR_ENCODING_NONE) {
10169 xmlSwitchEncoding(ctxt, enc);
10170 }
Owen Taylor3473f882001-02-23 17:55:21 +000010171
10172 pinput->filename = NULL;
10173 pinput->line = 1;
10174 pinput->col = 1;
10175 pinput->base = ctxt->input->cur;
10176 pinput->cur = ctxt->input->cur;
10177 pinput->free = NULL;
10178
10179 /*
10180 * let's parse that entity knowing it's an external subset.
10181 */
10182 ctxt->inSubset = 2;
10183 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10184 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10185 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010186
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010187 if ((enc == XML_CHAR_ENCODING_NONE) &&
10188 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010189 /*
10190 * Get the 4 first bytes and decode the charset
10191 * if enc != XML_CHAR_ENCODING_NONE
10192 * plug some encoding conversion routines.
10193 */
10194 start[0] = RAW;
10195 start[1] = NXT(1);
10196 start[2] = NXT(2);
10197 start[3] = NXT(3);
10198 enc = xmlDetectCharEncoding(start, 4);
10199 if (enc != XML_CHAR_ENCODING_NONE) {
10200 xmlSwitchEncoding(ctxt, enc);
10201 }
10202 }
10203
Owen Taylor3473f882001-02-23 17:55:21 +000010204 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10205
10206 if (ctxt->myDoc != NULL) {
10207 if (ctxt->wellFormed) {
10208 ret = ctxt->myDoc->extSubset;
10209 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010210 if (ret != NULL) {
10211 xmlNodePtr tmp;
10212
10213 ret->doc = NULL;
10214 tmp = ret->children;
10215 while (tmp != NULL) {
10216 tmp->doc = NULL;
10217 tmp = tmp->next;
10218 }
10219 }
Owen Taylor3473f882001-02-23 17:55:21 +000010220 } else {
10221 ret = NULL;
10222 }
10223 xmlFreeDoc(ctxt->myDoc);
10224 ctxt->myDoc = NULL;
10225 }
10226 if (sax != NULL) ctxt->sax = NULL;
10227 xmlFreeParserCtxt(ctxt);
10228
10229 return(ret);
10230}
10231
10232/**
10233 * xmlSAXParseDTD:
10234 * @sax: the SAX handler block
10235 * @ExternalID: a NAME* containing the External ID of the DTD
10236 * @SystemID: a NAME* containing the URL to the DTD
10237 *
10238 * Load and parse an external subset.
10239 *
10240 * Returns the resulting xmlDtdPtr or NULL in case of error.
10241 */
10242
10243xmlDtdPtr
10244xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10245 const xmlChar *SystemID) {
10246 xmlDtdPtr ret = NULL;
10247 xmlParserCtxtPtr ctxt;
10248 xmlParserInputPtr input = NULL;
10249 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010250 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010251
10252 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10253
10254 ctxt = xmlNewParserCtxt();
10255 if (ctxt == NULL) {
10256 return(NULL);
10257 }
10258
10259 /*
10260 * Set-up the SAX context
10261 */
10262 if (sax != NULL) {
10263 if (ctxt->sax != NULL)
10264 xmlFree(ctxt->sax);
10265 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010266 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010267 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010268
10269 /*
10270 * Canonicalise the system ID
10271 */
10272 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010273 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010274 xmlFreeParserCtxt(ctxt);
10275 return(NULL);
10276 }
Owen Taylor3473f882001-02-23 17:55:21 +000010277
10278 /*
10279 * Ask the Entity resolver to load the damn thing
10280 */
10281
10282 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010283 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010284 if (input == NULL) {
10285 if (sax != NULL) ctxt->sax = NULL;
10286 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010287 if (systemIdCanonic != NULL)
10288 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010289 return(NULL);
10290 }
10291
10292 /*
10293 * plug some encoding conversion routines here.
10294 */
10295 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010296 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10297 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10298 xmlSwitchEncoding(ctxt, enc);
10299 }
Owen Taylor3473f882001-02-23 17:55:21 +000010300
10301 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010302 input->filename = (char *) systemIdCanonic;
10303 else
10304 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010305 input->line = 1;
10306 input->col = 1;
10307 input->base = ctxt->input->cur;
10308 input->cur = ctxt->input->cur;
10309 input->free = NULL;
10310
10311 /*
10312 * let's parse that entity knowing it's an external subset.
10313 */
10314 ctxt->inSubset = 2;
10315 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10316 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10317 ExternalID, SystemID);
10318 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10319
10320 if (ctxt->myDoc != NULL) {
10321 if (ctxt->wellFormed) {
10322 ret = ctxt->myDoc->extSubset;
10323 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010324 if (ret != NULL) {
10325 xmlNodePtr tmp;
10326
10327 ret->doc = NULL;
10328 tmp = ret->children;
10329 while (tmp != NULL) {
10330 tmp->doc = NULL;
10331 tmp = tmp->next;
10332 }
10333 }
Owen Taylor3473f882001-02-23 17:55:21 +000010334 } else {
10335 ret = NULL;
10336 }
10337 xmlFreeDoc(ctxt->myDoc);
10338 ctxt->myDoc = NULL;
10339 }
10340 if (sax != NULL) ctxt->sax = NULL;
10341 xmlFreeParserCtxt(ctxt);
10342
10343 return(ret);
10344}
10345
Daniel Veillard4432df22003-09-28 18:58:27 +000010346
Owen Taylor3473f882001-02-23 17:55:21 +000010347/**
10348 * xmlParseDTD:
10349 * @ExternalID: a NAME* containing the External ID of the DTD
10350 * @SystemID: a NAME* containing the URL to the DTD
10351 *
10352 * Load and parse an external subset.
10353 *
10354 * Returns the resulting xmlDtdPtr or NULL in case of error.
10355 */
10356
10357xmlDtdPtr
10358xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10359 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10360}
Daniel Veillard4432df22003-09-28 18:58:27 +000010361#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010362
10363/************************************************************************
10364 * *
10365 * Front ends when parsing an Entity *
10366 * *
10367 ************************************************************************/
10368
10369/**
Owen Taylor3473f882001-02-23 17:55:21 +000010370 * xmlParseCtxtExternalEntity:
10371 * @ctx: the existing parsing context
10372 * @URL: the URL for the entity to load
10373 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010374 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010375 *
10376 * Parse an external general entity within an existing parsing context
10377 * An external general parsed entity is well-formed if it matches the
10378 * production labeled extParsedEnt.
10379 *
10380 * [78] extParsedEnt ::= TextDecl? content
10381 *
10382 * Returns 0 if the entity is well formed, -1 in case of args problem and
10383 * the parser error code otherwise
10384 */
10385
10386int
10387xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010388 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010389 xmlParserCtxtPtr ctxt;
10390 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010391 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010392 xmlSAXHandlerPtr oldsax = NULL;
10393 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010394 xmlChar start[4];
10395 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010396
Daniel Veillardce682bc2004-11-05 17:22:25 +000010397 if (ctx == NULL) return(-1);
10398
Owen Taylor3473f882001-02-23 17:55:21 +000010399 if (ctx->depth > 40) {
10400 return(XML_ERR_ENTITY_LOOP);
10401 }
10402
Daniel Veillardcda96922001-08-21 10:56:31 +000010403 if (lst != NULL)
10404 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010405 if ((URL == NULL) && (ID == NULL))
10406 return(-1);
10407 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10408 return(-1);
10409
10410
10411 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10412 if (ctxt == NULL) return(-1);
10413 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010414 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010415 oldsax = ctxt->sax;
10416 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010417 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010418 newDoc = xmlNewDoc(BAD_CAST "1.0");
10419 if (newDoc == NULL) {
10420 xmlFreeParserCtxt(ctxt);
10421 return(-1);
10422 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010423 if (ctx->myDoc->dict) {
10424 newDoc->dict = ctx->myDoc->dict;
10425 xmlDictReference(newDoc->dict);
10426 }
Owen Taylor3473f882001-02-23 17:55:21 +000010427 if (ctx->myDoc != NULL) {
10428 newDoc->intSubset = ctx->myDoc->intSubset;
10429 newDoc->extSubset = ctx->myDoc->extSubset;
10430 }
10431 if (ctx->myDoc->URL != NULL) {
10432 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10433 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010434 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10435 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010436 ctxt->sax = oldsax;
10437 xmlFreeParserCtxt(ctxt);
10438 newDoc->intSubset = NULL;
10439 newDoc->extSubset = NULL;
10440 xmlFreeDoc(newDoc);
10441 return(-1);
10442 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010443 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010444 nodePush(ctxt, newDoc->children);
10445 if (ctx->myDoc == NULL) {
10446 ctxt->myDoc = newDoc;
10447 } else {
10448 ctxt->myDoc = ctx->myDoc;
10449 newDoc->children->doc = ctx->myDoc;
10450 }
10451
Daniel Veillard87a764e2001-06-20 17:41:10 +000010452 /*
10453 * Get the 4 first bytes and decode the charset
10454 * if enc != XML_CHAR_ENCODING_NONE
10455 * plug some encoding conversion routines.
10456 */
10457 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010458 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10459 start[0] = RAW;
10460 start[1] = NXT(1);
10461 start[2] = NXT(2);
10462 start[3] = NXT(3);
10463 enc = xmlDetectCharEncoding(start, 4);
10464 if (enc != XML_CHAR_ENCODING_NONE) {
10465 xmlSwitchEncoding(ctxt, enc);
10466 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010467 }
10468
Owen Taylor3473f882001-02-23 17:55:21 +000010469 /*
10470 * Parse a possible text declaration first
10471 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010472 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010473 xmlParseTextDecl(ctxt);
10474 }
10475
10476 /*
10477 * Doing validity checking on chunk doesn't make sense
10478 */
10479 ctxt->instate = XML_PARSER_CONTENT;
10480 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010481 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010482 ctxt->loadsubset = ctx->loadsubset;
10483 ctxt->depth = ctx->depth + 1;
10484 ctxt->replaceEntities = ctx->replaceEntities;
10485 if (ctxt->validate) {
10486 ctxt->vctxt.error = ctx->vctxt.error;
10487 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010488 } else {
10489 ctxt->vctxt.error = NULL;
10490 ctxt->vctxt.warning = NULL;
10491 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010492 ctxt->vctxt.nodeTab = NULL;
10493 ctxt->vctxt.nodeNr = 0;
10494 ctxt->vctxt.nodeMax = 0;
10495 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010496 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10497 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010498 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10499 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10500 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010501 ctxt->dictNames = ctx->dictNames;
10502 ctxt->attsDefault = ctx->attsDefault;
10503 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010504 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010505
10506 xmlParseContent(ctxt);
10507
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010508 ctx->validate = ctxt->validate;
10509 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010510 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010511 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010512 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010513 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010514 }
10515 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010516 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010517 }
10518
10519 if (!ctxt->wellFormed) {
10520 if (ctxt->errNo == 0)
10521 ret = 1;
10522 else
10523 ret = ctxt->errNo;
10524 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010525 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010526 xmlNodePtr cur;
10527
10528 /*
10529 * Return the newly created nodeset after unlinking it from
10530 * they pseudo parent.
10531 */
10532 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010533 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010534 while (cur != NULL) {
10535 cur->parent = NULL;
10536 cur = cur->next;
10537 }
10538 newDoc->children->children = NULL;
10539 }
10540 ret = 0;
10541 }
10542 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010543 ctxt->dict = NULL;
10544 ctxt->attsDefault = NULL;
10545 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010546 xmlFreeParserCtxt(ctxt);
10547 newDoc->intSubset = NULL;
10548 newDoc->extSubset = NULL;
10549 xmlFreeDoc(newDoc);
10550
10551 return(ret);
10552}
10553
10554/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010555 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010556 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010557 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010558 * @sax: the SAX handler bloc (possibly NULL)
10559 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10560 * @depth: Used for loop detection, use 0
10561 * @URL: the URL for the entity to load
10562 * @ID: the System ID for the entity to load
10563 * @list: the return value for the set of parsed nodes
10564 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010565 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010566 *
10567 * Returns 0 if the entity is well formed, -1 in case of args problem and
10568 * the parser error code otherwise
10569 */
10570
Daniel Veillard7d515752003-09-26 19:12:37 +000010571static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010572xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10573 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010574 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010575 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010576 xmlParserCtxtPtr ctxt;
10577 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010578 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010579 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010580 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010581 xmlChar start[4];
10582 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010583
10584 if (depth > 40) {
10585 return(XML_ERR_ENTITY_LOOP);
10586 }
10587
10588
10589
10590 if (list != NULL)
10591 *list = NULL;
10592 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010593 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010594 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010595 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010596
10597
10598 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010599 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010600 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010601 if (oldctxt != NULL) {
10602 ctxt->_private = oldctxt->_private;
10603 ctxt->loadsubset = oldctxt->loadsubset;
10604 ctxt->validate = oldctxt->validate;
10605 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010606 ctxt->record_info = oldctxt->record_info;
10607 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10608 ctxt->node_seq.length = oldctxt->node_seq.length;
10609 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010610 } else {
10611 /*
10612 * Doing validity checking on chunk without context
10613 * doesn't make sense
10614 */
10615 ctxt->_private = NULL;
10616 ctxt->validate = 0;
10617 ctxt->external = 2;
10618 ctxt->loadsubset = 0;
10619 }
Owen Taylor3473f882001-02-23 17:55:21 +000010620 if (sax != NULL) {
10621 oldsax = ctxt->sax;
10622 ctxt->sax = sax;
10623 if (user_data != NULL)
10624 ctxt->userData = user_data;
10625 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010626 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010627 newDoc = xmlNewDoc(BAD_CAST "1.0");
10628 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010629 ctxt->node_seq.maximum = 0;
10630 ctxt->node_seq.length = 0;
10631 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010632 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010633 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010634 }
10635 if (doc != NULL) {
10636 newDoc->intSubset = doc->intSubset;
10637 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010638 newDoc->dict = doc->dict;
10639 } else if (oldctxt != NULL) {
10640 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010641 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010642 xmlDictReference(newDoc->dict);
10643
Owen Taylor3473f882001-02-23 17:55:21 +000010644 if (doc->URL != NULL) {
10645 newDoc->URL = xmlStrdup(doc->URL);
10646 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010647 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10648 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010649 if (sax != NULL)
10650 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010651 ctxt->node_seq.maximum = 0;
10652 ctxt->node_seq.length = 0;
10653 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010654 xmlFreeParserCtxt(ctxt);
10655 newDoc->intSubset = NULL;
10656 newDoc->extSubset = NULL;
10657 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010658 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010659 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010660 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010661 nodePush(ctxt, newDoc->children);
10662 if (doc == NULL) {
10663 ctxt->myDoc = newDoc;
10664 } else {
10665 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010666 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010667 }
10668
Daniel Veillard87a764e2001-06-20 17:41:10 +000010669 /*
10670 * Get the 4 first bytes and decode the charset
10671 * if enc != XML_CHAR_ENCODING_NONE
10672 * plug some encoding conversion routines.
10673 */
10674 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010675 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10676 start[0] = RAW;
10677 start[1] = NXT(1);
10678 start[2] = NXT(2);
10679 start[3] = NXT(3);
10680 enc = xmlDetectCharEncoding(start, 4);
10681 if (enc != XML_CHAR_ENCODING_NONE) {
10682 xmlSwitchEncoding(ctxt, enc);
10683 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010684 }
10685
Owen Taylor3473f882001-02-23 17:55:21 +000010686 /*
10687 * Parse a possible text declaration first
10688 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010689 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010690 xmlParseTextDecl(ctxt);
10691 }
10692
Owen Taylor3473f882001-02-23 17:55:21 +000010693 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010694 ctxt->depth = depth;
10695
10696 xmlParseContent(ctxt);
10697
Daniel Veillard561b7f82002-03-20 21:55:57 +000010698 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010699 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010700 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010701 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010702 }
10703 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010704 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010705 }
10706
10707 if (!ctxt->wellFormed) {
10708 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010709 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010710 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010711 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010712 } else {
10713 if (list != NULL) {
10714 xmlNodePtr cur;
10715
10716 /*
10717 * Return the newly created nodeset after unlinking it from
10718 * they pseudo parent.
10719 */
10720 cur = newDoc->children->children;
10721 *list = cur;
10722 while (cur != NULL) {
10723 cur->parent = NULL;
10724 cur = cur->next;
10725 }
10726 newDoc->children->children = NULL;
10727 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010728 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010729 }
10730 if (sax != NULL)
10731 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010732 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10733 oldctxt->node_seq.length = ctxt->node_seq.length;
10734 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010735 ctxt->node_seq.maximum = 0;
10736 ctxt->node_seq.length = 0;
10737 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010738 xmlFreeParserCtxt(ctxt);
10739 newDoc->intSubset = NULL;
10740 newDoc->extSubset = NULL;
10741 xmlFreeDoc(newDoc);
10742
10743 return(ret);
10744}
10745
Daniel Veillard81273902003-09-30 00:43:48 +000010746#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010747/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010748 * xmlParseExternalEntity:
10749 * @doc: the document the chunk pertains to
10750 * @sax: the SAX handler bloc (possibly NULL)
10751 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10752 * @depth: Used for loop detection, use 0
10753 * @URL: the URL for the entity to load
10754 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010755 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010756 *
10757 * Parse an external general entity
10758 * An external general parsed entity is well-formed if it matches the
10759 * production labeled extParsedEnt.
10760 *
10761 * [78] extParsedEnt ::= TextDecl? content
10762 *
10763 * Returns 0 if the entity is well formed, -1 in case of args problem and
10764 * the parser error code otherwise
10765 */
10766
10767int
10768xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010769 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010770 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010771 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010772}
10773
10774/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010775 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010776 * @doc: the document the chunk pertains to
10777 * @sax: the SAX handler bloc (possibly NULL)
10778 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10779 * @depth: Used for loop detection, use 0
10780 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010781 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010782 *
10783 * Parse a well-balanced chunk of an XML document
10784 * called by the parser
10785 * The allowed sequence for the Well Balanced Chunk is the one defined by
10786 * the content production in the XML grammar:
10787 *
10788 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10789 *
10790 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10791 * the parser error code otherwise
10792 */
10793
10794int
10795xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010796 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010797 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10798 depth, string, lst, 0 );
10799}
Daniel Veillard81273902003-09-30 00:43:48 +000010800#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010801
10802/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010803 * xmlParseBalancedChunkMemoryInternal:
10804 * @oldctxt: the existing parsing context
10805 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10806 * @user_data: the user data field for the parser context
10807 * @lst: the return value for the set of parsed nodes
10808 *
10809 *
10810 * Parse a well-balanced chunk of an XML document
10811 * called by the parser
10812 * The allowed sequence for the Well Balanced Chunk is the one defined by
10813 * the content production in the XML grammar:
10814 *
10815 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10816 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010817 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10818 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010819 *
10820 * In case recover is set to 1, the nodelist will not be empty even if
10821 * the parsed chunk is not well balanced.
10822 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010823static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010824xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10825 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10826 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010827 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010828 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010829 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010830 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010831 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010832 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010833 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010834
10835 if (oldctxt->depth > 40) {
10836 return(XML_ERR_ENTITY_LOOP);
10837 }
10838
10839
10840 if (lst != NULL)
10841 *lst = NULL;
10842 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010843 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010844
10845 size = xmlStrlen(string);
10846
10847 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010848 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010849 if (user_data != NULL)
10850 ctxt->userData = user_data;
10851 else
10852 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010853 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10854 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010855 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10856 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10857 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010858
10859 oldsax = ctxt->sax;
10860 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010861 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010862 ctxt->replaceEntities = oldctxt->replaceEntities;
10863 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010864
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010865 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010866 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010867 newDoc = xmlNewDoc(BAD_CAST "1.0");
10868 if (newDoc == NULL) {
10869 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010870 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010871 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010872 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010873 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010874 newDoc->dict = ctxt->dict;
10875 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010876 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010877 } else {
10878 ctxt->myDoc = oldctxt->myDoc;
10879 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010880 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010881 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010882 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
10883 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010884 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010885 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010886 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010887 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010888 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010889 }
William M. Brack7b9154b2003-09-27 19:23:50 +000010890 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010891 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010892 ctxt->myDoc->children = NULL;
10893 ctxt->myDoc->last = NULL;
10894 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010895 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010896 ctxt->instate = XML_PARSER_CONTENT;
10897 ctxt->depth = oldctxt->depth + 1;
10898
Daniel Veillard328f48c2002-11-15 15:24:34 +000010899 ctxt->validate = 0;
10900 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010901 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10902 /*
10903 * ID/IDREF registration will be done in xmlValidateElement below
10904 */
10905 ctxt->loadsubset |= XML_SKIP_IDS;
10906 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010907 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010908 ctxt->attsDefault = oldctxt->attsDefault;
10909 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010910
Daniel Veillard68e9e742002-11-16 15:35:11 +000010911 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010912 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010913 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010914 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010915 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010916 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010917 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010918 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010919 }
10920
10921 if (!ctxt->wellFormed) {
10922 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010923 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010924 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010925 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010926 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010927 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010928 }
10929
William M. Brack7b9154b2003-09-27 19:23:50 +000010930 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010931 xmlNodePtr cur;
10932
10933 /*
10934 * Return the newly created nodeset after unlinking it from
10935 * they pseudo parent.
10936 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010937 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010938 *lst = cur;
10939 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010940#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010941 if (oldctxt->validate && oldctxt->wellFormed &&
10942 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10943 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10944 oldctxt->myDoc, cur);
10945 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010946#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010947 cur->parent = NULL;
10948 cur = cur->next;
10949 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010950 ctxt->myDoc->children->children = NULL;
10951 }
10952 if (ctxt->myDoc != NULL) {
10953 xmlFreeNode(ctxt->myDoc->children);
10954 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010955 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010956 }
10957
10958 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010959 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010960 ctxt->attsDefault = NULL;
10961 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010962 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010963 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010964 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010965 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010966
10967 return(ret);
10968}
10969
Daniel Veillard29b17482004-08-16 00:39:03 +000010970/**
10971 * xmlParseInNodeContext:
10972 * @node: the context node
10973 * @data: the input string
10974 * @datalen: the input string length in bytes
10975 * @options: a combination of xmlParserOption
10976 * @lst: the return value for the set of parsed nodes
10977 *
10978 * Parse a well-balanced chunk of an XML document
10979 * within the context (DTD, namespaces, etc ...) of the given node.
10980 *
10981 * The allowed sequence for the data is a Well Balanced Chunk defined by
10982 * the content production in the XML grammar:
10983 *
10984 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10985 *
10986 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10987 * error code otherwise
10988 */
10989xmlParserErrors
10990xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
10991 int options, xmlNodePtr *lst) {
10992#ifdef SAX2
10993 xmlParserCtxtPtr ctxt;
10994 xmlDocPtr doc = NULL;
10995 xmlNodePtr fake, cur;
10996 int nsnr = 0;
10997
10998 xmlParserErrors ret = XML_ERR_OK;
10999
11000 /*
11001 * check all input parameters, grab the document
11002 */
11003 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11004 return(XML_ERR_INTERNAL_ERROR);
11005 switch (node->type) {
11006 case XML_ELEMENT_NODE:
11007 case XML_ATTRIBUTE_NODE:
11008 case XML_TEXT_NODE:
11009 case XML_CDATA_SECTION_NODE:
11010 case XML_ENTITY_REF_NODE:
11011 case XML_PI_NODE:
11012 case XML_COMMENT_NODE:
11013 case XML_DOCUMENT_NODE:
11014 case XML_HTML_DOCUMENT_NODE:
11015 break;
11016 default:
11017 return(XML_ERR_INTERNAL_ERROR);
11018
11019 }
11020 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11021 (node->type != XML_DOCUMENT_NODE) &&
11022 (node->type != XML_HTML_DOCUMENT_NODE))
11023 node = node->parent;
11024 if (node == NULL)
11025 return(XML_ERR_INTERNAL_ERROR);
11026 if (node->type == XML_ELEMENT_NODE)
11027 doc = node->doc;
11028 else
11029 doc = (xmlDocPtr) node;
11030 if (doc == NULL)
11031 return(XML_ERR_INTERNAL_ERROR);
11032
11033 /*
11034 * allocate a context and set-up everything not related to the
11035 * node position in the tree
11036 */
11037 if (doc->type == XML_DOCUMENT_NODE)
11038 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11039#ifdef LIBXML_HTML_ENABLED
11040 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11041 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11042#endif
11043 else
11044 return(XML_ERR_INTERNAL_ERROR);
11045
11046 if (ctxt == NULL)
11047 return(XML_ERR_NO_MEMORY);
11048 fake = xmlNewComment(NULL);
11049 if (fake == NULL) {
11050 xmlFreeParserCtxt(ctxt);
11051 return(XML_ERR_NO_MEMORY);
11052 }
11053 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011054
11055 /*
11056 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11057 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11058 * we must wait until the last moment to free the original one.
11059 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011060 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011061 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011062 xmlDictFree(ctxt->dict);
11063 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011064 } else
11065 options |= XML_PARSE_NODICT;
11066
11067 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011068 xmlDetectSAX2(ctxt);
11069 ctxt->myDoc = doc;
11070
11071 if (node->type == XML_ELEMENT_NODE) {
11072 nodePush(ctxt, node);
11073 /*
11074 * initialize the SAX2 namespaces stack
11075 */
11076 cur = node;
11077 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11078 xmlNsPtr ns = cur->nsDef;
11079 const xmlChar *iprefix, *ihref;
11080
11081 while (ns != NULL) {
11082 if (ctxt->dict) {
11083 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11084 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11085 } else {
11086 iprefix = ns->prefix;
11087 ihref = ns->href;
11088 }
11089
11090 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11091 nsPush(ctxt, iprefix, ihref);
11092 nsnr++;
11093 }
11094 ns = ns->next;
11095 }
11096 cur = cur->parent;
11097 }
11098 ctxt->instate = XML_PARSER_CONTENT;
11099 }
11100
11101 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11102 /*
11103 * ID/IDREF registration will be done in xmlValidateElement below
11104 */
11105 ctxt->loadsubset |= XML_SKIP_IDS;
11106 }
11107
11108 xmlParseContent(ctxt);
11109 nsPop(ctxt, nsnr);
11110 if ((RAW == '<') && (NXT(1) == '/')) {
11111 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11112 } else if (RAW != 0) {
11113 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11114 }
11115 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11116 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11117 ctxt->wellFormed = 0;
11118 }
11119
11120 if (!ctxt->wellFormed) {
11121 if (ctxt->errNo == 0)
11122 ret = XML_ERR_INTERNAL_ERROR;
11123 else
11124 ret = (xmlParserErrors)ctxt->errNo;
11125 } else {
11126 ret = XML_ERR_OK;
11127 }
11128
11129 /*
11130 * Return the newly created nodeset after unlinking it from
11131 * the pseudo sibling.
11132 */
11133
11134 cur = fake->next;
11135 fake->next = NULL;
11136 node->last = fake;
11137
11138 if (cur != NULL) {
11139 cur->prev = NULL;
11140 }
11141
11142 *lst = cur;
11143
11144 while (cur != NULL) {
11145 cur->parent = NULL;
11146 cur = cur->next;
11147 }
11148
11149 xmlUnlinkNode(fake);
11150 xmlFreeNode(fake);
11151
11152
11153 if (ret != XML_ERR_OK) {
11154 xmlFreeNodeList(*lst);
11155 *lst = NULL;
11156 }
William M. Brackc3f81342004-10-03 01:22:44 +000011157
William M. Brackb7b54de2004-10-06 16:38:01 +000011158 if (doc->dict != NULL)
11159 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011160 xmlFreeParserCtxt(ctxt);
11161
11162 return(ret);
11163#else /* !SAX2 */
11164 return(XML_ERR_INTERNAL_ERROR);
11165#endif
11166}
11167
Daniel Veillard81273902003-09-30 00:43:48 +000011168#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011169/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011170 * xmlParseBalancedChunkMemoryRecover:
11171 * @doc: the document the chunk pertains to
11172 * @sax: the SAX handler bloc (possibly NULL)
11173 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11174 * @depth: Used for loop detection, use 0
11175 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11176 * @lst: the return value for the set of parsed nodes
11177 * @recover: return nodes even if the data is broken (use 0)
11178 *
11179 *
11180 * Parse a well-balanced chunk of an XML document
11181 * called by the parser
11182 * The allowed sequence for the Well Balanced Chunk is the one defined by
11183 * the content production in the XML grammar:
11184 *
11185 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11186 *
11187 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11188 * the parser error code otherwise
11189 *
11190 * In case recover is set to 1, the nodelist will not be empty even if
11191 * the parsed chunk is not well balanced.
11192 */
11193int
11194xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11195 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11196 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011197 xmlParserCtxtPtr ctxt;
11198 xmlDocPtr newDoc;
11199 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011200 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011201 int size;
11202 int ret = 0;
11203
11204 if (depth > 40) {
11205 return(XML_ERR_ENTITY_LOOP);
11206 }
11207
11208
Daniel Veillardcda96922001-08-21 10:56:31 +000011209 if (lst != NULL)
11210 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011211 if (string == NULL)
11212 return(-1);
11213
11214 size = xmlStrlen(string);
11215
11216 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11217 if (ctxt == NULL) return(-1);
11218 ctxt->userData = ctxt;
11219 if (sax != NULL) {
11220 oldsax = ctxt->sax;
11221 ctxt->sax = sax;
11222 if (user_data != NULL)
11223 ctxt->userData = user_data;
11224 }
11225 newDoc = xmlNewDoc(BAD_CAST "1.0");
11226 if (newDoc == NULL) {
11227 xmlFreeParserCtxt(ctxt);
11228 return(-1);
11229 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011230 if ((doc != NULL) && (doc->dict != NULL)) {
11231 xmlDictFree(ctxt->dict);
11232 ctxt->dict = doc->dict;
11233 xmlDictReference(ctxt->dict);
11234 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11235 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11236 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11237 ctxt->dictNames = 1;
11238 } else {
11239 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11240 }
Owen Taylor3473f882001-02-23 17:55:21 +000011241 if (doc != NULL) {
11242 newDoc->intSubset = doc->intSubset;
11243 newDoc->extSubset = doc->extSubset;
11244 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011245 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11246 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011247 if (sax != NULL)
11248 ctxt->sax = oldsax;
11249 xmlFreeParserCtxt(ctxt);
11250 newDoc->intSubset = NULL;
11251 newDoc->extSubset = NULL;
11252 xmlFreeDoc(newDoc);
11253 return(-1);
11254 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011255 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11256 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011257 if (doc == NULL) {
11258 ctxt->myDoc = newDoc;
11259 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011260 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011261 newDoc->children->doc = doc;
11262 }
11263 ctxt->instate = XML_PARSER_CONTENT;
11264 ctxt->depth = depth;
11265
11266 /*
11267 * Doing validity checking on chunk doesn't make sense
11268 */
11269 ctxt->validate = 0;
11270 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011271 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011272
Daniel Veillardb39bc392002-10-26 19:29:51 +000011273 if ( doc != NULL ){
11274 content = doc->children;
11275 doc->children = NULL;
11276 xmlParseContent(ctxt);
11277 doc->children = content;
11278 }
11279 else {
11280 xmlParseContent(ctxt);
11281 }
Owen Taylor3473f882001-02-23 17:55:21 +000011282 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011283 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011284 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011285 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011286 }
11287 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011288 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011289 }
11290
11291 if (!ctxt->wellFormed) {
11292 if (ctxt->errNo == 0)
11293 ret = 1;
11294 else
11295 ret = ctxt->errNo;
11296 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011297 ret = 0;
11298 }
11299
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011300 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11301 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011302
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011303 /*
11304 * Return the newly created nodeset after unlinking it from
11305 * they pseudo parent.
11306 */
11307 cur = newDoc->children->children;
11308 *lst = cur;
11309 while (cur != NULL) {
11310 xmlSetTreeDoc(cur, doc);
11311 cur->parent = NULL;
11312 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011313 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011314 newDoc->children->children = NULL;
11315 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011316
Owen Taylor3473f882001-02-23 17:55:21 +000011317 if (sax != NULL)
11318 ctxt->sax = oldsax;
11319 xmlFreeParserCtxt(ctxt);
11320 newDoc->intSubset = NULL;
11321 newDoc->extSubset = NULL;
11322 xmlFreeDoc(newDoc);
11323
11324 return(ret);
11325}
11326
11327/**
11328 * xmlSAXParseEntity:
11329 * @sax: the SAX handler block
11330 * @filename: the filename
11331 *
11332 * parse an XML external entity out of context and build a tree.
11333 * It use the given SAX function block to handle the parsing callback.
11334 * If sax is NULL, fallback to the default DOM tree building routines.
11335 *
11336 * [78] extParsedEnt ::= TextDecl? content
11337 *
11338 * This correspond to a "Well Balanced" chunk
11339 *
11340 * Returns the resulting document tree
11341 */
11342
11343xmlDocPtr
11344xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11345 xmlDocPtr ret;
11346 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011347
11348 ctxt = xmlCreateFileParserCtxt(filename);
11349 if (ctxt == NULL) {
11350 return(NULL);
11351 }
11352 if (sax != NULL) {
11353 if (ctxt->sax != NULL)
11354 xmlFree(ctxt->sax);
11355 ctxt->sax = sax;
11356 ctxt->userData = NULL;
11357 }
11358
Owen Taylor3473f882001-02-23 17:55:21 +000011359 xmlParseExtParsedEnt(ctxt);
11360
11361 if (ctxt->wellFormed)
11362 ret = ctxt->myDoc;
11363 else {
11364 ret = NULL;
11365 xmlFreeDoc(ctxt->myDoc);
11366 ctxt->myDoc = NULL;
11367 }
11368 if (sax != NULL)
11369 ctxt->sax = NULL;
11370 xmlFreeParserCtxt(ctxt);
11371
11372 return(ret);
11373}
11374
11375/**
11376 * xmlParseEntity:
11377 * @filename: the filename
11378 *
11379 * parse an XML external entity out of context and build a tree.
11380 *
11381 * [78] extParsedEnt ::= TextDecl? content
11382 *
11383 * This correspond to a "Well Balanced" chunk
11384 *
11385 * Returns the resulting document tree
11386 */
11387
11388xmlDocPtr
11389xmlParseEntity(const char *filename) {
11390 return(xmlSAXParseEntity(NULL, filename));
11391}
Daniel Veillard81273902003-09-30 00:43:48 +000011392#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011393
11394/**
11395 * xmlCreateEntityParserCtxt:
11396 * @URL: the entity URL
11397 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011398 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011399 *
11400 * Create a parser context for an external entity
11401 * Automatic support for ZLIB/Compress compressed document is provided
11402 * by default if found at compile-time.
11403 *
11404 * Returns the new parser context or NULL
11405 */
11406xmlParserCtxtPtr
11407xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11408 const xmlChar *base) {
11409 xmlParserCtxtPtr ctxt;
11410 xmlParserInputPtr inputStream;
11411 char *directory = NULL;
11412 xmlChar *uri;
11413
11414 ctxt = xmlNewParserCtxt();
11415 if (ctxt == NULL) {
11416 return(NULL);
11417 }
11418
11419 uri = xmlBuildURI(URL, base);
11420
11421 if (uri == NULL) {
11422 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11423 if (inputStream == NULL) {
11424 xmlFreeParserCtxt(ctxt);
11425 return(NULL);
11426 }
11427
11428 inputPush(ctxt, inputStream);
11429
11430 if ((ctxt->directory == NULL) && (directory == NULL))
11431 directory = xmlParserGetDirectory((char *)URL);
11432 if ((ctxt->directory == NULL) && (directory != NULL))
11433 ctxt->directory = directory;
11434 } else {
11435 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11436 if (inputStream == NULL) {
11437 xmlFree(uri);
11438 xmlFreeParserCtxt(ctxt);
11439 return(NULL);
11440 }
11441
11442 inputPush(ctxt, inputStream);
11443
11444 if ((ctxt->directory == NULL) && (directory == NULL))
11445 directory = xmlParserGetDirectory((char *)uri);
11446 if ((ctxt->directory == NULL) && (directory != NULL))
11447 ctxt->directory = directory;
11448 xmlFree(uri);
11449 }
Owen Taylor3473f882001-02-23 17:55:21 +000011450 return(ctxt);
11451}
11452
11453/************************************************************************
11454 * *
11455 * Front ends when parsing from a file *
11456 * *
11457 ************************************************************************/
11458
11459/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011460 * xmlCreateURLParserCtxt:
11461 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011462 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011463 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011464 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011465 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011466 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011467 *
11468 * Returns the new parser context or NULL
11469 */
11470xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011471xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011472{
11473 xmlParserCtxtPtr ctxt;
11474 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011475 char *directory = NULL;
11476
Owen Taylor3473f882001-02-23 17:55:21 +000011477 ctxt = xmlNewParserCtxt();
11478 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011479 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011480 return(NULL);
11481 }
11482
Daniel Veillard61b93382003-11-03 14:28:31 +000011483 if (options != 0)
11484 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011485
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011486 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011487 if (inputStream == NULL) {
11488 xmlFreeParserCtxt(ctxt);
11489 return(NULL);
11490 }
11491
Owen Taylor3473f882001-02-23 17:55:21 +000011492 inputPush(ctxt, inputStream);
11493 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011494 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011495 if ((ctxt->directory == NULL) && (directory != NULL))
11496 ctxt->directory = directory;
11497
11498 return(ctxt);
11499}
11500
Daniel Veillard61b93382003-11-03 14:28:31 +000011501/**
11502 * xmlCreateFileParserCtxt:
11503 * @filename: the filename
11504 *
11505 * Create a parser context for a file content.
11506 * Automatic support for ZLIB/Compress compressed document is provided
11507 * by default if found at compile-time.
11508 *
11509 * Returns the new parser context or NULL
11510 */
11511xmlParserCtxtPtr
11512xmlCreateFileParserCtxt(const char *filename)
11513{
11514 return(xmlCreateURLParserCtxt(filename, 0));
11515}
11516
Daniel Veillard81273902003-09-30 00:43:48 +000011517#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011518/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011519 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011520 * @sax: the SAX handler block
11521 * @filename: the filename
11522 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11523 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011524 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011525 *
11526 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11527 * compressed document is provided by default if found at compile-time.
11528 * It use the given SAX function block to handle the parsing callback.
11529 * If sax is NULL, fallback to the default DOM tree building routines.
11530 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011531 * User data (void *) is stored within the parser context in the
11532 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011533 *
Owen Taylor3473f882001-02-23 17:55:21 +000011534 * Returns the resulting document tree
11535 */
11536
11537xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011538xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11539 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011540 xmlDocPtr ret;
11541 xmlParserCtxtPtr ctxt;
11542 char *directory = NULL;
11543
Daniel Veillard635ef722001-10-29 11:48:19 +000011544 xmlInitParser();
11545
Owen Taylor3473f882001-02-23 17:55:21 +000011546 ctxt = xmlCreateFileParserCtxt(filename);
11547 if (ctxt == NULL) {
11548 return(NULL);
11549 }
11550 if (sax != NULL) {
11551 if (ctxt->sax != NULL)
11552 xmlFree(ctxt->sax);
11553 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011554 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011555 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011556 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011557 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011558 }
Owen Taylor3473f882001-02-23 17:55:21 +000011559
11560 if ((ctxt->directory == NULL) && (directory == NULL))
11561 directory = xmlParserGetDirectory(filename);
11562 if ((ctxt->directory == NULL) && (directory != NULL))
11563 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11564
Daniel Veillarddad3f682002-11-17 16:47:27 +000011565 ctxt->recovery = recovery;
11566
Owen Taylor3473f882001-02-23 17:55:21 +000011567 xmlParseDocument(ctxt);
11568
William M. Brackc07329e2003-09-08 01:57:30 +000011569 if ((ctxt->wellFormed) || recovery) {
11570 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011571 if (ret != NULL) {
11572 if (ctxt->input->buf->compressed > 0)
11573 ret->compression = 9;
11574 else
11575 ret->compression = ctxt->input->buf->compressed;
11576 }
William M. Brackc07329e2003-09-08 01:57:30 +000011577 }
Owen Taylor3473f882001-02-23 17:55:21 +000011578 else {
11579 ret = NULL;
11580 xmlFreeDoc(ctxt->myDoc);
11581 ctxt->myDoc = NULL;
11582 }
11583 if (sax != NULL)
11584 ctxt->sax = NULL;
11585 xmlFreeParserCtxt(ctxt);
11586
11587 return(ret);
11588}
11589
11590/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011591 * xmlSAXParseFile:
11592 * @sax: the SAX handler block
11593 * @filename: the filename
11594 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11595 * documents
11596 *
11597 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11598 * compressed document is provided by default if found at compile-time.
11599 * It use the given SAX function block to handle the parsing callback.
11600 * If sax is NULL, fallback to the default DOM tree building routines.
11601 *
11602 * Returns the resulting document tree
11603 */
11604
11605xmlDocPtr
11606xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11607 int recovery) {
11608 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11609}
11610
11611/**
Owen Taylor3473f882001-02-23 17:55:21 +000011612 * xmlRecoverDoc:
11613 * @cur: a pointer to an array of xmlChar
11614 *
11615 * parse an XML in-memory document and build a tree.
11616 * In the case the document is not Well Formed, a tree is built anyway
11617 *
11618 * Returns the resulting document tree
11619 */
11620
11621xmlDocPtr
11622xmlRecoverDoc(xmlChar *cur) {
11623 return(xmlSAXParseDoc(NULL, cur, 1));
11624}
11625
11626/**
11627 * xmlParseFile:
11628 * @filename: the filename
11629 *
11630 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11631 * compressed document is provided by default if found at compile-time.
11632 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011633 * Returns the resulting document tree if the file was wellformed,
11634 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011635 */
11636
11637xmlDocPtr
11638xmlParseFile(const char *filename) {
11639 return(xmlSAXParseFile(NULL, filename, 0));
11640}
11641
11642/**
11643 * xmlRecoverFile:
11644 * @filename: the filename
11645 *
11646 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11647 * compressed document is provided by default if found at compile-time.
11648 * In the case the document is not Well Formed, a tree is built anyway
11649 *
11650 * Returns the resulting document tree
11651 */
11652
11653xmlDocPtr
11654xmlRecoverFile(const char *filename) {
11655 return(xmlSAXParseFile(NULL, filename, 1));
11656}
11657
11658
11659/**
11660 * xmlSetupParserForBuffer:
11661 * @ctxt: an XML parser context
11662 * @buffer: a xmlChar * buffer
11663 * @filename: a file name
11664 *
11665 * Setup the parser context to parse a new buffer; Clears any prior
11666 * contents from the parser context. The buffer parameter must not be
11667 * NULL, but the filename parameter can be
11668 */
11669void
11670xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11671 const char* filename)
11672{
11673 xmlParserInputPtr input;
11674
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011675 if ((ctxt == NULL) || (buffer == NULL))
11676 return;
11677
Owen Taylor3473f882001-02-23 17:55:21 +000011678 input = xmlNewInputStream(ctxt);
11679 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011680 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011681 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011682 return;
11683 }
11684
11685 xmlClearParserCtxt(ctxt);
11686 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011687 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011688 input->base = buffer;
11689 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011690 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011691 inputPush(ctxt, input);
11692}
11693
11694/**
11695 * xmlSAXUserParseFile:
11696 * @sax: a SAX handler
11697 * @user_data: The user data returned on SAX callbacks
11698 * @filename: a file name
11699 *
11700 * parse an XML file and call the given SAX handler routines.
11701 * Automatic support for ZLIB/Compress compressed document is provided
11702 *
11703 * Returns 0 in case of success or a error number otherwise
11704 */
11705int
11706xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11707 const char *filename) {
11708 int ret = 0;
11709 xmlParserCtxtPtr ctxt;
11710
11711 ctxt = xmlCreateFileParserCtxt(filename);
11712 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011713#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011714 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011715#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011716 xmlFree(ctxt->sax);
11717 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011718 xmlDetectSAX2(ctxt);
11719
Owen Taylor3473f882001-02-23 17:55:21 +000011720 if (user_data != NULL)
11721 ctxt->userData = user_data;
11722
11723 xmlParseDocument(ctxt);
11724
11725 if (ctxt->wellFormed)
11726 ret = 0;
11727 else {
11728 if (ctxt->errNo != 0)
11729 ret = ctxt->errNo;
11730 else
11731 ret = -1;
11732 }
11733 if (sax != NULL)
11734 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000011735 if (ctxt->myDoc != NULL) {
11736 xmlFreeDoc(ctxt->myDoc);
11737 ctxt->myDoc = NULL;
11738 }
Owen Taylor3473f882001-02-23 17:55:21 +000011739 xmlFreeParserCtxt(ctxt);
11740
11741 return ret;
11742}
Daniel Veillard81273902003-09-30 00:43:48 +000011743#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011744
11745/************************************************************************
11746 * *
11747 * Front ends when parsing from memory *
11748 * *
11749 ************************************************************************/
11750
11751/**
11752 * xmlCreateMemoryParserCtxt:
11753 * @buffer: a pointer to a char array
11754 * @size: the size of the array
11755 *
11756 * Create a parser context for an XML in-memory document.
11757 *
11758 * Returns the new parser context or NULL
11759 */
11760xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011761xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011762 xmlParserCtxtPtr ctxt;
11763 xmlParserInputPtr input;
11764 xmlParserInputBufferPtr buf;
11765
11766 if (buffer == NULL)
11767 return(NULL);
11768 if (size <= 0)
11769 return(NULL);
11770
11771 ctxt = xmlNewParserCtxt();
11772 if (ctxt == NULL)
11773 return(NULL);
11774
Daniel Veillard53350552003-09-18 13:35:51 +000011775 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011776 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011777 if (buf == NULL) {
11778 xmlFreeParserCtxt(ctxt);
11779 return(NULL);
11780 }
Owen Taylor3473f882001-02-23 17:55:21 +000011781
11782 input = xmlNewInputStream(ctxt);
11783 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011784 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011785 xmlFreeParserCtxt(ctxt);
11786 return(NULL);
11787 }
11788
11789 input->filename = NULL;
11790 input->buf = buf;
11791 input->base = input->buf->buffer->content;
11792 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011793 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011794
11795 inputPush(ctxt, input);
11796 return(ctxt);
11797}
11798
Daniel Veillard81273902003-09-30 00:43:48 +000011799#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011800/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011801 * xmlSAXParseMemoryWithData:
11802 * @sax: the SAX handler block
11803 * @buffer: an pointer to a char array
11804 * @size: the size of the array
11805 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11806 * documents
11807 * @data: the userdata
11808 *
11809 * parse an XML in-memory block and use the given SAX function block
11810 * to handle the parsing callback. If sax is NULL, fallback to the default
11811 * DOM tree building routines.
11812 *
11813 * User data (void *) is stored within the parser context in the
11814 * context's _private member, so it is available nearly everywhere in libxml
11815 *
11816 * Returns the resulting document tree
11817 */
11818
11819xmlDocPtr
11820xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11821 int size, int recovery, void *data) {
11822 xmlDocPtr ret;
11823 xmlParserCtxtPtr ctxt;
11824
11825 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11826 if (ctxt == NULL) return(NULL);
11827 if (sax != NULL) {
11828 if (ctxt->sax != NULL)
11829 xmlFree(ctxt->sax);
11830 ctxt->sax = sax;
11831 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011832 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011833 if (data!=NULL) {
11834 ctxt->_private=data;
11835 }
11836
Daniel Veillardadba5f12003-04-04 16:09:01 +000011837 ctxt->recovery = recovery;
11838
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011839 xmlParseDocument(ctxt);
11840
11841 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11842 else {
11843 ret = NULL;
11844 xmlFreeDoc(ctxt->myDoc);
11845 ctxt->myDoc = NULL;
11846 }
11847 if (sax != NULL)
11848 ctxt->sax = NULL;
11849 xmlFreeParserCtxt(ctxt);
11850
11851 return(ret);
11852}
11853
11854/**
Owen Taylor3473f882001-02-23 17:55:21 +000011855 * xmlSAXParseMemory:
11856 * @sax: the SAX handler block
11857 * @buffer: an pointer to a char array
11858 * @size: the size of the array
11859 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11860 * documents
11861 *
11862 * parse an XML in-memory block and use the given SAX function block
11863 * to handle the parsing callback. If sax is NULL, fallback to the default
11864 * DOM tree building routines.
11865 *
11866 * Returns the resulting document tree
11867 */
11868xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011869xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11870 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011871 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011872}
11873
11874/**
11875 * xmlParseMemory:
11876 * @buffer: an pointer to a char array
11877 * @size: the size of the array
11878 *
11879 * parse an XML in-memory block and build a tree.
11880 *
11881 * Returns the resulting document tree
11882 */
11883
Daniel Veillard50822cb2001-07-26 20:05:51 +000011884xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011885 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11886}
11887
11888/**
11889 * xmlRecoverMemory:
11890 * @buffer: an pointer to a char array
11891 * @size: the size of the array
11892 *
11893 * parse an XML in-memory block and build a tree.
11894 * In the case the document is not Well Formed, a tree is built anyway
11895 *
11896 * Returns the resulting document tree
11897 */
11898
Daniel Veillard50822cb2001-07-26 20:05:51 +000011899xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011900 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11901}
11902
11903/**
11904 * xmlSAXUserParseMemory:
11905 * @sax: a SAX handler
11906 * @user_data: The user data returned on SAX callbacks
11907 * @buffer: an in-memory XML document input
11908 * @size: the length of the XML document in bytes
11909 *
11910 * A better SAX parsing routine.
11911 * parse an XML in-memory buffer and call the given SAX handler routines.
11912 *
11913 * Returns 0 in case of success or a error number otherwise
11914 */
11915int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011916 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011917 int ret = 0;
11918 xmlParserCtxtPtr ctxt;
11919 xmlSAXHandlerPtr oldsax = NULL;
11920
Daniel Veillard9e923512002-08-14 08:48:52 +000011921 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011922 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11923 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011924 oldsax = ctxt->sax;
11925 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011926 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011927 if (user_data != NULL)
11928 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011929
11930 xmlParseDocument(ctxt);
11931
11932 if (ctxt->wellFormed)
11933 ret = 0;
11934 else {
11935 if (ctxt->errNo != 0)
11936 ret = ctxt->errNo;
11937 else
11938 ret = -1;
11939 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011940 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000011941 if (ctxt->myDoc != NULL) {
11942 xmlFreeDoc(ctxt->myDoc);
11943 ctxt->myDoc = NULL;
11944 }
Owen Taylor3473f882001-02-23 17:55:21 +000011945 xmlFreeParserCtxt(ctxt);
11946
11947 return ret;
11948}
Daniel Veillard81273902003-09-30 00:43:48 +000011949#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011950
11951/**
11952 * xmlCreateDocParserCtxt:
11953 * @cur: a pointer to an array of xmlChar
11954 *
11955 * Creates a parser context for an XML in-memory document.
11956 *
11957 * Returns the new parser context or NULL
11958 */
11959xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011960xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011961 int len;
11962
11963 if (cur == NULL)
11964 return(NULL);
11965 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011966 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011967}
11968
Daniel Veillard81273902003-09-30 00:43:48 +000011969#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011970/**
11971 * xmlSAXParseDoc:
11972 * @sax: the SAX handler block
11973 * @cur: a pointer to an array of xmlChar
11974 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11975 * documents
11976 *
11977 * parse an XML in-memory document and build a tree.
11978 * It use the given SAX function block to handle the parsing callback.
11979 * If sax is NULL, fallback to the default DOM tree building routines.
11980 *
11981 * Returns the resulting document tree
11982 */
11983
11984xmlDocPtr
11985xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11986 xmlDocPtr ret;
11987 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000011988 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011989
Daniel Veillard38936062004-11-04 17:45:11 +000011990 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011991
11992
11993 ctxt = xmlCreateDocParserCtxt(cur);
11994 if (ctxt == NULL) return(NULL);
11995 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000011996 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011997 ctxt->sax = sax;
11998 ctxt->userData = NULL;
11999 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012000 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012001
12002 xmlParseDocument(ctxt);
12003 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12004 else {
12005 ret = NULL;
12006 xmlFreeDoc(ctxt->myDoc);
12007 ctxt->myDoc = NULL;
12008 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012009 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012010 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012011 xmlFreeParserCtxt(ctxt);
12012
12013 return(ret);
12014}
12015
12016/**
12017 * xmlParseDoc:
12018 * @cur: a pointer to an array of xmlChar
12019 *
12020 * parse an XML in-memory document and build a tree.
12021 *
12022 * Returns the resulting document tree
12023 */
12024
12025xmlDocPtr
12026xmlParseDoc(xmlChar *cur) {
12027 return(xmlSAXParseDoc(NULL, cur, 0));
12028}
Daniel Veillard81273902003-09-30 00:43:48 +000012029#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012030
Daniel Veillard81273902003-09-30 00:43:48 +000012031#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012032/************************************************************************
12033 * *
12034 * Specific function to keep track of entities references *
12035 * and used by the XSLT debugger *
12036 * *
12037 ************************************************************************/
12038
12039static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12040
12041/**
12042 * xmlAddEntityReference:
12043 * @ent : A valid entity
12044 * @firstNode : A valid first node for children of entity
12045 * @lastNode : A valid last node of children entity
12046 *
12047 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12048 */
12049static void
12050xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12051 xmlNodePtr lastNode)
12052{
12053 if (xmlEntityRefFunc != NULL) {
12054 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12055 }
12056}
12057
12058
12059/**
12060 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012061 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012062 *
12063 * Set the function to call call back when a xml reference has been made
12064 */
12065void
12066xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12067{
12068 xmlEntityRefFunc = func;
12069}
Daniel Veillard81273902003-09-30 00:43:48 +000012070#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012071
12072/************************************************************************
12073 * *
12074 * Miscellaneous *
12075 * *
12076 ************************************************************************/
12077
12078#ifdef LIBXML_XPATH_ENABLED
12079#include <libxml/xpath.h>
12080#endif
12081
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012082extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012083static int xmlParserInitialized = 0;
12084
12085/**
12086 * xmlInitParser:
12087 *
12088 * Initialization function for the XML parser.
12089 * This is not reentrant. Call once before processing in case of
12090 * use in multithreaded programs.
12091 */
12092
12093void
12094xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012095 if (xmlParserInitialized != 0)
12096 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012097
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012098 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12099 (xmlGenericError == NULL))
12100 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012101 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012102 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012103 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012104 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012105 xmlDefaultSAXHandlerInit();
12106 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012107#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012108 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012109#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012110#ifdef LIBXML_HTML_ENABLED
12111 htmlInitAutoClose();
12112 htmlDefaultSAXHandlerInit();
12113#endif
12114#ifdef LIBXML_XPATH_ENABLED
12115 xmlXPathInit();
12116#endif
12117 xmlParserInitialized = 1;
12118}
12119
12120/**
12121 * xmlCleanupParser:
12122 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012123 * Cleanup function for the XML library. It tries to reclaim all
12124 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012125 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012126 * function should not prevent reusing the library but one should
12127 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012128 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012129 */
12130
12131void
12132xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012133 if (!xmlParserInitialized)
12134 return;
12135
Owen Taylor3473f882001-02-23 17:55:21 +000012136 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012137#ifdef LIBXML_CATALOG_ENABLED
12138 xmlCatalogCleanup();
12139#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012140 xmlCleanupInputCallbacks();
12141#ifdef LIBXML_OUTPUT_ENABLED
12142 xmlCleanupOutputCallbacks();
12143#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012144#ifdef LIBXML_SCHEMAS_ENABLED
12145 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012146 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012147#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012148 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012149 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012150 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012151 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012152 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012153}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012154
12155/************************************************************************
12156 * *
12157 * New set (2.6.0) of simpler and more flexible APIs *
12158 * *
12159 ************************************************************************/
12160
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible at the point
 * where the macro is expanded; when it is NULL every non-NULL string is
 * freed.
 *
 * The body is wrapped in do { } while (0) so the expansion behaves as a
 * single statement: write it followed by a semicolon, and it is safe as
 * the body of an unbraced if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12172
12173/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012174 * xmlCtxtReset:
12175 * @ctxt: an XML parser context
12176 *
12177 * Reset a parser context
12178 */
12179void
12180xmlCtxtReset(xmlParserCtxtPtr ctxt)
12181{
12182 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012183 xmlDictPtr dict;
12184
12185 if (ctxt == NULL)
12186 return;
12187
12188 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012189
12190 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12191 xmlFreeInputStream(input);
12192 }
12193 ctxt->inputNr = 0;
12194 ctxt->input = NULL;
12195
12196 ctxt->spaceNr = 0;
12197 ctxt->spaceTab[0] = -1;
12198 ctxt->space = &ctxt->spaceTab[0];
12199
12200
12201 ctxt->nodeNr = 0;
12202 ctxt->node = NULL;
12203
12204 ctxt->nameNr = 0;
12205 ctxt->name = NULL;
12206
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012207 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012208 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012209 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012210 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012211 DICT_FREE(ctxt->directory);
12212 ctxt->directory = NULL;
12213 DICT_FREE(ctxt->extSubURI);
12214 ctxt->extSubURI = NULL;
12215 DICT_FREE(ctxt->extSubSystem);
12216 ctxt->extSubSystem = NULL;
12217 if (ctxt->myDoc != NULL)
12218 xmlFreeDoc(ctxt->myDoc);
12219 ctxt->myDoc = NULL;
12220
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012221 ctxt->standalone = -1;
12222 ctxt->hasExternalSubset = 0;
12223 ctxt->hasPErefs = 0;
12224 ctxt->html = 0;
12225 ctxt->external = 0;
12226 ctxt->instate = XML_PARSER_START;
12227 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012228
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012229 ctxt->wellFormed = 1;
12230 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012231 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012232 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012233#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012234 ctxt->vctxt.userData = ctxt;
12235 ctxt->vctxt.error = xmlParserValidityError;
12236 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012237#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012238 ctxt->record_info = 0;
12239 ctxt->nbChars = 0;
12240 ctxt->checkIndex = 0;
12241 ctxt->inSubset = 0;
12242 ctxt->errNo = XML_ERR_OK;
12243 ctxt->depth = 0;
12244 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12245 ctxt->catalogs = NULL;
12246 xmlInitNodeInfoSeq(&ctxt->node_seq);
12247
12248 if (ctxt->attsDefault != NULL) {
12249 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12250 ctxt->attsDefault = NULL;
12251 }
12252 if (ctxt->attsSpecial != NULL) {
12253 xmlHashFree(ctxt->attsSpecial, NULL);
12254 ctxt->attsSpecial = NULL;
12255 }
12256
Daniel Veillard4432df22003-09-28 18:58:27 +000012257#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012258 if (ctxt->catalogs != NULL)
12259 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012260#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012261 if (ctxt->lastError.code != XML_ERR_OK)
12262 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012263}
12264
12265/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012266 * xmlCtxtResetPush:
12267 * @ctxt: an XML parser context
12268 * @chunk: a pointer to an array of chars
12269 * @size: number of chars in the array
12270 * @filename: an optional file name or URI
12271 * @encoding: the document encoding, or NULL
12272 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012273 * Reset a push parser context
12274 *
12275 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012276 */
12277int
12278xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12279 int size, const char *filename, const char *encoding)
12280{
12281 xmlParserInputPtr inputStream;
12282 xmlParserInputBufferPtr buf;
12283 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12284
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012285 if (ctxt == NULL)
12286 return(1);
12287
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012288 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12289 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12290
12291 buf = xmlAllocParserInputBuffer(enc);
12292 if (buf == NULL)
12293 return(1);
12294
12295 if (ctxt == NULL) {
12296 xmlFreeParserInputBuffer(buf);
12297 return(1);
12298 }
12299
12300 xmlCtxtReset(ctxt);
12301
12302 if (ctxt->pushTab == NULL) {
12303 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12304 sizeof(xmlChar *));
12305 if (ctxt->pushTab == NULL) {
12306 xmlErrMemory(ctxt, NULL);
12307 xmlFreeParserInputBuffer(buf);
12308 return(1);
12309 }
12310 }
12311
12312 if (filename == NULL) {
12313 ctxt->directory = NULL;
12314 } else {
12315 ctxt->directory = xmlParserGetDirectory(filename);
12316 }
12317
12318 inputStream = xmlNewInputStream(ctxt);
12319 if (inputStream == NULL) {
12320 xmlFreeParserInputBuffer(buf);
12321 return(1);
12322 }
12323
12324 if (filename == NULL)
12325 inputStream->filename = NULL;
12326 else
12327 inputStream->filename = (char *)
12328 xmlCanonicPath((const xmlChar *) filename);
12329 inputStream->buf = buf;
12330 inputStream->base = inputStream->buf->buffer->content;
12331 inputStream->cur = inputStream->buf->buffer->content;
12332 inputStream->end =
12333 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12334
12335 inputPush(ctxt, inputStream);
12336
12337 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12338 (ctxt->input->buf != NULL)) {
12339 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12340 int cur = ctxt->input->cur - ctxt->input->base;
12341
12342 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12343
12344 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12345 ctxt->input->cur = ctxt->input->base + cur;
12346 ctxt->input->end =
12347 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12348 use];
12349#ifdef DEBUG_PUSH
12350 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12351#endif
12352 }
12353
12354 if (encoding != NULL) {
12355 xmlCharEncodingHandlerPtr hdlr;
12356
12357 hdlr = xmlFindCharEncodingHandler(encoding);
12358 if (hdlr != NULL) {
12359 xmlSwitchToEncoding(ctxt, hdlr);
12360 } else {
12361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12362 "Unsupported encoding %s\n", BAD_CAST encoding);
12363 }
12364 } else if (enc != XML_CHAR_ENCODING_NONE) {
12365 xmlSwitchEncoding(ctxt, enc);
12366 }
12367
12368 return(0);
12369}
12370
12371/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012372 * xmlCtxtUseOptions:
12373 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012374 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012375 *
12376 * Applies the options to the parser context
12377 *
12378 * Returns 0 in case of success, the set of unknown or unimplemented options
12379 * in case of error.
12380 */
12381int
12382xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12383{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012384 if (ctxt == NULL)
12385 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012386 if (options & XML_PARSE_RECOVER) {
12387 ctxt->recovery = 1;
12388 options -= XML_PARSE_RECOVER;
12389 } else
12390 ctxt->recovery = 0;
12391 if (options & XML_PARSE_DTDLOAD) {
12392 ctxt->loadsubset = XML_DETECT_IDS;
12393 options -= XML_PARSE_DTDLOAD;
12394 } else
12395 ctxt->loadsubset = 0;
12396 if (options & XML_PARSE_DTDATTR) {
12397 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12398 options -= XML_PARSE_DTDATTR;
12399 }
12400 if (options & XML_PARSE_NOENT) {
12401 ctxt->replaceEntities = 1;
12402 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12403 options -= XML_PARSE_NOENT;
12404 } else
12405 ctxt->replaceEntities = 0;
12406 if (options & XML_PARSE_NOWARNING) {
12407 ctxt->sax->warning = NULL;
12408 options -= XML_PARSE_NOWARNING;
12409 }
12410 if (options & XML_PARSE_NOERROR) {
12411 ctxt->sax->error = NULL;
12412 ctxt->sax->fatalError = NULL;
12413 options -= XML_PARSE_NOERROR;
12414 }
12415 if (options & XML_PARSE_PEDANTIC) {
12416 ctxt->pedantic = 1;
12417 options -= XML_PARSE_PEDANTIC;
12418 } else
12419 ctxt->pedantic = 0;
12420 if (options & XML_PARSE_NOBLANKS) {
12421 ctxt->keepBlanks = 0;
12422 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12423 options -= XML_PARSE_NOBLANKS;
12424 } else
12425 ctxt->keepBlanks = 1;
12426 if (options & XML_PARSE_DTDVALID) {
12427 ctxt->validate = 1;
12428 if (options & XML_PARSE_NOWARNING)
12429 ctxt->vctxt.warning = NULL;
12430 if (options & XML_PARSE_NOERROR)
12431 ctxt->vctxt.error = NULL;
12432 options -= XML_PARSE_DTDVALID;
12433 } else
12434 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012435#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012436 if (options & XML_PARSE_SAX1) {
12437 ctxt->sax->startElement = xmlSAX2StartElement;
12438 ctxt->sax->endElement = xmlSAX2EndElement;
12439 ctxt->sax->startElementNs = NULL;
12440 ctxt->sax->endElementNs = NULL;
12441 ctxt->sax->initialized = 1;
12442 options -= XML_PARSE_SAX1;
12443 }
Daniel Veillard81273902003-09-30 00:43:48 +000012444#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012445 if (options & XML_PARSE_NODICT) {
12446 ctxt->dictNames = 0;
12447 options -= XML_PARSE_NODICT;
12448 } else {
12449 ctxt->dictNames = 1;
12450 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012451 if (options & XML_PARSE_NOCDATA) {
12452 ctxt->sax->cdataBlock = NULL;
12453 options -= XML_PARSE_NOCDATA;
12454 }
12455 if (options & XML_PARSE_NSCLEAN) {
12456 ctxt->options |= XML_PARSE_NSCLEAN;
12457 options -= XML_PARSE_NSCLEAN;
12458 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012459 if (options & XML_PARSE_NONET) {
12460 ctxt->options |= XML_PARSE_NONET;
12461 options -= XML_PARSE_NONET;
12462 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012463 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012464 return (options);
12465}
12466
12467/**
12468 * xmlDoRead:
12469 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012470 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012471 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012472 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012473 * @reuse: keep the context for reuse
12474 *
12475 * Common front-end for the xmlRead functions
12476 *
12477 * Returns the resulting document tree or NULL
12478 */
12479static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012480xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12481 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012482{
12483 xmlDocPtr ret;
12484
12485 xmlCtxtUseOptions(ctxt, options);
12486 if (encoding != NULL) {
12487 xmlCharEncodingHandlerPtr hdlr;
12488
12489 hdlr = xmlFindCharEncodingHandler(encoding);
12490 if (hdlr != NULL)
12491 xmlSwitchToEncoding(ctxt, hdlr);
12492 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012493 if ((URL != NULL) && (ctxt->input != NULL) &&
12494 (ctxt->input->filename == NULL))
12495 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012496 xmlParseDocument(ctxt);
12497 if ((ctxt->wellFormed) || ctxt->recovery)
12498 ret = ctxt->myDoc;
12499 else {
12500 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012501 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012502 xmlFreeDoc(ctxt->myDoc);
12503 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012504 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012505 ctxt->myDoc = NULL;
12506 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012507 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012508 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012509
12510 return (ret);
12511}
12512
12513/**
12514 * xmlReadDoc:
12515 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012516 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012517 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012518 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012519 *
12520 * parse an XML in-memory document and build a tree.
12521 *
12522 * Returns the resulting document tree
12523 */
12524xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012525xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012526{
12527 xmlParserCtxtPtr ctxt;
12528
12529 if (cur == NULL)
12530 return (NULL);
12531
12532 ctxt = xmlCreateDocParserCtxt(cur);
12533 if (ctxt == NULL)
12534 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012535 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012536}
12537
12538/**
12539 * xmlReadFile:
12540 * @filename: a file or URL
12541 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012542 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012543 *
12544 * parse an XML file from the filesystem or the network.
12545 *
12546 * Returns the resulting document tree
12547 */
12548xmlDocPtr
12549xmlReadFile(const char *filename, const char *encoding, int options)
12550{
12551 xmlParserCtxtPtr ctxt;
12552
Daniel Veillard61b93382003-11-03 14:28:31 +000012553 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012554 if (ctxt == NULL)
12555 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012556 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012557}
12558
12559/**
12560 * xmlReadMemory:
12561 * @buffer: a pointer to a char array
12562 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012563 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012564 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012565 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012566 *
12567 * parse an XML in-memory document and build a tree.
12568 *
12569 * Returns the resulting document tree
12570 */
12571xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012572xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012573{
12574 xmlParserCtxtPtr ctxt;
12575
12576 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12577 if (ctxt == NULL)
12578 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012579 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012580}
12581
12582/**
12583 * xmlReadFd:
12584 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012585 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012586 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012587 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012588 *
12589 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012590 * NOTE that the file descriptor will not be closed when the
12591 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012592 *
12593 * Returns the resulting document tree
12594 */
12595xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012596xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012597{
12598 xmlParserCtxtPtr ctxt;
12599 xmlParserInputBufferPtr input;
12600 xmlParserInputPtr stream;
12601
12602 if (fd < 0)
12603 return (NULL);
12604
12605 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12606 if (input == NULL)
12607 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012608 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012609 ctxt = xmlNewParserCtxt();
12610 if (ctxt == NULL) {
12611 xmlFreeParserInputBuffer(input);
12612 return (NULL);
12613 }
12614 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12615 if (stream == NULL) {
12616 xmlFreeParserInputBuffer(input);
12617 xmlFreeParserCtxt(ctxt);
12618 return (NULL);
12619 }
12620 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012621 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012622}
12623
12624/**
12625 * xmlReadIO:
12626 * @ioread: an I/O read function
12627 * @ioclose: an I/O close function
12628 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012629 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012630 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012631 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012632 *
12633 * parse an XML document from I/O functions and source and build a tree.
12634 *
12635 * Returns the resulting document tree
12636 */
12637xmlDocPtr
12638xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012639 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012640{
12641 xmlParserCtxtPtr ctxt;
12642 xmlParserInputBufferPtr input;
12643 xmlParserInputPtr stream;
12644
12645 if (ioread == NULL)
12646 return (NULL);
12647
12648 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12649 XML_CHAR_ENCODING_NONE);
12650 if (input == NULL)
12651 return (NULL);
12652 ctxt = xmlNewParserCtxt();
12653 if (ctxt == NULL) {
12654 xmlFreeParserInputBuffer(input);
12655 return (NULL);
12656 }
12657 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12658 if (stream == NULL) {
12659 xmlFreeParserInputBuffer(input);
12660 xmlFreeParserCtxt(ctxt);
12661 return (NULL);
12662 }
12663 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012664 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012665}
12666
12667/**
12668 * xmlCtxtReadDoc:
12669 * @ctxt: an XML parser context
12670 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012671 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012672 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012673 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012674 *
12675 * parse an XML in-memory document and build a tree.
12676 * This reuses the existing @ctxt parser context
12677 *
12678 * Returns the resulting document tree
12679 */
12680xmlDocPtr
12681xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012682 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012683{
12684 xmlParserInputPtr stream;
12685
12686 if (cur == NULL)
12687 return (NULL);
12688 if (ctxt == NULL)
12689 return (NULL);
12690
12691 xmlCtxtReset(ctxt);
12692
12693 stream = xmlNewStringInputStream(ctxt, cur);
12694 if (stream == NULL) {
12695 return (NULL);
12696 }
12697 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012698 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012699}
12700
12701/**
12702 * xmlCtxtReadFile:
12703 * @ctxt: an XML parser context
12704 * @filename: a file or URL
12705 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012706 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012707 *
12708 * parse an XML file from the filesystem or the network.
12709 * This reuses the existing @ctxt parser context
12710 *
12711 * Returns the resulting document tree
12712 */
12713xmlDocPtr
12714xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12715 const char *encoding, int options)
12716{
12717 xmlParserInputPtr stream;
12718
12719 if (filename == NULL)
12720 return (NULL);
12721 if (ctxt == NULL)
12722 return (NULL);
12723
12724 xmlCtxtReset(ctxt);
12725
12726 stream = xmlNewInputFromFile(ctxt, filename);
12727 if (stream == NULL) {
12728 return (NULL);
12729 }
12730 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012731 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012732}
12733
12734/**
12735 * xmlCtxtReadMemory:
12736 * @ctxt: an XML parser context
12737 * @buffer: a pointer to a char array
12738 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012739 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012740 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012741 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012742 *
12743 * parse an XML in-memory document and build a tree.
12744 * This reuses the existing @ctxt parser context
12745 *
12746 * Returns the resulting document tree
12747 */
12748xmlDocPtr
12749xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012750 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012751{
12752 xmlParserInputBufferPtr input;
12753 xmlParserInputPtr stream;
12754
12755 if (ctxt == NULL)
12756 return (NULL);
12757 if (buffer == NULL)
12758 return (NULL);
12759
12760 xmlCtxtReset(ctxt);
12761
12762 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12763 if (input == NULL) {
12764 return(NULL);
12765 }
12766
12767 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12768 if (stream == NULL) {
12769 xmlFreeParserInputBuffer(input);
12770 return(NULL);
12771 }
12772
12773 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012774 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012775}
12776
12777/**
12778 * xmlCtxtReadFd:
12779 * @ctxt: an XML parser context
12780 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012781 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012782 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012783 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012784 *
12785 * parse an XML from a file descriptor and build a tree.
12786 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012787 * NOTE that the file descriptor will not be closed when the
12788 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012789 *
12790 * Returns the resulting document tree
12791 */
12792xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012793xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12794 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012795{
12796 xmlParserInputBufferPtr input;
12797 xmlParserInputPtr stream;
12798
12799 if (fd < 0)
12800 return (NULL);
12801 if (ctxt == NULL)
12802 return (NULL);
12803
12804 xmlCtxtReset(ctxt);
12805
12806
12807 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12808 if (input == NULL)
12809 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012810 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012811 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12812 if (stream == NULL) {
12813 xmlFreeParserInputBuffer(input);
12814 return (NULL);
12815 }
12816 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012817 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012818}
12819
12820/**
12821 * xmlCtxtReadIO:
12822 * @ctxt: an XML parser context
12823 * @ioread: an I/O read function
12824 * @ioclose: an I/O close function
12825 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012826 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012827 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012828 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012829 *
12830 * parse an XML document from I/O functions and source and build a tree.
12831 * This reuses the existing @ctxt parser context
12832 *
12833 * Returns the resulting document tree
12834 */
12835xmlDocPtr
12836xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12837 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012838 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012839 const char *encoding, int options)
12840{
12841 xmlParserInputBufferPtr input;
12842 xmlParserInputPtr stream;
12843
12844 if (ioread == NULL)
12845 return (NULL);
12846 if (ctxt == NULL)
12847 return (NULL);
12848
12849 xmlCtxtReset(ctxt);
12850
12851 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12852 XML_CHAR_ENCODING_NONE);
12853 if (input == NULL)
12854 return (NULL);
12855 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12856 if (stream == NULL) {
12857 xmlFreeParserInputBuffer(input);
12858 return (NULL);
12859 }
12860 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012861 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012862}