blob: 7a51c051d9f6daed100f2815f26b2d2422570c7a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser agrees
 * to process. It is a safety boundary rather than a limitation of the
 * parser itself.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * array slots are const: the list is fixed at compile time and must
 * never be modified at run time.
 */
static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000608 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
609 (ctxt->str_xml_ns == NULL)) {
610 xmlErrMemory(ctxt, NULL);
611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000612}
613
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614typedef struct _xmlDefAttrs xmlDefAttrs;
615typedef xmlDefAttrs *xmlDefAttrsPtr;
616struct _xmlDefAttrs {
617 int nbAttrs; /* number of defaulted attributes on that element */
618 int maxAttrs; /* the size of the array */
619 const xmlChar *values[4]; /* array of localname/prefix/values */
620};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000621
622/**
623 * xmlAddDefAttrs:
624 * @ctxt: an XML parser context
625 * @fullname: the element fullname
626 * @fullattr: the attribute fullname
627 * @value: the attribute value
628 *
629 * Add a defaulted attribute for an element
630 */
631static void
632xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
633 const xmlChar *fullname,
634 const xmlChar *fullattr,
635 const xmlChar *value) {
636 xmlDefAttrsPtr defaults;
637 int len;
638 const xmlChar *name;
639 const xmlChar *prefix;
640
641 if (ctxt->attsDefault == NULL) {
642 ctxt->attsDefault = xmlHashCreate(10);
643 if (ctxt->attsDefault == NULL)
644 goto mem_error;
645 }
646
647 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000648 * split the element name into prefix:localname , the string found
649 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000650 */
651 name = xmlSplitQName3(fullname, &len);
652 if (name == NULL) {
653 name = xmlDictLookup(ctxt->dict, fullname, -1);
654 prefix = NULL;
655 } else {
656 name = xmlDictLookup(ctxt->dict, name, -1);
657 prefix = xmlDictLookup(ctxt->dict, fullname, len);
658 }
659
660 /*
661 * make sure there is some storage
662 */
663 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
664 if (defaults == NULL) {
665 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000666 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000667 if (defaults == NULL)
668 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000670 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
672 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000673 xmlDefAttrsPtr temp;
674
675 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000676 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000677 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 defaults->maxAttrs *= 2;
681 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
682 }
683
684 /*
685 * plit the element name into prefix:localname , the string found
686 * are within the DTD and hen not associated to namespace names.
687 */
688 name = xmlSplitQName3(fullattr, &len);
689 if (name == NULL) {
690 name = xmlDictLookup(ctxt->dict, fullattr, -1);
691 prefix = NULL;
692 } else {
693 name = xmlDictLookup(ctxt->dict, name, -1);
694 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
695 }
696
697 defaults->values[4 * defaults->nbAttrs] = name;
698 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
699 /* intern the string and precompute the end */
700 len = xmlStrlen(value);
701 value = xmlDictLookup(ctxt->dict, value, len);
702 defaults->values[4 * defaults->nbAttrs + 2] = value;
703 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
704 defaults->nbAttrs++;
705
706 return;
707
708mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000709 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000710 return;
711}
712
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000713/**
714 * xmlAddSpecialAttr:
715 * @ctxt: an XML parser context
716 * @fullname: the element fullname
717 * @fullattr: the attribute fullname
718 * @type: the attribute type
719 *
720 * Register that this attribute is not CDATA
721 */
722static void
723xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
724 const xmlChar *fullname,
725 const xmlChar *fullattr,
726 int type)
727{
728 if (ctxt->attsSpecial == NULL) {
729 ctxt->attsSpecial = xmlHashCreate(10);
730 if (ctxt->attsSpecial == NULL)
731 goto mem_error;
732 }
733
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000734 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
735 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000736 return;
737
738mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000739 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000740 return;
741}
742
Daniel Veillard4432df22003-09-28 18:58:27 +0000743/**
744 * xmlCheckLanguageID:
745 * @lang: pointer to the string value
746 *
747 * Checks that the value conforms to the LanguageID production:
748 *
749 * NOTE: this is somewhat deprecated, those productions were removed from
750 * the XML Second edition.
751 *
752 * [33] LanguageID ::= Langcode ('-' Subcode)*
753 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
754 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
755 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
756 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
757 * [38] Subcode ::= ([a-z] | [A-Z])+
758 *
759 * Returns 1 if correct 0 otherwise
760 **/
761int
762xmlCheckLanguageID(const xmlChar * lang)
763{
764 const xmlChar *cur = lang;
765
766 if (cur == NULL)
767 return (0);
768 if (((cur[0] == 'i') && (cur[1] == '-')) ||
769 ((cur[0] == 'I') && (cur[1] == '-'))) {
770 /*
771 * IANA code
772 */
773 cur += 2;
774 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
775 ((cur[0] >= 'a') && (cur[0] <= 'z')))
776 cur++;
777 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
778 ((cur[0] == 'X') && (cur[1] == '-'))) {
779 /*
780 * User code
781 */
782 cur += 2;
783 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
784 ((cur[0] >= 'a') && (cur[0] <= 'z')))
785 cur++;
786 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
787 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
788 /*
789 * ISO639
790 */
791 cur++;
792 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
793 ((cur[0] >= 'a') && (cur[0] <= 'z')))
794 cur++;
795 else
796 return (0);
797 } else
798 return (0);
799 while (cur[0] != 0) { /* non input consuming */
800 if (cur[0] != '-')
801 return (0);
802 cur++;
803 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
804 ((cur[0] >= 'a') && (cur[0] <= 'z')))
805 cur++;
806 else
807 return (0);
808 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
809 ((cur[0] >= 'a') && (cur[0] <= 'z')))
810 cur++;
811 }
812 return (1);
813}
814
Owen Taylor3473f882001-02-23 17:55:21 +0000815/************************************************************************
816 * *
817 * Parser stacks related functions and macros *
818 * *
819 ************************************************************************/
820
821xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
822 const xmlChar ** str);
823
Daniel Veillard0fb18932003-09-07 09:14:37 +0000824#ifdef SAX2
825/**
826 * nsPush:
827 * @ctxt: an XML parser context
828 * @prefix: the namespace prefix or NULL
829 * @URL: the namespace name
830 *
831 * Pushes a new parser namespace on top of the ns stack
832 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000833 * Returns -1 in case of error, -2 if the namespace should be discarded
834 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000835 */
836static int
837nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
838{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000839 if (ctxt->options & XML_PARSE_NSCLEAN) {
840 int i;
841 for (i = 0;i < ctxt->nsNr;i += 2) {
842 if (ctxt->nsTab[i] == prefix) {
843 /* in scope */
844 if (ctxt->nsTab[i + 1] == URL)
845 return(-2);
846 /* out of scope keep it */
847 break;
848 }
849 }
850 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
852 ctxt->nsMax = 10;
853 ctxt->nsNr = 0;
854 ctxt->nsTab = (const xmlChar **)
855 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
856 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000857 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax = 0;
859 return (-1);
860 }
861 } else if (ctxt->nsNr >= ctxt->nsMax) {
862 ctxt->nsMax *= 2;
863 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000864 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000865 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
866 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000867 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->nsMax /= 2;
869 return (-1);
870 }
871 }
872 ctxt->nsTab[ctxt->nsNr++] = prefix;
873 ctxt->nsTab[ctxt->nsNr++] = URL;
874 return (ctxt->nsNr);
875}
876/**
877 * nsPop:
878 * @ctxt: an XML parser context
879 * @nr: the number to pop
880 *
881 * Pops the top @nr parser prefix/namespace from the ns stack
882 *
883 * Returns the number of namespaces removed
884 */
885static int
886nsPop(xmlParserCtxtPtr ctxt, int nr)
887{
888 int i;
889
890 if (ctxt->nsTab == NULL) return(0);
891 if (ctxt->nsNr < nr) {
892 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
893 nr = ctxt->nsNr;
894 }
895 if (ctxt->nsNr <= 0)
896 return (0);
897
898 for (i = 0;i < nr;i++) {
899 ctxt->nsNr--;
900 ctxt->nsTab[ctxt->nsNr] = NULL;
901 }
902 return(nr);
903}
904#endif
905
906static int
907xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
908 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 int maxatts;
911
912 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000913 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 atts = (const xmlChar **)
915 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
919 if (attallocs == NULL) goto mem_error;
920 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000921 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000922 } else if (nr + 5 > ctxt->maxatts) {
923 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
925 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000927 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
929 (maxatts / 5) * sizeof(int));
930 if (attallocs == NULL) goto mem_error;
931 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000932 ctxt->maxatts = maxatts;
933 }
934 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000935mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000936 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000938}
939
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000940/**
941 * inputPush:
942 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000943 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000944 *
945 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000946 *
947 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000948 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000949int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000950inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
951{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000952 if ((ctxt == NULL) || (value == NULL))
953 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000954 if (ctxt->inputNr >= ctxt->inputMax) {
955 ctxt->inputMax *= 2;
956 ctxt->inputTab =
957 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
958 ctxt->inputMax *
959 sizeof(ctxt->inputTab[0]));
960 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000961 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000962 return (0);
963 }
964 }
965 ctxt->inputTab[ctxt->inputNr] = value;
966 ctxt->input = value;
967 return (ctxt->inputNr++);
968}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000969/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000970 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971 * @ctxt: an XML parser context
972 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000977xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000978inputPop(xmlParserCtxtPtr ctxt)
979{
980 xmlParserInputPtr ret;
981
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000982 if (ctxt == NULL)
983 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000984 if (ctxt->inputNr <= 0)
985 return (0);
986 ctxt->inputNr--;
987 if (ctxt->inputNr > 0)
988 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
989 else
990 ctxt->input = NULL;
991 ret = ctxt->inputTab[ctxt->inputNr];
992 ctxt->inputTab[ctxt->inputNr] = 0;
993 return (ret);
994}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000995/**
996 * nodePush:
997 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000998 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000999 *
1000 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001001 *
1002 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001003 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001004int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001005nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1006{
1007 if (ctxt->nodeNr >= ctxt->nodeMax) {
1008 ctxt->nodeMax *= 2;
1009 ctxt->nodeTab =
1010 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1011 ctxt->nodeMax *
1012 sizeof(ctxt->nodeTab[0]));
1013 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001014 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001015 return (0);
1016 }
1017 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001018 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001019 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001020 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1021 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001022 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001023 return(0);
1024 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001025 ctxt->nodeTab[ctxt->nodeNr] = value;
1026 ctxt->node = value;
1027 return (ctxt->nodeNr++);
1028}
1029/**
1030 * nodePop:
1031 * @ctxt: an XML parser context
1032 *
1033 * Pops the top element node from the node stack
1034 *
1035 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001036 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001037xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001038nodePop(xmlParserCtxtPtr ctxt)
1039{
1040 xmlNodePtr ret;
1041
1042 if (ctxt->nodeNr <= 0)
1043 return (0);
1044 ctxt->nodeNr--;
1045 if (ctxt->nodeNr > 0)
1046 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1047 else
1048 ctxt->node = NULL;
1049 ret = ctxt->nodeTab[ctxt->nodeNr];
1050 ctxt->nodeTab[ctxt->nodeNr] = 0;
1051 return (ret);
1052}
Daniel Veillarda2351322004-06-27 12:08:10 +00001053
1054#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001055/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 * nameNsPush:
1057 * @ctxt: an XML parser context
1058 * @value: the element name
1059 * @prefix: the element prefix
1060 * @URI: the element namespace name
1061 *
1062 * Pushes a new element name/prefix/URL on top of the name stack
1063 *
1064 * Returns -1 in case of error, the index in the stack otherwise
1065 */
1066static int
1067nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1068 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1069{
1070 if (ctxt->nameNr >= ctxt->nameMax) {
1071 const xmlChar * *tmp;
1072 void **tmp2;
1073 ctxt->nameMax *= 2;
1074 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1075 ctxt->nameMax *
1076 sizeof(ctxt->nameTab[0]));
1077 if (tmp == NULL) {
1078 ctxt->nameMax /= 2;
1079 goto mem_error;
1080 }
1081 ctxt->nameTab = tmp;
1082 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1083 ctxt->nameMax * 3 *
1084 sizeof(ctxt->pushTab[0]));
1085 if (tmp2 == NULL) {
1086 ctxt->nameMax /= 2;
1087 goto mem_error;
1088 }
1089 ctxt->pushTab = tmp2;
1090 }
1091 ctxt->nameTab[ctxt->nameNr] = value;
1092 ctxt->name = value;
1093 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1094 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001095 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 return (ctxt->nameNr++);
1097mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001098 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001099 return (-1);
1100}
1101/**
1102 * nameNsPop:
1103 * @ctxt: an XML parser context
1104 *
1105 * Pops the top element/prefix/URI name from the name stack
1106 *
1107 * Returns the name just removed
1108 */
1109static const xmlChar *
1110nameNsPop(xmlParserCtxtPtr ctxt)
1111{
1112 const xmlChar *ret;
1113
1114 if (ctxt->nameNr <= 0)
1115 return (0);
1116 ctxt->nameNr--;
1117 if (ctxt->nameNr > 0)
1118 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1119 else
1120 ctxt->name = NULL;
1121 ret = ctxt->nameTab[ctxt->nameNr];
1122 ctxt->nameTab[ctxt->nameNr] = NULL;
1123 return (ret);
1124}
Daniel Veillarda2351322004-06-27 12:08:10 +00001125#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126
1127/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001128 * namePush:
1129 * @ctxt: an XML parser context
1130 * @value: the element name
1131 *
1132 * Pushes a new element name on top of the name stack
1133 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001135 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001136int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001137namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001138{
1139 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143 ctxt->nameMax *
1144 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 if (tmp == NULL) {
1146 ctxt->nameMax /= 2;
1147 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001148 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001150 }
1151 ctxt->nameTab[ctxt->nameNr] = value;
1152 ctxt->name = value;
1153 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001155 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001157}
1158/**
1159 * namePop:
1160 * @ctxt: an XML parser context
1161 *
1162 * Pops the top element name from the name stack
1163 *
1164 * Returns the name just removed
1165 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001166const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001167namePop(xmlParserCtxtPtr ctxt)
1168{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001169 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170
1171 if (ctxt->nameNr <= 0)
1172 return (0);
1173 ctxt->nameNr--;
1174 if (ctxt->nameNr > 0)
1175 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1176 else
1177 ctxt->name = NULL;
1178 ret = ctxt->nameTab[ctxt->nameNr];
1179 ctxt->nameTab[ctxt->nameNr] = 0;
1180 return (ret);
1181}
Owen Taylor3473f882001-02-23 17:55:21 +00001182
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001183static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001184 if (ctxt->spaceNr >= ctxt->spaceMax) {
1185 ctxt->spaceMax *= 2;
1186 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1187 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1188 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001189 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001190 return(0);
1191 }
1192 }
1193 ctxt->spaceTab[ctxt->spaceNr] = val;
1194 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1195 return(ctxt->spaceNr++);
1196}
1197
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001198static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001199 int ret;
1200 if (ctxt->spaceNr <= 0) return(0);
1201 ctxt->spaceNr--;
1202 if (ctxt->spaceNr > 0)
1203 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1204 else
1205 ctxt->space = NULL;
1206 ret = ctxt->spaceTab[ctxt->spaceNr];
1207 ctxt->spaceTab[ctxt->spaceNr] = -1;
1208 return(ret);
1209}
1210
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of these expand to expressions using a variable named ctxt, which
 * must be in scope at the point of use.
 */

/* RAW and CUR currently expand to the same expression */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
1250
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * equal to c1 ... cn, the bytes being read as unsigned char.
 * Each macro is built on top of the previous one, so the comparison
 * stops at the first mismatch. Note that s is evaluated several times
 * and is not parenthesized in the expansion.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )
1268
/*
 * SKIP(val): advance the input cursor, the column and ctxt->nbChars by
 * val, without looking at the skipped bytes (so no line accounting).
 * Afterwards a '%' at the new position triggers
 * xmlParserHandlePEReference(), and if the cursor sits on a 0 byte and
 * the input cannot be grown the current input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but the bytes are skipped one at a time so
 * that a '\n' increments the line counter and resets the column to 1.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1291
Daniel Veillarda880b122003-04-21 21:36:41 +00001292#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001293 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1294 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001295 xmlSHRINK (ctxt);
1296
1297static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1298 xmlParserInputShrink(ctxt->input);
1299 if ((*ctxt->input->cur == 0) &&
1300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001302 }
Owen Taylor3473f882001-02-23 17:55:21 +00001303
Daniel Veillarda880b122003-04-21 21:36:41 +00001304#define GROW if ((ctxt->progressive == 0) && \
1305 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001306 xmlGROW (ctxt);
1307
1308static void xmlGROW (xmlParserCtxtPtr ctxt) {
1309 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1310 if ((*ctxt->input->cur == 0) &&
1311 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1312 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001313}
Owen Taylor3473f882001-02-23 17:55:21 +00001314
/* SKIP_BLANKS: skip blank characters, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor, the column and ctxt->nbChars by one, then
 * try to read more data if the cursor reached a 0 byte.
 * Expands to a brace block, not to a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long,
 * updating line and column (a '\n' starts a new line), then handle a
 * parameter entity reference if the cursor lands on '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR / CUR_SCHAR: current character, its length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a character of length l == 1 is stored directly, others go
 * through xmlCopyCharMultiByte(). Expands to a bare if/else statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001341
1342/**
1343 * xmlSkipBlankChars:
1344 * @ctxt: the XML parser context
1345 *
1346 * skip all blanks character found at that point in the input streams.
1347 * It pops up finished entities in the process if allowable at that point.
1348 *
1349 * Returns the number of space chars skipped
1350 */
1351
1352int
1353xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001354 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001355
1356 /*
1357 * It's Okay to use CUR/NEXT here since all the blanks are on
1358 * the ASCII range.
1359 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001360 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1361 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001362 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001363 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001364 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001365 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001366 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001367 if (*cur == '\n') {
1368 ctxt->input->line++; ctxt->input->col = 1;
1369 }
1370 cur++;
1371 res++;
1372 if (*cur == 0) {
1373 ctxt->input->cur = cur;
1374 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1375 cur = ctxt->input->cur;
1376 }
1377 }
1378 ctxt->input->cur = cur;
1379 } else {
1380 int cur;
1381 do {
1382 cur = CUR;
1383 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1384 NEXT;
1385 cur = CUR;
1386 res++;
1387 }
1388 while ((cur == 0) && (ctxt->inputNr > 1) &&
1389 (ctxt->instate != XML_PARSER_COMMENT)) {
1390 xmlPopInput(ctxt);
1391 cur = CUR;
1392 }
1393 /*
1394 * Need to handle support of entities branching here
1395 */
1396 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1397 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1398 }
Owen Taylor3473f882001-02-23 17:55:21 +00001399 return(res);
1400}
1401
1402/************************************************************************
1403 * *
1404 * Commodity functions to handle entities *
1405 * *
1406 ************************************************************************/
1407
1408/**
1409 * xmlPopInput:
1410 * @ctxt: an XML parser context
1411 *
1412 * xmlPopInput: the current input pointed by ctxt->input came to an end
1413 * pop it and return the next char.
1414 *
1415 * Returns the current xmlChar in the parser context
1416 */
1417xmlChar
1418xmlPopInput(xmlParserCtxtPtr ctxt) {
1419 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1420 if (xmlParserDebugEntities)
1421 xmlGenericError(xmlGenericErrorContext,
1422 "Popping input %d\n", ctxt->inputNr);
1423 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001424 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001425 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1426 return(xmlPopInput(ctxt));
1427 return(CUR);
1428}
1429
1430/**
1431 * xmlPushInput:
1432 * @ctxt: an XML parser context
1433 * @input: an XML parser input fragment (entity, XML fragment ...).
1434 *
1435 * xmlPushInput: switch to a new input stream which is stacked on top
1436 * of the previous one(s).
1437 */
1438void
1439xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1440 if (input == NULL) return;
1441
1442 if (xmlParserDebugEntities) {
1443 if ((ctxt->input != NULL) && (ctxt->input->filename))
1444 xmlGenericError(xmlGenericErrorContext,
1445 "%s(%d): ", ctxt->input->filename,
1446 ctxt->input->line);
1447 xmlGenericError(xmlGenericErrorContext,
1448 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1449 }
1450 inputPush(ctxt, input);
1451 GROW;
1452}
1453
1454/**
1455 * xmlParseCharRef:
1456 * @ctxt: an XML parser context
1457 *
1458 * parse Reference declarations
1459 *
1460 * [66] CharRef ::= '&#' [0-9]+ ';' |
1461 * '&#x' [0-9a-fA-F]+ ';'
1462 *
1463 * [ WFC: Legal Character ]
1464 * Characters referred to using character references must match the
1465 * production for Char.
1466 *
1467 * Returns the value parsed (as an int), 0 in case of error
1468 */
1469int
1470xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001471 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001472 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001473 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001474
Owen Taylor3473f882001-02-23 17:55:21 +00001475 /*
1476 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1477 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001478 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001479 (NXT(2) == 'x')) {
1480 SKIP(3);
1481 GROW;
1482 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001483 if (count++ > 20) {
1484 count = 0;
1485 GROW;
1486 }
1487 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001488 val = val * 16 + (CUR - '0');
1489 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1490 val = val * 16 + (CUR - 'a') + 10;
1491 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1492 val = val * 16 + (CUR - 'A') + 10;
1493 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001494 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001495 val = 0;
1496 break;
1497 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001498 if (val > 0x10FFFF)
1499 outofrange = val;
1500
Owen Taylor3473f882001-02-23 17:55:21 +00001501 NEXT;
1502 count++;
1503 }
1504 if (RAW == ';') {
1505 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001506 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001507 ctxt->nbChars ++;
1508 ctxt->input->cur++;
1509 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001510 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 SKIP(2);
1512 GROW;
1513 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001514 if (count++ > 20) {
1515 count = 0;
1516 GROW;
1517 }
1518 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001519 val = val * 10 + (CUR - '0');
1520 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001521 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001522 val = 0;
1523 break;
1524 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001525 if (val > 0x10FFFF)
1526 outofrange = val;
1527
Owen Taylor3473f882001-02-23 17:55:21 +00001528 NEXT;
1529 count++;
1530 }
1531 if (RAW == ';') {
1532 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001533 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001534 ctxt->nbChars ++;
1535 ctxt->input->cur++;
1536 }
1537 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001538 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001539 }
1540
1541 /*
1542 * [ WFC: Legal Character ]
1543 * Characters referred to using character references must match the
1544 * production for Char.
1545 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001546 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001547 return(val);
1548 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001549 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1550 "xmlParseCharRef: invalid xmlChar value %d\n",
1551 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001552 }
1553 return(0);
1554}
1555
1556/**
1557 * xmlParseStringCharRef:
1558 * @ctxt: an XML parser context
1559 * @str: a pointer to an index in the string
1560 *
1561 * parse Reference declarations, variant parsing from a string rather
1562 * than an an input flow.
1563 *
1564 * [66] CharRef ::= '&#' [0-9]+ ';' |
1565 * '&#x' [0-9a-fA-F]+ ';'
1566 *
1567 * [ WFC: Legal Character ]
1568 * Characters referred to using character references must match the
1569 * production for Char.
1570 *
1571 * Returns the value parsed (as an int), 0 in case of error, str will be
1572 * updated to the current value of the index
1573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001574static int
Owen Taylor3473f882001-02-23 17:55:21 +00001575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1576 const xmlChar *ptr;
1577 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001578 unsigned int val = 0;
1579 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001580
1581 if ((str == NULL) || (*str == NULL)) return(0);
1582 ptr = *str;
1583 cur = *ptr;
1584 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1585 ptr += 3;
1586 cur = *ptr;
1587 while (cur != ';') { /* Non input consuming loop */
1588 if ((cur >= '0') && (cur <= '9'))
1589 val = val * 16 + (cur - '0');
1590 else if ((cur >= 'a') && (cur <= 'f'))
1591 val = val * 16 + (cur - 'a') + 10;
1592 else if ((cur >= 'A') && (cur <= 'F'))
1593 val = val * 16 + (cur - 'A') + 10;
1594 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001595 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001596 val = 0;
1597 break;
1598 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001599 if (val > 0x10FFFF)
1600 outofrange = val;
1601
Owen Taylor3473f882001-02-23 17:55:21 +00001602 ptr++;
1603 cur = *ptr;
1604 }
1605 if (cur == ';')
1606 ptr++;
1607 } else if ((cur == '&') && (ptr[1] == '#')){
1608 ptr += 2;
1609 cur = *ptr;
1610 while (cur != ';') { /* Non input consuming loops */
1611 if ((cur >= '0') && (cur <= '9'))
1612 val = val * 10 + (cur - '0');
1613 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001614 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001615 val = 0;
1616 break;
1617 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001618 if (val > 0x10FFFF)
1619 outofrange = val;
1620
Owen Taylor3473f882001-02-23 17:55:21 +00001621 ptr++;
1622 cur = *ptr;
1623 }
1624 if (cur == ';')
1625 ptr++;
1626 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001627 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001628 return(0);
1629 }
1630 *str = ptr;
1631
1632 /*
1633 * [ WFC: Legal Character ]
1634 * Characters referred to using character references must match the
1635 * production for Char.
1636 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001637 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001638 return(val);
1639 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001640 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1641 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1642 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001643 }
1644 return(0);
1645}
1646
1647/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001648 * xmlNewBlanksWrapperInputStream:
1649 * @ctxt: an XML parser context
1650 * @entity: an Entity pointer
1651 *
1652 * Create a new input stream for wrapping
1653 * blanks around a PEReference
1654 *
1655 * Returns the new input stream or NULL
1656 */
1657
1658static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1659
Daniel Veillardf4862f02002-09-10 11:13:43 +00001660static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001661xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1662 xmlParserInputPtr input;
1663 xmlChar *buffer;
1664 size_t length;
1665 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001666 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1667 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001668 return(NULL);
1669 }
1670 if (xmlParserDebugEntities)
1671 xmlGenericError(xmlGenericErrorContext,
1672 "new blanks wrapper for entity: %s\n", entity->name);
1673 input = xmlNewInputStream(ctxt);
1674 if (input == NULL) {
1675 return(NULL);
1676 }
1677 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001678 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001679 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001681 return(NULL);
1682 }
1683 buffer [0] = ' ';
1684 buffer [1] = '%';
1685 buffer [length-3] = ';';
1686 buffer [length-2] = ' ';
1687 buffer [length-1] = 0;
1688 memcpy(buffer + 2, entity->name, length - 5);
1689 input->free = deallocblankswrapper;
1690 input->base = buffer;
1691 input->cur = buffer;
1692 input->length = length;
1693 input->end = &buffer[length];
1694 return(input);
1695}
1696
1697/**
Owen Taylor3473f882001-02-23 17:55:21 +00001698 * xmlParserHandlePEReference:
1699 * @ctxt: the parser context
1700 *
1701 * [69] PEReference ::= '%' Name ';'
1702 *
1703 * [ WFC: No Recursion ]
1704 * A parsed entity must not contain a recursive
1705 * reference to itself, either directly or indirectly.
1706 *
1707 * [ WFC: Entity Declared ]
1708 * In a document without any DTD, a document with only an internal DTD
1709 * subset which contains no parameter entity references, or a document
1710 * with "standalone='yes'", ... ... The declaration of a parameter
1711 * entity must precede any reference to it...
1712 *
1713 * [ VC: Entity Declared ]
1714 * In a document with an external subset or external parameter entities
1715 * with "standalone='no'", ... ... The declaration of a parameter entity
1716 * must precede any reference to it...
1717 *
1718 * [ WFC: In DTD ]
1719 * Parameter-entity references may only appear in the DTD.
1720 * NOTE: misleading but this is handled.
1721 *
1722 * A PEReference may have been detected in the current input stream
1723 * the handling is done accordingly to
1724 * http://www.w3.org/TR/REC-xml#entproc
1725 * i.e.
1726 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001727 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001728 */
1729void
1730xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001731 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001732 xmlEntityPtr entity = NULL;
1733 xmlParserInputPtr input;
1734
Owen Taylor3473f882001-02-23 17:55:21 +00001735 if (RAW != '%') return;
1736 switch(ctxt->instate) {
1737 case XML_PARSER_CDATA_SECTION:
1738 return;
1739 case XML_PARSER_COMMENT:
1740 return;
1741 case XML_PARSER_START_TAG:
1742 return;
1743 case XML_PARSER_END_TAG:
1744 return;
1745 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001746 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001747 return;
1748 case XML_PARSER_PROLOG:
1749 case XML_PARSER_START:
1750 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001751 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 return;
1753 case XML_PARSER_ENTITY_DECL:
1754 case XML_PARSER_CONTENT:
1755 case XML_PARSER_ATTRIBUTE_VALUE:
1756 case XML_PARSER_PI:
1757 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001758 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001759 /* we just ignore it there */
1760 return;
1761 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001762 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001763 return;
1764 case XML_PARSER_ENTITY_VALUE:
1765 /*
1766 * NOTE: in the case of entity values, we don't do the
1767 * substitution here since we need the literal
1768 * entity value to be able to save the internal
1769 * subset of the document.
1770 * This will be handled by xmlStringDecodeEntities
1771 */
1772 return;
1773 case XML_PARSER_DTD:
1774 /*
1775 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1776 * In the internal DTD subset, parameter-entity references
1777 * can occur only where markup declarations can occur, not
1778 * within markup declarations.
1779 * In that case this is handled in xmlParseMarkupDecl
1780 */
1781 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1782 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001783 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001784 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001785 break;
1786 case XML_PARSER_IGNORE:
1787 return;
1788 }
1789
1790 NEXT;
1791 name = xmlParseName(ctxt);
1792 if (xmlParserDebugEntities)
1793 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001794 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001796 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001797 } else {
1798 if (RAW == ';') {
1799 NEXT;
1800 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1801 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1802 if (entity == NULL) {
1803
1804 /*
1805 * [ WFC: Entity Declared ]
1806 * In a document without any DTD, a document with only an
1807 * internal DTD subset which contains no parameter entity
1808 * references, or a document with "standalone='yes'", ...
1809 * ... The declaration of a parameter entity must precede
1810 * any reference to it...
1811 */
1812 if ((ctxt->standalone == 1) ||
1813 ((ctxt->hasExternalSubset == 0) &&
1814 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001815 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001816 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001817 } else {
1818 /*
1819 * [ VC: Entity Declared ]
1820 * In a document with an external subset or external
1821 * parameter entities with "standalone='no'", ...
1822 * ... The declaration of a parameter entity must precede
1823 * any reference to it...
1824 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001825 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1826 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1827 "PEReference: %%%s; not found\n",
1828 name);
1829 } else
1830 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1831 "PEReference: %%%s; not found\n",
1832 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001833 ctxt->valid = 0;
1834 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001835 } else if (ctxt->input->free != deallocblankswrapper) {
1836 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1837 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001838 } else {
1839 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1840 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001841 xmlChar start[4];
1842 xmlCharEncoding enc;
1843
Owen Taylor3473f882001-02-23 17:55:21 +00001844 /*
1845 * handle the extra spaces added before and after
1846 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001847 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001848 */
1849 input = xmlNewEntityInputStream(ctxt, entity);
1850 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001851
1852 /*
1853 * Get the 4 first bytes and decode the charset
1854 * if enc != XML_CHAR_ENCODING_NONE
1855 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001856 * Note that, since we may have some non-UTF8
1857 * encoding (like UTF16, bug 135229), the 'length'
1858 * is not known, but we can calculate based upon
1859 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001860 */
1861 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001862 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001863 start[0] = RAW;
1864 start[1] = NXT(1);
1865 start[2] = NXT(2);
1866 start[3] = NXT(3);
1867 enc = xmlDetectCharEncoding(start, 4);
1868 if (enc != XML_CHAR_ENCODING_NONE) {
1869 xmlSwitchEncoding(ctxt, enc);
1870 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001871 }
1872
Owen Taylor3473f882001-02-23 17:55:21 +00001873 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001874 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1875 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001876 xmlParseTextDecl(ctxt);
1877 }
Owen Taylor3473f882001-02-23 17:55:21 +00001878 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001879 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1880 "PEReference: %s is not a parameter entity\n",
1881 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001882 }
1883 }
1884 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001885 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001886 }
Owen Taylor3473f882001-02-23 17:55:21 +00001887 }
1888}
1889
/*
 * Macro used to grow the current buffer.
 *
 * It doubles the companion size variable <buffer>_size and reallocates
 * <buffer> accordingly. The enclosing function must therefore declare
 * that size variable and provide a "mem_error" label, which is jumped to
 * when the reallocation fails (<buffer> is left pointing to the old,
 * still allocated block in that case).
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (tmp != NULL)							\
	buffer = tmp;							\
    else								\
	goto mem_error;							\
}
1901
1902/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001903 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001904 * @ctxt: the parser context
1905 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001906 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001907 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1908 * @end: an end marker xmlChar, 0 if none
1909 * @end2: an end marker xmlChar, 0 if none
1910 * @end3: an end marker xmlChar, 0 if none
1911 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001912 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001913 *
1914 * [67] Reference ::= EntityRef | CharRef
1915 *
1916 * [69] PEReference ::= '%' Name ';'
1917 *
1918 * Returns A newly allocated string with the substitution done. The caller
1919 * must deallocate it !
1920 */
1921xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001922xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1923 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001924 xmlChar *buffer = NULL;
1925 int buffer_size = 0;
1926
1927 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001928 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001929 xmlEntityPtr ent;
1930 int c,l;
1931 int nbchars = 0;
1932
Daniel Veillarde57ec792003-09-10 10:50:59 +00001933 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001934 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001935 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001936
1937 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001938 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001939 return(NULL);
1940 }
1941
1942 /*
1943 * allocate a translation buffer.
1944 */
1945 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001946 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001947 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001948
1949 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001950 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001951 * we are operating on already parsed values.
1952 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001953 if (str < last)
1954 c = CUR_SCHAR(str, l);
1955 else
1956 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001957 while ((c != 0) && (c != end) && /* non input consuming loop */
1958 (c != end2) && (c != end3)) {
1959
1960 if (c == 0) break;
1961 if ((c == '&') && (str[1] == '#')) {
1962 int val = xmlParseStringCharRef(ctxt, &str);
1963 if (val != 0) {
1964 COPY_BUF(0,buffer,nbchars,val);
1965 }
1966 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1967 if (xmlParserDebugEntities)
1968 xmlGenericError(xmlGenericErrorContext,
1969 "String decoding Entity Reference: %.30s\n",
1970 str);
1971 ent = xmlParseStringEntityRef(ctxt, &str);
1972 if ((ent != NULL) &&
1973 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1974 if (ent->content != NULL) {
1975 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1976 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001977 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1978 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001979 }
1980 } else if ((ent != NULL) && (ent->content != NULL)) {
1981 xmlChar *rep;
1982
1983 ctxt->depth++;
1984 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1985 0, 0, 0);
1986 ctxt->depth--;
1987 if (rep != NULL) {
1988 current = rep;
1989 while (*current != 0) { /* non input consuming loop */
1990 buffer[nbchars++] = *current++;
1991 if (nbchars >
1992 buffer_size - XML_PARSER_BUFFER_SIZE) {
1993 growBuffer(buffer);
1994 }
1995 }
1996 xmlFree(rep);
1997 }
1998 } else if (ent != NULL) {
1999 int i = xmlStrlen(ent->name);
2000 const xmlChar *cur = ent->name;
2001
2002 buffer[nbchars++] = '&';
2003 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2004 growBuffer(buffer);
2005 }
2006 for (;i > 0;i--)
2007 buffer[nbchars++] = *cur++;
2008 buffer[nbchars++] = ';';
2009 }
2010 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2011 if (xmlParserDebugEntities)
2012 xmlGenericError(xmlGenericErrorContext,
2013 "String decoding PE Reference: %.30s\n", str);
2014 ent = xmlParseStringPEReference(ctxt, &str);
2015 if (ent != NULL) {
2016 xmlChar *rep;
2017
2018 ctxt->depth++;
2019 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2020 0, 0, 0);
2021 ctxt->depth--;
2022 if (rep != NULL) {
2023 current = rep;
2024 while (*current != 0) { /* non input consuming loop */
2025 buffer[nbchars++] = *current++;
2026 if (nbchars >
2027 buffer_size - XML_PARSER_BUFFER_SIZE) {
2028 growBuffer(buffer);
2029 }
2030 }
2031 xmlFree(rep);
2032 }
2033 }
2034 } else {
2035 COPY_BUF(l,buffer,nbchars,c);
2036 str += l;
2037 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2038 growBuffer(buffer);
2039 }
2040 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002041 if (str < last)
2042 c = CUR_SCHAR(str, l);
2043 else
2044 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002045 }
2046 buffer[nbchars++] = 0;
2047 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002048
2049mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002050 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002051 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002052}
2053
Daniel Veillarde57ec792003-09-10 10:50:59 +00002054/**
2055 * xmlStringDecodeEntities:
2056 * @ctxt: the parser context
2057 * @str: the input string
2058 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2059 * @end: an end marker xmlChar, 0 if none
2060 * @end2: an end marker xmlChar, 0 if none
2061 * @end3: an end marker xmlChar, 0 if none
2062 *
2063 * Takes a entity string content and process to do the adequate substitutions.
2064 *
2065 * [67] Reference ::= EntityRef | CharRef
2066 *
2067 * [69] PEReference ::= '%' Name ';'
2068 *
2069 * Returns A newly allocated string with the substitution done. The caller
2070 * must deallocate it !
2071 */
2072xmlChar *
2073xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2074 xmlChar end, xmlChar end2, xmlChar end3) {
2075 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2076 end, end2, end3));
2077}
Owen Taylor3473f882001-02-23 17:55:21 +00002078
2079/************************************************************************
2080 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002081 * Commodity functions, cleanup needed ? *
2082 * *
2083 ************************************************************************/
2084
2085/**
2086 * areBlanks:
2087 * @ctxt: an XML parser context
2088 * @str: a xmlChar *
2089 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002090 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002091 *
2092 * Is this a sequence of blank chars that one can ignore ?
2093 *
2094 * Returns 1 if ignorable 0 otherwise.
2095 */
2096
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002097static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2098 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002099 int i, ret;
2100 xmlNodePtr lastChild;
2101
Daniel Veillard05c13a22001-09-09 08:38:09 +00002102 /*
2103 * Don't spend time trying to differentiate them, the same callback is
2104 * used !
2105 */
2106 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002107 return(0);
2108
Owen Taylor3473f882001-02-23 17:55:21 +00002109 /*
2110 * Check for xml:space value.
2111 */
2112 if (*(ctxt->space) == 1)
2113 return(0);
2114
2115 /*
2116 * Check that the string is made of blanks
2117 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002118 if (blank_chars == 0) {
2119 for (i = 0;i < len;i++)
2120 if (!(IS_BLANK_CH(str[i]))) return(0);
2121 }
Owen Taylor3473f882001-02-23 17:55:21 +00002122
2123 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002124 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002125 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002126 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002127 if (ctxt->myDoc != NULL) {
2128 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2129 if (ret == 0) return(1);
2130 if (ret == 1) return(0);
2131 }
2132
2133 /*
2134 * Otherwise, heuristic :-\
2135 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002136 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002137 if ((ctxt->node->children == NULL) &&
2138 (RAW == '<') && (NXT(1) == '/')) return(0);
2139
2140 lastChild = xmlGetLastChild(ctxt->node);
2141 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002142 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2143 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002144 } else if (xmlNodeIsText(lastChild))
2145 return(0);
2146 else if ((ctxt->node->children != NULL) &&
2147 (xmlNodeIsText(ctxt->node->children)))
2148 return(0);
2149 return(1);
2150}
2151
Owen Taylor3473f882001-02-23 17:55:21 +00002152/************************************************************************
2153 * *
2154 * Extra stuff for namespace support *
2155 * Relates to http://www.w3.org/TR/WD-xml-names *
2156 * *
2157 ************************************************************************/
2158
2159/**
2160 * xmlSplitQName:
2161 * @ctxt: an XML parser context
2162 * @name: an XML parser context
2163 * @prefix: a xmlChar **
2164 *
2165 * parse an UTF8 encoded XML qualified name string
2166 *
2167 * [NS 5] QName ::= (Prefix ':')? LocalPart
2168 *
2169 * [NS 6] Prefix ::= NCName
2170 *
2171 * [NS 7] LocalPart ::= NCName
2172 *
2173 * Returns the local part, and prefix is updated
2174 * to get the Prefix if any.
2175 */
2176
2177xmlChar *
2178xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2179 xmlChar buf[XML_MAX_NAMELEN + 5];
2180 xmlChar *buffer = NULL;
2181 int len = 0;
2182 int max = XML_MAX_NAMELEN;
2183 xmlChar *ret = NULL;
2184 const xmlChar *cur = name;
2185 int c;
2186
2187 *prefix = NULL;
2188
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002189 if (cur == NULL) return(NULL);
2190
Owen Taylor3473f882001-02-23 17:55:21 +00002191#ifndef XML_XML_NAMESPACE
2192 /* xml: prefix is not really a namespace */
2193 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2194 (cur[2] == 'l') && (cur[3] == ':'))
2195 return(xmlStrdup(name));
2196#endif
2197
Daniel Veillard597bc482003-07-24 16:08:28 +00002198 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002199 if (cur[0] == ':')
2200 return(xmlStrdup(name));
2201
2202 c = *cur++;
2203 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2204 buf[len++] = c;
2205 c = *cur++;
2206 }
2207 if (len >= max) {
2208 /*
2209 * Okay someone managed to make a huge name, so he's ready to pay
2210 * for the processing speed.
2211 */
2212 max = len * 2;
2213
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002214 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002215 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002216 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002217 return(NULL);
2218 }
2219 memcpy(buffer, buf, len);
2220 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2221 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002222 xmlChar *tmp;
2223
Owen Taylor3473f882001-02-23 17:55:21 +00002224 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002225 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002226 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002227 if (tmp == NULL) {
2228 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002229 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002230 return(NULL);
2231 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002232 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002233 }
2234 buffer[len++] = c;
2235 c = *cur++;
2236 }
2237 buffer[len] = 0;
2238 }
2239
Daniel Veillard597bc482003-07-24 16:08:28 +00002240 /* nasty but well=formed
2241 if ((c == ':') && (*cur == 0)) {
2242 return(xmlStrdup(name));
2243 } */
2244
Owen Taylor3473f882001-02-23 17:55:21 +00002245 if (buffer == NULL)
2246 ret = xmlStrndup(buf, len);
2247 else {
2248 ret = buffer;
2249 buffer = NULL;
2250 max = XML_MAX_NAMELEN;
2251 }
2252
2253
2254 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002255 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002256 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002257 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002258 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002259 }
Owen Taylor3473f882001-02-23 17:55:21 +00002260 len = 0;
2261
Daniel Veillardbb284f42002-10-16 18:02:47 +00002262 /*
2263 * Check that the first character is proper to start
2264 * a new name
2265 */
2266 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2267 ((c >= 0x41) && (c <= 0x5A)) ||
2268 (c == '_') || (c == ':'))) {
2269 int l;
2270 int first = CUR_SCHAR(cur, l);
2271
2272 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002273 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002274 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002275 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002276 }
2277 }
2278 cur++;
2279
Owen Taylor3473f882001-02-23 17:55:21 +00002280 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2281 buf[len++] = c;
2282 c = *cur++;
2283 }
2284 if (len >= max) {
2285 /*
2286 * Okay someone managed to make a huge name, so he's ready to pay
2287 * for the processing speed.
2288 */
2289 max = len * 2;
2290
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002291 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002292 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002293 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002294 return(NULL);
2295 }
2296 memcpy(buffer, buf, len);
2297 while (c != 0) { /* tested bigname2.xml */
2298 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002299 xmlChar *tmp;
2300
Owen Taylor3473f882001-02-23 17:55:21 +00002301 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002302 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002303 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002304 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002305 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002306 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 return(NULL);
2308 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002309 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002310 }
2311 buffer[len++] = c;
2312 c = *cur++;
2313 }
2314 buffer[len] = 0;
2315 }
2316
2317 if (buffer == NULL)
2318 ret = xmlStrndup(buf, len);
2319 else {
2320 ret = buffer;
2321 }
2322 }
2323
2324 return(ret);
2325}
2326
2327/************************************************************************
2328 * *
2329 * The parser itself *
2330 * Relates to http://www.w3.org/TR/REC-xml *
2331 * *
2332 ************************************************************************/
2333
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002334static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002335static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002336 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002337
Owen Taylor3473f882001-02-23 17:55:21 +00002338/**
2339 * xmlParseName:
2340 * @ctxt: an XML parser context
2341 *
2342 * parse an XML name.
2343 *
2344 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2345 * CombiningChar | Extender
2346 *
2347 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2348 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002349 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002350 *
2351 * Returns the Name parsed or NULL
2352 */
2353
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002354const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002355xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002356 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002357 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002358 int count = 0;
2359
2360 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002361
2362 /*
2363 * Accelerator for simple ASCII names
2364 */
2365 in = ctxt->input->cur;
2366 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2367 ((*in >= 0x41) && (*in <= 0x5A)) ||
2368 (*in == '_') || (*in == ':')) {
2369 in++;
2370 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2371 ((*in >= 0x41) && (*in <= 0x5A)) ||
2372 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002373 (*in == '_') || (*in == '-') ||
2374 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002375 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002376 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002377 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002378 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002379 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002380 ctxt->nbChars += count;
2381 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002382 if (ret == NULL)
2383 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002384 return(ret);
2385 }
2386 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002387 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002388}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002389
Daniel Veillard46de64e2002-05-29 08:21:33 +00002390/**
2391 * xmlParseNameAndCompare:
2392 * @ctxt: an XML parser context
2393 *
2394 * parse an XML name and compares for match
2395 * (specialized for endtag parsing)
2396 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002397 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2398 * and the name for mismatch
2399 */
2400
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002401static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002402xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002403 register const xmlChar *cmp = other;
2404 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002405 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002406
2407 GROW;
2408
2409 in = ctxt->input->cur;
2410 while (*in != 0 && *in == *cmp) {
2411 ++in;
2412 ++cmp;
2413 }
William M. Brack76e95df2003-10-18 16:20:14 +00002414 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002415 /* success */
2416 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002417 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002418 }
2419 /* failure (or end of input buffer), check with full function */
2420 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002421 /* strings coming from the dictionnary direct compare possible */
2422 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002423 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002424 }
2425 return ret;
2426}
2427
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002429xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002430 int len = 0, l;
2431 int c;
2432 int count = 0;
2433
2434 /*
2435 * Handler for more complex cases
2436 */
2437 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002438 c = CUR_CHAR(l);
2439 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2440 (!IS_LETTER(c) && (c != '_') &&
2441 (c != ':'))) {
2442 return(NULL);
2443 }
2444
2445 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002446 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002447 (c == '.') || (c == '-') ||
2448 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002449 (IS_COMBINING(c)) ||
2450 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (count++ > 100) {
2452 count = 0;
2453 GROW;
2454 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002455 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002456 NEXTL(l);
2457 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002458 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002459 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002460}
2461
2462/**
2463 * xmlParseStringName:
2464 * @ctxt: an XML parser context
2465 * @str: a pointer to the string pointer (IN/OUT)
2466 *
2467 * parse an XML name.
2468 *
2469 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2470 * CombiningChar | Extender
2471 *
2472 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2473 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002474 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002475 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002476 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002477 * is updated to the current location in the string.
2478 */
2479
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002480static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002481xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2482 xmlChar buf[XML_MAX_NAMELEN + 5];
2483 const xmlChar *cur = *str;
2484 int len = 0, l;
2485 int c;
2486
2487 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002488 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002489 (c != ':')) {
2490 return(NULL);
2491 }
2492
William M. Brack871611b2003-10-18 04:53:14 +00002493 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002494 (c == '.') || (c == '-') ||
2495 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002496 (IS_COMBINING(c)) ||
2497 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002498 COPY_BUF(l,buf,len,c);
2499 cur += l;
2500 c = CUR_SCHAR(cur, l);
2501 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2502 /*
2503 * Okay someone managed to make a huge name, so he's ready to pay
2504 * for the processing speed.
2505 */
2506 xmlChar *buffer;
2507 int max = len * 2;
2508
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002509 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002510 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002511 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002512 return(NULL);
2513 }
2514 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002515 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002516 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002517 (c == '.') || (c == '-') ||
2518 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002519 (IS_COMBINING(c)) ||
2520 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002521 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002522 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 COPY_BUF(l,buffer,len,c);
2534 cur += l;
2535 c = CUR_SCHAR(cur, l);
2536 }
2537 buffer[len] = 0;
2538 *str = cur;
2539 return(buffer);
2540 }
2541 }
2542 *str = cur;
2543 return(xmlStrndup(buf, len));
2544}
2545
2546/**
2547 * xmlParseNmtoken:
2548 * @ctxt: an XML parser context
2549 *
2550 * parse an XML Nmtoken.
2551 *
2552 * [7] Nmtoken ::= (NameChar)+
2553 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002554 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002555 *
2556 * Returns the Nmtoken parsed or NULL
2557 */
2558
2559xmlChar *
2560xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2561 xmlChar buf[XML_MAX_NAMELEN + 5];
2562 int len = 0, l;
2563 int c;
2564 int count = 0;
2565
2566 GROW;
2567 c = CUR_CHAR(l);
2568
William M. Brack871611b2003-10-18 04:53:14 +00002569 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002570 (c == '.') || (c == '-') ||
2571 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002572 (IS_COMBINING(c)) ||
2573 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002574 if (count++ > 100) {
2575 count = 0;
2576 GROW;
2577 }
2578 COPY_BUF(l,buf,len,c);
2579 NEXTL(l);
2580 c = CUR_CHAR(l);
2581 if (len >= XML_MAX_NAMELEN) {
2582 /*
2583 * Okay someone managed to make a huge token, so he's ready to pay
2584 * for the processing speed.
2585 */
2586 xmlChar *buffer;
2587 int max = len * 2;
2588
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002589 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002590 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002591 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002592 return(NULL);
2593 }
2594 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002595 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002596 (c == '.') || (c == '-') ||
2597 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002598 (IS_COMBINING(c)) ||
2599 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002600 if (count++ > 100) {
2601 count = 0;
2602 GROW;
2603 }
2604 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002605 xmlChar *tmp;
2606
Owen Taylor3473f882001-02-23 17:55:21 +00002607 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002608 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002609 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002610 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002611 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002612 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002613 return(NULL);
2614 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002615 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 COPY_BUF(l,buffer,len,c);
2618 NEXTL(l);
2619 c = CUR_CHAR(l);
2620 }
2621 buffer[len] = 0;
2622 return(buffer);
2623 }
2624 }
2625 if (len == 0)
2626 return(NULL);
2627 return(xmlStrndup(buf, len));
2628}
2629
2630/**
2631 * xmlParseEntityValue:
2632 * @ctxt: an XML parser context
2633 * @orig: if non-NULL store a copy of the original entity value
2634 *
2635 * parse a value for ENTITY declarations
2636 *
2637 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2638 * "'" ([^%&'] | PEReference | Reference)* "'"
2639 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002640 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002641 */
2642
2643xmlChar *
2644xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2645 xmlChar *buf = NULL;
2646 int len = 0;
2647 int size = XML_PARSER_BUFFER_SIZE;
2648 int c, l;
2649 xmlChar stop;
2650 xmlChar *ret = NULL;
2651 const xmlChar *cur = NULL;
2652 xmlParserInputPtr input;
2653
2654 if (RAW == '"') stop = '"';
2655 else if (RAW == '\'') stop = '\'';
2656 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002657 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002658 return(NULL);
2659 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002660 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002661 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002662 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002663 return(NULL);
2664 }
2665
2666 /*
2667 * The content of the entity definition is copied in a buffer.
2668 */
2669
2670 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2671 input = ctxt->input;
2672 GROW;
2673 NEXT;
2674 c = CUR_CHAR(l);
2675 /*
2676 * NOTE: 4.4.5 Included in Literal
2677 * When a parameter entity reference appears in a literal entity
2678 * value, ... a single or double quote character in the replacement
2679 * text is always treated as a normal data character and will not
2680 * terminate the literal.
2681 * In practice it means we stop the loop only when back at parsing
2682 * the initial entity and the quote is found
2683 */
William M. Brack871611b2003-10-18 04:53:14 +00002684 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002685 (ctxt->input != input))) {
2686 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002687 xmlChar *tmp;
2688
Owen Taylor3473f882001-02-23 17:55:21 +00002689 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002690 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2691 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002692 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002693 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002694 return(NULL);
2695 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002696 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002697 }
2698 COPY_BUF(l,buf,len,c);
2699 NEXTL(l);
2700 /*
2701 * Pop-up of finished entities.
2702 */
2703 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2704 xmlPopInput(ctxt);
2705
2706 GROW;
2707 c = CUR_CHAR(l);
2708 if (c == 0) {
2709 GROW;
2710 c = CUR_CHAR(l);
2711 }
2712 }
2713 buf[len] = 0;
2714
2715 /*
2716 * Raise problem w.r.t. '&' and '%' being used in non-entities
2717 * reference constructs. Note Charref will be handled in
2718 * xmlStringDecodeEntities()
2719 */
2720 cur = buf;
2721 while (*cur != 0) { /* non input consuming */
2722 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2723 xmlChar *name;
2724 xmlChar tmp = *cur;
2725
2726 cur++;
2727 name = xmlParseStringName(ctxt, &cur);
2728 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002729 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002730 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002731 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002732 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002733 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2734 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002735 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002736 }
2737 if (name != NULL)
2738 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002739 if (*cur == 0)
2740 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002741 }
2742 cur++;
2743 }
2744
2745 /*
2746 * Then PEReference entities are substituted.
2747 */
2748 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002749 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002750 xmlFree(buf);
2751 } else {
2752 NEXT;
2753 /*
2754 * NOTE: 4.4.7 Bypassed
2755 * When a general entity reference appears in the EntityValue in
2756 * an entity declaration, it is bypassed and left as is.
2757 * so XML_SUBSTITUTE_REF is not set here.
2758 */
2759 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2760 0, 0, 0);
2761 if (orig != NULL)
2762 *orig = buf;
2763 else
2764 xmlFree(buf);
2765 }
2766
2767 return(ret);
2768}
2769
2770/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002771 * xmlParseAttValueComplex:
2772 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002773 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002774 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002775 *
2776 * parse a value for an attribute, this is the fallback function
2777 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002778 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002779 *
2780 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2781 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002782static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002783xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002784 xmlChar limit = 0;
2785 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002786 int len = 0;
2787 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002788 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002789 xmlChar *current = NULL;
2790 xmlEntityPtr ent;
2791
Owen Taylor3473f882001-02-23 17:55:21 +00002792 if (NXT(0) == '"') {
2793 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2794 limit = '"';
2795 NEXT;
2796 } else if (NXT(0) == '\'') {
2797 limit = '\'';
2798 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2799 NEXT;
2800 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002801 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002802 return(NULL);
2803 }
2804
2805 /*
2806 * allocate a translation buffer.
2807 */
2808 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002809 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002810 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002811
2812 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002813 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002814 */
2815 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002816 while ((NXT(0) != limit) && /* checked */
2817 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002818 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002819 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002820 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002821 if (NXT(1) == '#') {
2822 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002823
Owen Taylor3473f882001-02-23 17:55:21 +00002824 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002825 if (ctxt->replaceEntities) {
2826 if (len > buf_size - 10) {
2827 growBuffer(buf);
2828 }
2829 buf[len++] = '&';
2830 } else {
2831 /*
2832 * The reparsing will be done in xmlStringGetNodeList()
2833 * called by the attribute() function in SAX.c
2834 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002835 if (len > buf_size - 10) {
2836 growBuffer(buf);
2837 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002838 buf[len++] = '&';
2839 buf[len++] = '#';
2840 buf[len++] = '3';
2841 buf[len++] = '8';
2842 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002843 }
2844 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002845 if (len > buf_size - 10) {
2846 growBuffer(buf);
2847 }
Owen Taylor3473f882001-02-23 17:55:21 +00002848 len += xmlCopyChar(0, &buf[len], val);
2849 }
2850 } else {
2851 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002852 if ((ent != NULL) &&
2853 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2854 if (len > buf_size - 10) {
2855 growBuffer(buf);
2856 }
2857 if ((ctxt->replaceEntities == 0) &&
2858 (ent->content[0] == '&')) {
2859 buf[len++] = '&';
2860 buf[len++] = '#';
2861 buf[len++] = '3';
2862 buf[len++] = '8';
2863 buf[len++] = ';';
2864 } else {
2865 buf[len++] = ent->content[0];
2866 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002867 } else if ((ent != NULL) &&
2868 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002869 xmlChar *rep;
2870
2871 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2872 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002873 XML_SUBSTITUTE_REF,
2874 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (rep != NULL) {
2876 current = rep;
2877 while (*current != 0) { /* non input consuming */
2878 buf[len++] = *current++;
2879 if (len > buf_size - 10) {
2880 growBuffer(buf);
2881 }
2882 }
2883 xmlFree(rep);
2884 }
2885 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002886 if (len > buf_size - 10) {
2887 growBuffer(buf);
2888 }
Owen Taylor3473f882001-02-23 17:55:21 +00002889 if (ent->content != NULL)
2890 buf[len++] = ent->content[0];
2891 }
2892 } else if (ent != NULL) {
2893 int i = xmlStrlen(ent->name);
2894 const xmlChar *cur = ent->name;
2895
2896 /*
2897 * This may look absurd but is needed to detect
2898 * entities problems
2899 */
2900 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2901 (ent->content != NULL)) {
2902 xmlChar *rep;
2903 rep = xmlStringDecodeEntities(ctxt, ent->content,
2904 XML_SUBSTITUTE_REF, 0, 0, 0);
2905 if (rep != NULL)
2906 xmlFree(rep);
2907 }
2908
2909 /*
2910 * Just output the reference
2911 */
2912 buf[len++] = '&';
2913 if (len > buf_size - i - 10) {
2914 growBuffer(buf);
2915 }
2916 for (;i > 0;i--)
2917 buf[len++] = *cur++;
2918 buf[len++] = ';';
2919 }
2920 }
2921 } else {
2922 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002923 if ((len != 0) || (!normalize)) {
2924 if ((!normalize) || (!in_space)) {
2925 COPY_BUF(l,buf,len,0x20);
2926 if (len > buf_size - 10) {
2927 growBuffer(buf);
2928 }
2929 }
2930 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002931 }
2932 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002933 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002934 COPY_BUF(l,buf,len,c);
2935 if (len > buf_size - 10) {
2936 growBuffer(buf);
2937 }
2938 }
2939 NEXTL(l);
2940 }
2941 GROW;
2942 c = CUR_CHAR(l);
2943 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002944 if ((in_space) && (normalize)) {
2945 while (buf[len - 1] == 0x20) len--;
2946 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002947 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002948 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002949 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002950 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002951 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2952 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002953 } else
2954 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002955 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002956 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002957
2958mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002959 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002960 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961}
2962
2963/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002964 * xmlParseAttValue:
2965 * @ctxt: an XML parser context
2966 *
2967 * parse a value for an attribute
2968 * Note: the parser won't do substitution of entities here, this
2969 * will be handled later in xmlStringGetNodeList
2970 *
2971 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2972 * "'" ([^<&'] | Reference)* "'"
2973 *
2974 * 3.3.3 Attribute-Value Normalization:
2975 * Before the value of an attribute is passed to the application or
2976 * checked for validity, the XML processor must normalize it as follows:
2977 * - a character reference is processed by appending the referenced
2978 * character to the attribute value
2979 * - an entity reference is processed by recursively processing the
2980 * replacement text of the entity
2981 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2982 * appending #x20 to the normalized value, except that only a single
2983 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2984 * parsed entity or the literal entity value of an internal parsed entity
2985 * - other characters are processed by appending them to the normalized value
2986 * If the declared value is not CDATA, then the XML processor must further
2987 * process the normalized attribute value by discarding any leading and
2988 * trailing space (#x20) characters, and by replacing sequences of space
2989 * (#x20) characters by a single space (#x20) character.
2990 * All attributes for which no declaration has been read should be treated
2991 * by a non-validating parser as if declared CDATA.
2992 *
2993 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2994 */
2995
2996
2997xmlChar *
2998xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002999 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003000}
3001
3002/**
Owen Taylor3473f882001-02-23 17:55:21 +00003003 * xmlParseSystemLiteral:
3004 * @ctxt: an XML parser context
3005 *
3006 * parse an XML Literal
3007 *
3008 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3009 *
3010 * Returns the SystemLiteral parsed or NULL
3011 */
3012
3013xmlChar *
3014xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3015 xmlChar *buf = NULL;
3016 int len = 0;
3017 int size = XML_PARSER_BUFFER_SIZE;
3018 int cur, l;
3019 xmlChar stop;
3020 int state = ctxt->instate;
3021 int count = 0;
3022
3023 SHRINK;
3024 if (RAW == '"') {
3025 NEXT;
3026 stop = '"';
3027 } else if (RAW == '\'') {
3028 NEXT;
3029 stop = '\'';
3030 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003031 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003032 return(NULL);
3033 }
3034
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003035 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003036 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 return(NULL);
3039 }
3040 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3041 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003042 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003044 xmlChar *tmp;
3045
Owen Taylor3473f882001-02-23 17:55:21 +00003046 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003047 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3048 if (tmp == NULL) {
3049 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003050 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003051 ctxt->instate = (xmlParserInputState) state;
3052 return(NULL);
3053 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003054 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003055 }
3056 count++;
3057 if (count > 50) {
3058 GROW;
3059 count = 0;
3060 }
3061 COPY_BUF(l,buf,len,cur);
3062 NEXTL(l);
3063 cur = CUR_CHAR(l);
3064 if (cur == 0) {
3065 GROW;
3066 SHRINK;
3067 cur = CUR_CHAR(l);
3068 }
3069 }
3070 buf[len] = 0;
3071 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003072 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003073 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003074 } else {
3075 NEXT;
3076 }
3077 return(buf);
3078}
3079
3080/**
3081 * xmlParsePubidLiteral:
3082 * @ctxt: an XML parser context
3083 *
3084 * parse an XML public literal
3085 *
3086 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3087 *
3088 * Returns the PubidLiteral parsed or NULL.
3089 */
3090
3091xmlChar *
3092xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3093 xmlChar *buf = NULL;
3094 int len = 0;
3095 int size = XML_PARSER_BUFFER_SIZE;
3096 xmlChar cur;
3097 xmlChar stop;
3098 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003099 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003100
3101 SHRINK;
3102 if (RAW == '"') {
3103 NEXT;
3104 stop = '"';
3105 } else if (RAW == '\'') {
3106 NEXT;
3107 stop = '\'';
3108 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003109 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003110 return(NULL);
3111 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003112 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003113 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003114 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003115 return(NULL);
3116 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003117 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003118 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003119 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003120 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003121 xmlChar *tmp;
3122
Owen Taylor3473f882001-02-23 17:55:21 +00003123 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003124 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3125 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003126 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003127 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003128 return(NULL);
3129 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003130 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003131 }
3132 buf[len++] = cur;
3133 count++;
3134 if (count > 50) {
3135 GROW;
3136 count = 0;
3137 }
3138 NEXT;
3139 cur = CUR;
3140 if (cur == 0) {
3141 GROW;
3142 SHRINK;
3143 cur = CUR;
3144 }
3145 }
3146 buf[len] = 0;
3147 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003148 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003149 } else {
3150 NEXT;
3151 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003152 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003153 return(buf);
3154}
3155
Daniel Veillard48b2f892001-02-25 16:11:03 +00003156void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003157/**
3158 * xmlParseCharData:
3159 * @ctxt: an XML parser context
3160 * @cdata: int indicating whether we are within a CDATA section
3161 *
3162 * parse a CharData section.
3163 * if we are within a CDATA section ']]>' marks an end of section.
3164 *
3165 * The right angle bracket (>) may be represented using the string "&gt;",
3166 * and must, for compatibility, be escaped using "&gt;" or a character
3167 * reference when it appears in the string "]]>" in content, when that
3168 * string is not marking the end of a CDATA section.
3169 *
3170 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3171 */
3172
3173void
3174xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003175 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003176 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003177 int line = ctxt->input->line;
3178 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003179
3180 SHRINK;
3181 GROW;
3182 /*
3183 * Accelerated common case where input don't need to be
3184 * modified before passing it to the handler.
3185 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003186 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003187 in = ctxt->input->cur;
3188 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003189get_more_space:
3190 while (*in == 0x20) in++;
3191 if (*in == 0xA) {
3192 ctxt->input->line++;
3193 in++;
3194 while (*in == 0xA) {
3195 ctxt->input->line++;
3196 in++;
3197 }
3198 goto get_more_space;
3199 }
3200 if (*in == '<') {
3201 nbchar = in - ctxt->input->cur;
3202 if (nbchar > 0) {
3203 const xmlChar *tmp = ctxt->input->cur;
3204 ctxt->input->cur = in;
3205
3206 if (ctxt->sax->ignorableWhitespace !=
3207 ctxt->sax->characters) {
3208 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3209 ctxt->sax->ignorableWhitespace(ctxt->userData,
3210 tmp, nbchar);
3211 } else if (ctxt->sax->characters != NULL)
3212 ctxt->sax->characters(ctxt->userData,
3213 tmp, nbchar);
3214 } else if (ctxt->sax->characters != NULL) {
3215 ctxt->sax->characters(ctxt->userData,
3216 tmp, nbchar);
3217 }
3218 }
3219 return;
3220 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003221get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003222 while (((*in > ']') && (*in <= 0x7F)) ||
3223 ((*in > '&') && (*in < '<')) ||
3224 ((*in > '<') && (*in < ']')) ||
3225 ((*in >= 0x20) && (*in < '&')) ||
3226 (*in == 0x09))
3227 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003228 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003229 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003230 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003231 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003232 ctxt->input->line++;
3233 in++;
3234 }
3235 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003236 }
3237 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003238 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003239 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003240 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003241 return;
3242 }
3243 in++;
3244 goto get_more;
3245 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003246 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003247 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003248 if ((ctxt->sax->ignorableWhitespace !=
3249 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003250 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003251 const xmlChar *tmp = ctxt->input->cur;
3252 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003253
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003254 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003255 ctxt->sax->ignorableWhitespace(ctxt->userData,
3256 tmp, nbchar);
3257 } else if (ctxt->sax->characters != NULL)
3258 ctxt->sax->characters(ctxt->userData,
3259 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003260 line = ctxt->input->line;
3261 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003262 } else {
3263 if (ctxt->sax->characters != NULL)
3264 ctxt->sax->characters(ctxt->userData,
3265 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003266 line = ctxt->input->line;
3267 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003268 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003269 }
3270 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003271 if (*in == 0xD) {
3272 in++;
3273 if (*in == 0xA) {
3274 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003275 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003276 ctxt->input->line++;
3277 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003278 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003279 in--;
3280 }
3281 if (*in == '<') {
3282 return;
3283 }
3284 if (*in == '&') {
3285 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003286 }
3287 SHRINK;
3288 GROW;
3289 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003290 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003291 nbchar = 0;
3292 }
Daniel Veillard50582112001-03-26 22:52:16 +00003293 ctxt->input->line = line;
3294 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003295 xmlParseCharDataComplex(ctxt, cdata);
3296}
3297
Daniel Veillard01c13b52002-12-10 15:19:08 +00003298/**
3299 * xmlParseCharDataComplex:
3300 * @ctxt: an XML parser context
3301 * @cdata: int indicating whether we are within a CDATA section
3302 *
3303 * parse a CharData section.this is the fallback function
3304 * of xmlParseCharData() when the parsing requires handling
3305 * of non-ASCII characters.
3306 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307void
3308xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003309 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3310 int nbchar = 0;
3311 int cur, l;
3312 int count = 0;
3313
3314 SHRINK;
3315 GROW;
3316 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003317 while ((cur != '<') && /* checked */
3318 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003319 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003320 if ((cur == ']') && (NXT(1) == ']') &&
3321 (NXT(2) == '>')) {
3322 if (cdata) break;
3323 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003324 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003325 }
3326 }
3327 COPY_BUF(l,buf,nbchar,cur);
3328 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003329 buf[nbchar] = 0;
3330
Owen Taylor3473f882001-02-23 17:55:21 +00003331 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003332 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003333 */
3334 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003335 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003336 if (ctxt->sax->ignorableWhitespace != NULL)
3337 ctxt->sax->ignorableWhitespace(ctxt->userData,
3338 buf, nbchar);
3339 } else {
3340 if (ctxt->sax->characters != NULL)
3341 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3342 }
3343 }
3344 nbchar = 0;
3345 }
3346 count++;
3347 if (count > 50) {
3348 GROW;
3349 count = 0;
3350 }
3351 NEXTL(l);
3352 cur = CUR_CHAR(l);
3353 }
3354 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003355 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003356 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003357 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003358 */
3359 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003360 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003361 if (ctxt->sax->ignorableWhitespace != NULL)
3362 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3363 } else {
3364 if (ctxt->sax->characters != NULL)
3365 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3366 }
3367 }
3368 }
3369}
3370
3371/**
3372 * xmlParseExternalID:
3373 * @ctxt: an XML parser context
3374 * @publicID: a xmlChar** receiving PubidLiteral
3375 * @strict: indicate whether we should restrict parsing to only
3376 * production [75], see NOTE below
3377 *
3378 * Parse an External ID or a Public ID
3379 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003380 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003381 * 'PUBLIC' S PubidLiteral S SystemLiteral
3382 *
3383 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3384 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3385 *
3386 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3387 *
3388 * Returns the function returns SystemLiteral and in the second
3389 * case publicID receives PubidLiteral, is strict is off
3390 * it is possible to return NULL and have publicID set.
3391 */
3392
3393xmlChar *
3394xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3395 xmlChar *URI = NULL;
3396
3397 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003398
3399 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003400 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003401 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003402 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003403 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3404 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003405 }
3406 SKIP_BLANKS;
3407 URI = xmlParseSystemLiteral(ctxt);
3408 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003409 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003410 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003411 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003412 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003413 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003414 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003415 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003416 }
3417 SKIP_BLANKS;
3418 *publicID = xmlParsePubidLiteral(ctxt);
3419 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003420 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003421 }
3422 if (strict) {
3423 /*
3424 * We don't handle [83] so "S SystemLiteral" is required.
3425 */
William M. Brack76e95df2003-10-18 16:20:14 +00003426 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003428 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003429 }
3430 } else {
3431 /*
3432 * We handle [83] so we return immediately, if
3433 * "S SystemLiteral" is not detected. From a purely parsing
3434 * point of view that's a nice mess.
3435 */
3436 const xmlChar *ptr;
3437 GROW;
3438
3439 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003440 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003441
William M. Brack76e95df2003-10-18 16:20:14 +00003442 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003443 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3444 }
3445 SKIP_BLANKS;
3446 URI = xmlParseSystemLiteral(ctxt);
3447 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003448 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003449 }
3450 }
3451 return(URI);
3452}
3453
3454/**
3455 * xmlParseComment:
3456 * @ctxt: an XML parser context
3457 *
3458 * Skip an XML (SGML) comment <!-- .... -->
3459 * The spec says that "For compatibility, the string "--" (double-hyphen)
3460 * must not occur within comments. "
3461 *
3462 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3463 */
3464void
3465xmlParseComment(xmlParserCtxtPtr ctxt) {
3466 xmlChar *buf = NULL;
3467 int len;
3468 int size = XML_PARSER_BUFFER_SIZE;
3469 int q, ql;
3470 int r, rl;
3471 int cur, l;
3472 xmlParserInputState state;
3473 xmlParserInputPtr input = ctxt->input;
3474 int count = 0;
3475
3476 /*
3477 * Check that there is a comment right here.
3478 */
3479 if ((RAW != '<') || (NXT(1) != '!') ||
3480 (NXT(2) != '-') || (NXT(3) != '-')) return;
3481
3482 state = ctxt->instate;
3483 ctxt->instate = XML_PARSER_COMMENT;
3484 SHRINK;
3485 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003486 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003487 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003488 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003489 ctxt->instate = state;
3490 return;
3491 }
3492 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003493 if (q == 0)
3494 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003495 NEXTL(ql);
3496 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003497 if (r == 0)
3498 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003499 NEXTL(rl);
3500 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003501 if (cur == 0)
3502 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003503 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003504 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003505 ((cur != '>') ||
3506 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003507 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003508 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003509 }
3510 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003511 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003512 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003513 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3514 if (new_buf == NULL) {
3515 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003516 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003517 ctxt->instate = state;
3518 return;
3519 }
William M. Bracka3215c72004-07-31 16:24:01 +00003520 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003521 }
3522 COPY_BUF(ql,buf,len,q);
3523 q = r;
3524 ql = rl;
3525 r = cur;
3526 rl = l;
3527
3528 count++;
3529 if (count > 50) {
3530 GROW;
3531 count = 0;
3532 }
3533 NEXTL(l);
3534 cur = CUR_CHAR(l);
3535 if (cur == 0) {
3536 SHRINK;
3537 GROW;
3538 cur = CUR_CHAR(l);
3539 }
3540 }
3541 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003542 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003543 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003544 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003545 xmlFree(buf);
3546 } else {
3547 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003548 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3549 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003550 }
3551 NEXT;
3552 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3553 (!ctxt->disableSAX))
3554 ctxt->sax->comment(ctxt->userData, buf);
3555 xmlFree(buf);
3556 }
3557 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003558 return;
3559not_terminated:
3560 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3561 "Comment not terminated\n", NULL);
3562 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003563}
3564
3565/**
3566 * xmlParsePITarget:
3567 * @ctxt: an XML parser context
3568 *
3569 * parse the name of a PI
3570 *
3571 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3572 *
3573 * Returns the PITarget name or NULL
3574 */
3575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003577xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003578 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003579
3580 name = xmlParseName(ctxt);
3581 if ((name != NULL) &&
3582 ((name[0] == 'x') || (name[0] == 'X')) &&
3583 ((name[1] == 'm') || (name[1] == 'M')) &&
3584 ((name[2] == 'l') || (name[2] == 'L'))) {
3585 int i;
3586 if ((name[0] == 'x') && (name[1] == 'm') &&
3587 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003588 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003589 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003590 return(name);
3591 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003592 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003593 return(name);
3594 }
3595 for (i = 0;;i++) {
3596 if (xmlW3CPIs[i] == NULL) break;
3597 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3598 return(name);
3599 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003600 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3601 "xmlParsePITarget: invalid name prefix 'xml'\n",
3602 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003603 }
3604 return(name);
3605}
3606
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The accepted syntax is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted value, optional blanks.
 * A missing '=' makes the function return silently; every other
 * deviation is reported with an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7) != 0)
        goto error;
    cur += 7;

    /* '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (*cur != '=') {
        return;
    }
    cur++;

    /* quoted value, closed by the same quote that opened it */
    while (IS_BLANK_CH(*cur))
        cur++;
    quote = *cur;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    cur++;
    start = cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto error;
    URL = xmlStrndup(start, cur - start);
    cur++;

    /* only blanks may follow the value */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (*cur != 0)
        goto error;

    if (URL == NULL)
        return;
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
    xmlFree(URL);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3668
Owen Taylor3473f882001-02-23 17:55:21 +00003669/**
3670 * xmlParsePI:
3671 * @ctxt: an XML parser context
3672 *
3673 * parse an XML Processing Instruction.
3674 *
3675 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3676 *
3677 * The processing is transfered to SAX once parsed.
3678 */
3679
3680void
3681xmlParsePI(xmlParserCtxtPtr ctxt) {
3682 xmlChar *buf = NULL;
3683 int len = 0;
3684 int size = XML_PARSER_BUFFER_SIZE;
3685 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003686 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003687 xmlParserInputState state;
3688 int count = 0;
3689
3690 if ((RAW == '<') && (NXT(1) == '?')) {
3691 xmlParserInputPtr input = ctxt->input;
3692 state = ctxt->instate;
3693 ctxt->instate = XML_PARSER_PI;
3694 /*
3695 * this is a Processing Instruction.
3696 */
3697 SKIP(2);
3698 SHRINK;
3699
3700 /*
3701 * Parse the target name and check for special support like
3702 * namespace.
3703 */
3704 target = xmlParsePITarget(ctxt);
3705 if (target != NULL) {
3706 if ((RAW == '?') && (NXT(1) == '>')) {
3707 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003708 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3709 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003710 }
3711 SKIP(2);
3712
3713 /*
3714 * SAX: PI detected.
3715 */
3716 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3717 (ctxt->sax->processingInstruction != NULL))
3718 ctxt->sax->processingInstruction(ctxt->userData,
3719 target, NULL);
3720 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 return;
3722 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003723 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003724 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003725 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003726 ctxt->instate = state;
3727 return;
3728 }
3729 cur = CUR;
3730 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003731 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3732 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003733 }
3734 SKIP_BLANKS;
3735 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003736 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003737 ((cur != '?') || (NXT(1) != '>'))) {
3738 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003739 xmlChar *tmp;
3740
Owen Taylor3473f882001-02-23 17:55:21 +00003741 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003742 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3743 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003744 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003745 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003746 ctxt->instate = state;
3747 return;
3748 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003749 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003750 }
3751 count++;
3752 if (count > 50) {
3753 GROW;
3754 count = 0;
3755 }
3756 COPY_BUF(l,buf,len,cur);
3757 NEXTL(l);
3758 cur = CUR_CHAR(l);
3759 if (cur == 0) {
3760 SHRINK;
3761 GROW;
3762 cur = CUR_CHAR(l);
3763 }
3764 }
3765 buf[len] = 0;
3766 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003767 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3768 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 } else {
3770 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003771 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3772 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003773 }
3774 SKIP(2);
3775
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003776#ifdef LIBXML_CATALOG_ENABLED
3777 if (((state == XML_PARSER_MISC) ||
3778 (state == XML_PARSER_START)) &&
3779 (xmlStrEqual(target, XML_CATALOG_PI))) {
3780 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3781 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3782 (allow == XML_CATA_ALLOW_ALL))
3783 xmlParseCatalogPI(ctxt, buf);
3784 }
3785#endif
3786
3787
Owen Taylor3473f882001-02-23 17:55:21 +00003788 /*
3789 * SAX: PI detected.
3790 */
3791 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3792 (ctxt->sax->processingInstruction != NULL))
3793 ctxt->sax->processingInstruction(ctxt->userData,
3794 target, buf);
3795 }
3796 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003797 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003798 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003799 }
3800 ctxt->instate = state;
3801 }
3802}
3803
3804/**
3805 * xmlParseNotationDecl:
3806 * @ctxt: an XML parser context
3807 *
3808 * parse a notation declaration
3809 *
3810 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3811 *
3812 * Hence there is actually 3 choices:
3813 * 'PUBLIC' S PubidLiteral
3814 * 'PUBLIC' S PubidLiteral S SystemLiteral
3815 * and 'SYSTEM' S SystemLiteral
3816 *
3817 * See the NOTE on xmlParseExternalID().
3818 */
3819
3820void
3821xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003822 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003823 xmlChar *Pubid;
3824 xmlChar *Systemid;
3825
Daniel Veillarda07050d2003-10-19 14:46:32 +00003826 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003827 xmlParserInputPtr input = ctxt->input;
3828 SHRINK;
3829 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003830 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003831 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3832 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003833 return;
3834 }
3835 SKIP_BLANKS;
3836
Daniel Veillard76d66f42001-05-16 21:05:17 +00003837 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003838 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003839 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003840 return;
3841 }
William M. Brack76e95df2003-10-18 16:20:14 +00003842 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003843 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003844 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003845 return;
3846 }
3847 SKIP_BLANKS;
3848
3849 /*
3850 * Parse the IDs.
3851 */
3852 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3853 SKIP_BLANKS;
3854
3855 if (RAW == '>') {
3856 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003857 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3858 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003859 }
3860 NEXT;
3861 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3862 (ctxt->sax->notationDecl != NULL))
3863 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3864 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003865 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003866 }
Owen Taylor3473f882001-02-23 17:55:21 +00003867 if (Systemid != NULL) xmlFree(Systemid);
3868 if (Pubid != NULL) xmlFree(Pubid);
3869 }
3870}
3871
3872/**
3873 * xmlParseEntityDecl:
3874 * @ctxt: an XML parser context
3875 *
3876 * parse <!ENTITY declarations
3877 *
3878 * [70] EntityDecl ::= GEDecl | PEDecl
3879 *
3880 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3881 *
3882 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3883 *
3884 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3885 *
3886 * [74] PEDef ::= EntityValue | ExternalID
3887 *
3888 * [76] NDataDecl ::= S 'NDATA' S Name
3889 *
3890 * [ VC: Notation Declared ]
3891 * The Name must match the declared name of a notation.
3892 */
3893
3894void
3895xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003896 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 xmlChar *value = NULL;
3898 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003899 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003900 int isParameter = 0;
3901 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003902 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003903
3904 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003905 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003906 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003907 SHRINK;
3908 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003909 skipped = SKIP_BLANKS;
3910 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003911 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3912 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003913 }
Owen Taylor3473f882001-02-23 17:55:21 +00003914
3915 if (RAW == '%') {
3916 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003917 skipped = SKIP_BLANKS;
3918 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003919 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3920 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003921 }
Owen Taylor3473f882001-02-23 17:55:21 +00003922 isParameter = 1;
3923 }
3924
Daniel Veillard76d66f42001-05-16 21:05:17 +00003925 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003926 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003927 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3928 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003929 return;
3930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003931 skipped = SKIP_BLANKS;
3932 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3934 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003935 }
Owen Taylor3473f882001-02-23 17:55:21 +00003936
Daniel Veillardf5582f12002-06-11 10:08:16 +00003937 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003938 /*
3939 * handle the various case of definitions...
3940 */
3941 if (isParameter) {
3942 if ((RAW == '"') || (RAW == '\'')) {
3943 value = xmlParseEntityValue(ctxt, &orig);
3944 if (value) {
3945 if ((ctxt->sax != NULL) &&
3946 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3947 ctxt->sax->entityDecl(ctxt->userData, name,
3948 XML_INTERNAL_PARAMETER_ENTITY,
3949 NULL, NULL, value);
3950 }
3951 } else {
3952 URI = xmlParseExternalID(ctxt, &literal, 1);
3953 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003954 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003955 }
3956 if (URI) {
3957 xmlURIPtr uri;
3958
3959 uri = xmlParseURI((const char *) URI);
3960 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003961 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3962 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003963 /*
3964 * This really ought to be a well formedness error
3965 * but the XML Core WG decided otherwise c.f. issue
3966 * E26 of the XML erratas.
3967 */
Owen Taylor3473f882001-02-23 17:55:21 +00003968 } else {
3969 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003970 /*
3971 * Okay this is foolish to block those but not
3972 * invalid URIs.
3973 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003974 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 } else {
3976 if ((ctxt->sax != NULL) &&
3977 (!ctxt->disableSAX) &&
3978 (ctxt->sax->entityDecl != NULL))
3979 ctxt->sax->entityDecl(ctxt->userData, name,
3980 XML_EXTERNAL_PARAMETER_ENTITY,
3981 literal, URI, NULL);
3982 }
3983 xmlFreeURI(uri);
3984 }
3985 }
3986 }
3987 } else {
3988 if ((RAW == '"') || (RAW == '\'')) {
3989 value = xmlParseEntityValue(ctxt, &orig);
3990 if ((ctxt->sax != NULL) &&
3991 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3992 ctxt->sax->entityDecl(ctxt->userData, name,
3993 XML_INTERNAL_GENERAL_ENTITY,
3994 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003995 /*
3996 * For expat compatibility in SAX mode.
3997 */
3998 if ((ctxt->myDoc == NULL) ||
3999 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4000 if (ctxt->myDoc == NULL) {
4001 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4002 }
4003 if (ctxt->myDoc->intSubset == NULL)
4004 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4005 BAD_CAST "fake", NULL, NULL);
4006
Daniel Veillard1af9a412003-08-20 22:54:39 +00004007 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4008 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004009 }
Owen Taylor3473f882001-02-23 17:55:21 +00004010 } else {
4011 URI = xmlParseExternalID(ctxt, &literal, 1);
4012 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004013 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004014 }
4015 if (URI) {
4016 xmlURIPtr uri;
4017
4018 uri = xmlParseURI((const char *)URI);
4019 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004020 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4021 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004022 /*
4023 * This really ought to be a well formedness error
4024 * but the XML Core WG decided otherwise c.f. issue
4025 * E26 of the XML erratas.
4026 */
Owen Taylor3473f882001-02-23 17:55:21 +00004027 } else {
4028 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004029 /*
4030 * Okay this is foolish to block those but not
4031 * invalid URIs.
4032 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004033 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004034 }
4035 xmlFreeURI(uri);
4036 }
4037 }
William M. Brack76e95df2003-10-18 16:20:14 +00004038 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004039 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4040 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004041 }
4042 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004043 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004044 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004045 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4047 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004048 }
4049 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004050 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004051 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4052 (ctxt->sax->unparsedEntityDecl != NULL))
4053 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4054 literal, URI, ndata);
4055 } else {
4056 if ((ctxt->sax != NULL) &&
4057 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4058 ctxt->sax->entityDecl(ctxt->userData, name,
4059 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4060 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004061 /*
4062 * For expat compatibility in SAX mode.
4063 * assuming the entity repalcement was asked for
4064 */
4065 if ((ctxt->replaceEntities != 0) &&
4066 ((ctxt->myDoc == NULL) ||
4067 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4068 if (ctxt->myDoc == NULL) {
4069 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4070 }
4071
4072 if (ctxt->myDoc->intSubset == NULL)
4073 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4074 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004075 xmlSAX2EntityDecl(ctxt, name,
4076 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4077 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004078 }
Owen Taylor3473f882001-02-23 17:55:21 +00004079 }
4080 }
4081 }
4082 SKIP_BLANKS;
4083 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004084 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004085 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004086 } else {
4087 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004088 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4089 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004090 }
4091 NEXT;
4092 }
4093 if (orig != NULL) {
4094 /*
4095 * Ugly mechanism to save the raw entity value.
4096 */
4097 xmlEntityPtr cur = NULL;
4098
4099 if (isParameter) {
4100 if ((ctxt->sax != NULL) &&
4101 (ctxt->sax->getParameterEntity != NULL))
4102 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4103 } else {
4104 if ((ctxt->sax != NULL) &&
4105 (ctxt->sax->getEntity != NULL))
4106 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004107 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004108 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004109 }
Owen Taylor3473f882001-02-23 17:55:21 +00004110 }
4111 if (cur != NULL) {
4112 if (cur->orig != NULL)
4113 xmlFree(orig);
4114 else
4115 cur->orig = orig;
4116 } else
4117 xmlFree(orig);
4118 }
Owen Taylor3473f882001-02-23 17:55:21 +00004119 if (value != NULL) xmlFree(value);
4120 if (URI != NULL) xmlFree(URI);
4121 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004122 }
4123}
4124
4125/**
4126 * xmlParseDefaultDecl:
4127 * @ctxt: an XML parser context
4128 * @value: Receive a possible fixed default value for the attribute
4129 *
4130 * Parse an attribute default declaration
4131 *
4132 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4133 *
4134 * [ VC: Required Attribute ]
4135 * if the default declaration is the keyword #REQUIRED, then the
4136 * attribute must be specified for all elements of the type in the
4137 * attribute-list declaration.
4138 *
4139 * [ VC: Attribute Default Legal ]
4140 * The declared default value must meet the lexical constraints of
4141 * the declared attribute type c.f. xmlValidateAttributeDecl()
4142 *
4143 * [ VC: Fixed Attribute Default ]
4144 * if an attribute has a default value declared with the #FIXED
4145 * keyword, instances of that attribute must match the default value.
4146 *
4147 * [ WFC: No < in Attribute Values ]
4148 * handled in xmlParseAttValue()
4149 *
4150 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4151 * or XML_ATTRIBUTE_FIXED.
4152 */
4153
4154int
4155xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4156 int val;
4157 xmlChar *ret;
4158
4159 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004160 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004161 SKIP(9);
4162 return(XML_ATTRIBUTE_REQUIRED);
4163 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004164 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004165 SKIP(8);
4166 return(XML_ATTRIBUTE_IMPLIED);
4167 }
4168 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004169 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004170 SKIP(6);
4171 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004172 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004173 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4174 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004175 }
4176 SKIP_BLANKS;
4177 }
4178 ret = xmlParseAttValue(ctxt);
4179 ctxt->instate = XML_PARSER_DTD;
4180 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004181 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004182 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004183 } else
4184 *value = ret;
4185 return(val);
4186}
4187
4188/**
4189 * xmlParseNotationType:
4190 * @ctxt: an XML parser context
4191 *
4192 * parse an Notation attribute type.
4193 *
4194 * Note: the leading 'NOTATION' S part has already being parsed...
4195 *
4196 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4197 *
4198 * [ VC: Notation Attributes ]
4199 * Values of this type must match one of the notation names included
4200 * in the declaration; all notation names in the declaration must be declared.
4201 *
4202 * Returns: the notation attribute tree built while parsing
4203 */
4204
4205xmlEnumerationPtr
4206xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004207 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004208 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4209
4210 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004211 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 return(NULL);
4213 }
4214 SHRINK;
4215 do {
4216 NEXT;
4217 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004218 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004219 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4221 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004222 return(ret);
4223 }
4224 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004225 if (cur == NULL) return(ret);
4226 if (last == NULL) ret = last = cur;
4227 else {
4228 last->next = cur;
4229 last = cur;
4230 }
4231 SKIP_BLANKS;
4232 } while (RAW == '|');
4233 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004234 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004235 if ((last != NULL) && (last != ret))
4236 xmlFreeEnumeration(last);
4237 return(ret);
4238 }
4239 NEXT;
4240 return(ret);
4241}
4242
4243/**
4244 * xmlParseEnumerationType:
4245 * @ctxt: an XML parser context
4246 *
4247 * parse an Enumeration attribute type.
4248 *
4249 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4250 *
4251 * [ VC: Enumeration ]
4252 * Values of this type must match one of the Nmtoken tokens in
4253 * the declaration
4254 *
4255 * Returns: the enumeration attribute tree built while parsing
4256 */
4257
4258xmlEnumerationPtr
4259xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4260 xmlChar *name;
4261 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4262
4263 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004265 return(NULL);
4266 }
4267 SHRINK;
4268 do {
4269 NEXT;
4270 SKIP_BLANKS;
4271 name = xmlParseNmtoken(ctxt);
4272 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004273 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004274 return(ret);
4275 }
4276 cur = xmlCreateEnumeration(name);
4277 xmlFree(name);
4278 if (cur == NULL) return(ret);
4279 if (last == NULL) ret = last = cur;
4280 else {
4281 last->next = cur;
4282 last = cur;
4283 }
4284 SKIP_BLANKS;
4285 } while (RAW == '|');
4286 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004287 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004288 return(ret);
4289 }
4290 NEXT;
4291 return(ret);
4292}
4293
4294/**
4295 * xmlParseEnumeratedType:
4296 * @ctxt: an XML parser context
4297 * @tree: the enumeration tree built while parsing
4298 *
4299 * parse an Enumerated attribute type.
4300 *
4301 * [57] EnumeratedType ::= NotationType | Enumeration
4302 *
4303 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4304 *
4305 *
4306 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4307 */
4308
4309int
4310xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004311 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004312 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004313 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004314 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4315 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004316 return(0);
4317 }
4318 SKIP_BLANKS;
4319 *tree = xmlParseNotationType(ctxt);
4320 if (*tree == NULL) return(0);
4321 return(XML_ATTRIBUTE_NOTATION);
4322 }
4323 *tree = xmlParseEnumerationType(ctxt);
4324 if (*tree == NULL) return(0);
4325 return(XML_ATTRIBUTE_ENUMERATION);
4326}
4327
4328/**
4329 * xmlParseAttributeType:
4330 * @ctxt: an XML parser context
4331 * @tree: the enumeration tree built while parsing
4332 *
4333 * parse the Attribute list def for an element
4334 *
4335 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4336 *
4337 * [55] StringType ::= 'CDATA'
4338 *
4339 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4340 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4341 *
4342 * Validity constraints for attribute values syntax are checked in
4343 * xmlValidateAttributeValue()
4344 *
4345 * [ VC: ID ]
4346 * Values of type ID must match the Name production. A name must not
4347 * appear more than once in an XML document as a value of this type;
4348 * i.e., ID values must uniquely identify the elements which bear them.
4349 *
4350 * [ VC: One ID per Element Type ]
4351 * No element type may have more than one ID attribute specified.
4352 *
4353 * [ VC: ID Attribute Default ]
4354 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4355 *
4356 * [ VC: IDREF ]
4357 * Values of type IDREF must match the Name production, and values
4358 * of type IDREFS must match Names; each IDREF Name must match the value
4359 * of an ID attribute on some element in the XML document; i.e. IDREF
4360 * values must match the value of some ID attribute.
4361 *
4362 * [ VC: Entity Name ]
4363 * Values of type ENTITY must match the Name production, values
4364 * of type ENTITIES must match Names; each Entity Name must match the
4365 * name of an unparsed entity declared in the DTD.
4366 *
4367 * [ VC: Name Token ]
4368 * Values of type NMTOKEN must match the Nmtoken production; values
4369 * of type NMTOKENS must match Nmtokens.
4370 *
4371 * Returns the attribute type
4372 */
4373int
4374xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4375 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004376 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004377 SKIP(5);
4378 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004379 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004380 SKIP(6);
4381 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004382 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004383 SKIP(5);
4384 return(XML_ATTRIBUTE_IDREF);
4385 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4386 SKIP(2);
4387 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004388 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004389 SKIP(6);
4390 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004391 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004392 SKIP(8);
4393 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004394 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004395 SKIP(8);
4396 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004397 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004398 SKIP(7);
4399 return(XML_ATTRIBUTE_NMTOKEN);
4400 }
4401 return(xmlParseEnumeratedType(ctxt, tree));
4402}
4403
4404/**
4405 * xmlParseAttributeListDecl:
4406 * @ctxt: an XML parser context
4407 *
4408 * : parse the Attribute list def for an element
4409 *
4410 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4411 *
4412 * [53] AttDef ::= S Name S AttType S DefaultDecl
4413 *
4414 */
4415void
4416xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004417 const xmlChar *elemName;
4418 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004419 xmlEnumerationPtr tree;
4420
Daniel Veillarda07050d2003-10-19 14:46:32 +00004421 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004422 xmlParserInputPtr input = ctxt->input;
4423
4424 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004425 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004427 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004428 }
4429 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004430 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004432 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4433 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004434 return;
4435 }
4436 SKIP_BLANKS;
4437 GROW;
4438 while (RAW != '>') {
4439 const xmlChar *check = CUR_PTR;
4440 int type;
4441 int def;
4442 xmlChar *defaultValue = NULL;
4443
4444 GROW;
4445 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004446 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004448 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4449 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004450 break;
4451 }
4452 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004453 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004455 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004456 if (defaultValue != NULL)
4457 xmlFree(defaultValue);
4458 break;
4459 }
4460 SKIP_BLANKS;
4461
4462 type = xmlParseAttributeType(ctxt, &tree);
4463 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004464 if (defaultValue != NULL)
4465 xmlFree(defaultValue);
4466 break;
4467 }
4468
4469 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004470 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004471 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4472 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004473 if (defaultValue != NULL)
4474 xmlFree(defaultValue);
4475 if (tree != NULL)
4476 xmlFreeEnumeration(tree);
4477 break;
4478 }
4479 SKIP_BLANKS;
4480
4481 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4482 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004483 if (defaultValue != NULL)
4484 xmlFree(defaultValue);
4485 if (tree != NULL)
4486 xmlFreeEnumeration(tree);
4487 break;
4488 }
4489
4490 GROW;
4491 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004492 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004494 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004495 if (defaultValue != NULL)
4496 xmlFree(defaultValue);
4497 if (tree != NULL)
4498 xmlFreeEnumeration(tree);
4499 break;
4500 }
4501 SKIP_BLANKS;
4502 }
4503 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004504 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4505 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004506 if (defaultValue != NULL)
4507 xmlFree(defaultValue);
4508 if (tree != NULL)
4509 xmlFreeEnumeration(tree);
4510 break;
4511 }
4512 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4513 (ctxt->sax->attributeDecl != NULL))
4514 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4515 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004516 else if (tree != NULL)
4517 xmlFreeEnumeration(tree);
4518
4519 if ((ctxt->sax2) && (defaultValue != NULL) &&
4520 (def != XML_ATTRIBUTE_IMPLIED) &&
4521 (def != XML_ATTRIBUTE_REQUIRED)) {
4522 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4523 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004524 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4525 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4526 }
Owen Taylor3473f882001-02-23 17:55:21 +00004527 if (defaultValue != NULL)
4528 xmlFree(defaultValue);
4529 GROW;
4530 }
4531 if (RAW == '>') {
4532 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004533 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4534 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004535 }
4536 NEXT;
4537 }
Owen Taylor3473f882001-02-23 17:55:21 +00004538 }
4539}
4540
4541/**
4542 * xmlParseElementMixedContentDecl:
4543 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004544 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004545 *
4546 * parse the declaration for a Mixed Element content
4547 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4548 *
4549 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4550 * '(' S? '#PCDATA' S? ')'
4551 *
4552 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4553 *
4554 * [ VC: No Duplicate Types ]
4555 * The same name must not appear more than once in a single
4556 * mixed-content declaration.
4557 *
4558 * returns: the list of the xmlElementContentPtr describing the element choices
4559 */
4560xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004561xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004562 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004563 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004564
4565 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004566 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004567 SKIP(7);
4568 SKIP_BLANKS;
4569 SHRINK;
4570 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004571 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004572 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4573"Element content declaration doesn't start and stop in the same entity\n",
4574 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004575 }
Owen Taylor3473f882001-02-23 17:55:21 +00004576 NEXT;
4577 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4578 if (RAW == '*') {
4579 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4580 NEXT;
4581 }
4582 return(ret);
4583 }
4584 if ((RAW == '(') || (RAW == '|')) {
4585 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4586 if (ret == NULL) return(NULL);
4587 }
4588 while (RAW == '|') {
4589 NEXT;
4590 if (elem == NULL) {
4591 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4592 if (ret == NULL) return(NULL);
4593 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004594 if (cur != NULL)
4595 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004596 cur = ret;
4597 } else {
4598 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4599 if (n == NULL) return(NULL);
4600 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004601 if (n->c1 != NULL)
4602 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004603 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004604 if (n != NULL)
4605 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004606 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004607 }
4608 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004609 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004610 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004612 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004613 xmlFreeElementContent(cur);
4614 return(NULL);
4615 }
4616 SKIP_BLANKS;
4617 GROW;
4618 }
4619 if ((RAW == ')') && (NXT(1) == '*')) {
4620 if (elem != NULL) {
4621 cur->c2 = xmlNewElementContent(elem,
4622 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004623 if (cur->c2 != NULL)
4624 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004625 }
4626 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004627 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004628 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4629"Element content declaration doesn't start and stop in the same entity\n",
4630 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004631 }
Owen Taylor3473f882001-02-23 17:55:21 +00004632 SKIP(2);
4633 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004634 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004635 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004636 return(NULL);
4637 }
4638
4639 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004640 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
4642 return(ret);
4643}
4644
/**
 * xmlParseElementChildrenContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for an Element children content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
4676xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004677xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004678 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004679 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004680 xmlChar type = 0;
4681
4682 SKIP_BLANKS;
4683 GROW;
4684 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004685 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004686
Owen Taylor3473f882001-02-23 17:55:21 +00004687 /* Recurse on first child */
4688 NEXT;
4689 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004690 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 SKIP_BLANKS;
4692 GROW;
4693 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004694 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004695 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004696 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 return(NULL);
4698 }
4699 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004700 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004701 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004702 return(NULL);
4703 }
Owen Taylor3473f882001-02-23 17:55:21 +00004704 GROW;
4705 if (RAW == '?') {
4706 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4707 NEXT;
4708 } else if (RAW == '*') {
4709 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4710 NEXT;
4711 } else if (RAW == '+') {
4712 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4713 NEXT;
4714 } else {
4715 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4716 }
Owen Taylor3473f882001-02-23 17:55:21 +00004717 GROW;
4718 }
4719 SKIP_BLANKS;
4720 SHRINK;
4721 while (RAW != ')') {
4722 /*
4723 * Each loop we parse one separator and one element.
4724 */
4725 if (RAW == ',') {
4726 if (type == 0) type = CUR;
4727
4728 /*
4729 * Detect "Name | Name , Name" error
4730 */
4731 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004732 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004733 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004734 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004735 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004736 xmlFreeElementContent(last);
4737 if (ret != NULL)
4738 xmlFreeElementContent(ret);
4739 return(NULL);
4740 }
4741 NEXT;
4742
4743 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4744 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004745 if ((last != NULL) && (last != ret))
4746 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004747 xmlFreeElementContent(ret);
4748 return(NULL);
4749 }
4750 if (last == NULL) {
4751 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004752 if (ret != NULL)
4753 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004754 ret = cur = op;
4755 } else {
4756 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004757 if (op != NULL)
4758 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004759 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004760 if (last != NULL)
4761 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004762 cur =op;
4763 last = NULL;
4764 }
4765 } else if (RAW == '|') {
4766 if (type == 0) type = CUR;
4767
4768 /*
4769 * Detect "Name , Name | Name" error
4770 */
4771 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004772 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004773 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004774 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004775 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004776 xmlFreeElementContent(last);
4777 if (ret != NULL)
4778 xmlFreeElementContent(ret);
4779 return(NULL);
4780 }
4781 NEXT;
4782
4783 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4784 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004785 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004786 xmlFreeElementContent(last);
4787 if (ret != NULL)
4788 xmlFreeElementContent(ret);
4789 return(NULL);
4790 }
4791 if (last == NULL) {
4792 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004793 if (ret != NULL)
4794 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004795 ret = cur = op;
4796 } else {
4797 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004798 if (op != NULL)
4799 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004800 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004801 if (last != NULL)
4802 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004803 cur =op;
4804 last = NULL;
4805 }
4806 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004807 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004808 if (ret != NULL)
4809 xmlFreeElementContent(ret);
4810 return(NULL);
4811 }
4812 GROW;
4813 SKIP_BLANKS;
4814 GROW;
4815 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004816 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004817 /* Recurse on second child */
4818 NEXT;
4819 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004820 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004821 SKIP_BLANKS;
4822 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004823 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004824 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004825 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004826 if (ret != NULL)
4827 xmlFreeElementContent(ret);
4828 return(NULL);
4829 }
4830 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 if (RAW == '?') {
4832 last->ocur = XML_ELEMENT_CONTENT_OPT;
4833 NEXT;
4834 } else if (RAW == '*') {
4835 last->ocur = XML_ELEMENT_CONTENT_MULT;
4836 NEXT;
4837 } else if (RAW == '+') {
4838 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4839 NEXT;
4840 } else {
4841 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4842 }
4843 }
4844 SKIP_BLANKS;
4845 GROW;
4846 }
4847 if ((cur != NULL) && (last != NULL)) {
4848 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004849 if (last != NULL)
4850 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004851 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004852 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004853 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4854"Element content declaration doesn't start and stop in the same entity\n",
4855 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004856 }
Owen Taylor3473f882001-02-23 17:55:21 +00004857 NEXT;
4858 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004859 if (ret != NULL) {
4860 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4861 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4862 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4863 else
4864 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4865 }
Owen Taylor3473f882001-02-23 17:55:21 +00004866 NEXT;
4867 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004868 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004869 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004870 cur = ret;
4871 /*
4872 * Some normalization:
4873 * (a | b* | c?)* == (a | b | c)*
4874 */
4875 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4876 if ((cur->c1 != NULL) &&
4877 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4878 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4879 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4880 if ((cur->c2 != NULL) &&
4881 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4882 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4883 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4884 cur = cur->c2;
4885 }
4886 }
Owen Taylor3473f882001-02-23 17:55:21 +00004887 NEXT;
4888 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004889 if (ret != NULL) {
4890 int found = 0;
4891
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004892 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4893 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4894 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004895 else
4896 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004897 /*
4898 * Some normalization:
4899 * (a | b*)+ == (a | b)*
4900 * (a | b?)+ == (a | b)*
4901 */
4902 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4903 if ((cur->c1 != NULL) &&
4904 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4905 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4906 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4907 found = 1;
4908 }
4909 if ((cur->c2 != NULL) &&
4910 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4911 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4912 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4913 found = 1;
4914 }
4915 cur = cur->c2;
4916 }
4917 if (found)
4918 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4919 }
Owen Taylor3473f882001-02-23 17:55:21 +00004920 NEXT;
4921 }
4922 return(ret);
4923}
4924
4925/**
4926 * xmlParseElementContentDecl:
4927 * @ctxt: an XML parser context
4928 * @name: the name of the element being defined.
4929 * @result: the Element Content pointer will be stored here if any
4930 *
4931 * parse the declaration for an Element content either Mixed or Children,
4932 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4933 *
4934 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4935 *
4936 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4937 */
4938
4939int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004940xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004941 xmlElementContentPtr *result) {
4942
4943 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004944 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004945 int res;
4946
4947 *result = NULL;
4948
4949 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004950 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004951 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004952 return(-1);
4953 }
4954 NEXT;
4955 GROW;
4956 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004957 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004958 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004959 res = XML_ELEMENT_TYPE_MIXED;
4960 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004961 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004962 res = XML_ELEMENT_TYPE_ELEMENT;
4963 }
Owen Taylor3473f882001-02-23 17:55:21 +00004964 SKIP_BLANKS;
4965 *result = tree;
4966 return(res);
4967}
4968
4969/**
4970 * xmlParseElementDecl:
4971 * @ctxt: an XML parser context
4972 *
4973 * parse an Element declaration.
4974 *
4975 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4976 *
4977 * [ VC: Unique Element Type Declaration ]
4978 * No element type may be declared more than once
4979 *
4980 * Returns the type of the element, or -1 in case of error
4981 */
4982int
4983xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004984 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004985 int ret = -1;
4986 xmlElementContentPtr content = NULL;
4987
4988 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004989 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004990 xmlParserInputPtr input = ctxt->input;
4991
4992 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004993 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4995 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004996 }
4997 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004998 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004999 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5001 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 return(-1);
5003 }
5004 while ((RAW == 0) && (ctxt->inputNr > 1))
5005 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005006 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5008 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005009 }
5010 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005011 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005012 SKIP(5);
5013 /*
5014 * Element must always be empty.
5015 */
5016 ret = XML_ELEMENT_TYPE_EMPTY;
5017 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5018 (NXT(2) == 'Y')) {
5019 SKIP(3);
5020 /*
5021 * Element is a generic container.
5022 */
5023 ret = XML_ELEMENT_TYPE_ANY;
5024 } else if (RAW == '(') {
5025 ret = xmlParseElementContentDecl(ctxt, name, &content);
5026 } else {
5027 /*
5028 * [ WFC: PEs in Internal Subset ] error handling.
5029 */
5030 if ((RAW == '%') && (ctxt->external == 0) &&
5031 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005032 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005033 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005034 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005035 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005036 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5037 }
Owen Taylor3473f882001-02-23 17:55:21 +00005038 return(-1);
5039 }
5040
5041 SKIP_BLANKS;
5042 /*
5043 * Pop-up of finished entities.
5044 */
5045 while ((RAW == 0) && (ctxt->inputNr > 1))
5046 xmlPopInput(ctxt);
5047 SKIP_BLANKS;
5048
5049 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005050 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005051 } else {
5052 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005053 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5054 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005055 }
5056
5057 NEXT;
5058 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5059 (ctxt->sax->elementDecl != NULL))
5060 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5061 content);
5062 }
5063 if (content != NULL) {
5064 xmlFreeElementContent(content);
5065 }
Owen Taylor3473f882001-02-23 17:55:21 +00005066 }
5067 return(ret);
5068}
5069
5070/**
Owen Taylor3473f882001-02-23 17:55:21 +00005071 * xmlParseConditionalSections
5072 * @ctxt: an XML parser context
5073 *
5074 * [61] conditionalSect ::= includeSect | ignoreSect
5075 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5076 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5077 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5078 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5079 */
5080
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005081static void
Owen Taylor3473f882001-02-23 17:55:21 +00005082xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5083 SKIP(3);
5084 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005085 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005086 SKIP(7);
5087 SKIP_BLANKS;
5088 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005089 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005090 } else {
5091 NEXT;
5092 }
5093 if (xmlParserDebugEntities) {
5094 if ((ctxt->input != NULL) && (ctxt->input->filename))
5095 xmlGenericError(xmlGenericErrorContext,
5096 "%s(%d): ", ctxt->input->filename,
5097 ctxt->input->line);
5098 xmlGenericError(xmlGenericErrorContext,
5099 "Entering INCLUDE Conditional Section\n");
5100 }
5101
5102 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5103 (NXT(2) != '>'))) {
5104 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005105 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005106
5107 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5108 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005109 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005110 NEXT;
5111 } else if (RAW == '%') {
5112 xmlParsePEReference(ctxt);
5113 } else
5114 xmlParseMarkupDecl(ctxt);
5115
5116 /*
5117 * Pop-up of finished entities.
5118 */
5119 while ((RAW == 0) && (ctxt->inputNr > 1))
5120 xmlPopInput(ctxt);
5121
Daniel Veillardfdc91562002-07-01 21:52:03 +00005122 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005123 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005124 break;
5125 }
5126 }
5127 if (xmlParserDebugEntities) {
5128 if ((ctxt->input != NULL) && (ctxt->input->filename))
5129 xmlGenericError(xmlGenericErrorContext,
5130 "%s(%d): ", ctxt->input->filename,
5131 ctxt->input->line);
5132 xmlGenericError(xmlGenericErrorContext,
5133 "Leaving INCLUDE Conditional Section\n");
5134 }
5135
Daniel Veillarda07050d2003-10-19 14:46:32 +00005136 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005137 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005138 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005139 int depth = 0;
5140
5141 SKIP(6);
5142 SKIP_BLANKS;
5143 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005144 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005145 } else {
5146 NEXT;
5147 }
5148 if (xmlParserDebugEntities) {
5149 if ((ctxt->input != NULL) && (ctxt->input->filename))
5150 xmlGenericError(xmlGenericErrorContext,
5151 "%s(%d): ", ctxt->input->filename,
5152 ctxt->input->line);
5153 xmlGenericError(xmlGenericErrorContext,
5154 "Entering IGNORE Conditional Section\n");
5155 }
5156
5157 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005158 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005159 * But disable SAX event generating DTD building in the meantime
5160 */
5161 state = ctxt->disableSAX;
5162 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005163 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 ctxt->instate = XML_PARSER_IGNORE;
5165
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005166 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005167 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5168 depth++;
5169 SKIP(3);
5170 continue;
5171 }
5172 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5173 if (--depth >= 0) SKIP(3);
5174 continue;
5175 }
5176 NEXT;
5177 continue;
5178 }
5179
5180 ctxt->disableSAX = state;
5181 ctxt->instate = instate;
5182
5183 if (xmlParserDebugEntities) {
5184 if ((ctxt->input != NULL) && (ctxt->input->filename))
5185 xmlGenericError(xmlGenericErrorContext,
5186 "%s(%d): ", ctxt->input->filename,
5187 ctxt->input->line);
5188 xmlGenericError(xmlGenericErrorContext,
5189 "Leaving IGNORE Conditional Section\n");
5190 }
5191
5192 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005193 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005194 }
5195
5196 if (RAW == 0)
5197 SHRINK;
5198
5199 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005200 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005201 } else {
5202 SKIP(3);
5203 }
5204}
5205
5206/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005207 * xmlParseMarkupDecl:
5208 * @ctxt: an XML parser context
5209 *
5210 * parse Markup declarations
5211 *
5212 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5213 * NotationDecl | PI | Comment
5214 *
5215 * [ VC: Proper Declaration/PE Nesting ]
5216 * Parameter-entity replacement text must be properly nested with
5217 * markup declarations. That is to say, if either the first character
5218 * or the last character of a markup declaration (markupdecl above) is
5219 * contained in the replacement text for a parameter-entity reference,
5220 * both must be contained in the same replacement text.
5221 *
5222 * [ WFC: PEs in Internal Subset ]
5223 * In the internal DTD subset, parameter-entity references can occur
5224 * only where markup declarations can occur, not within markup declarations.
5225 * (This does not apply to references that occur in external parameter
5226 * entities or to the external subset.)
5227 */
5228void
5229xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5230 GROW;
5231 xmlParseElementDecl(ctxt);
5232 xmlParseAttributeListDecl(ctxt);
5233 xmlParseEntityDecl(ctxt);
5234 xmlParseNotationDecl(ctxt);
5235 xmlParsePI(ctxt);
5236 xmlParseComment(ctxt);
5237 /*
5238 * This is only for internal subset. On external entities,
5239 * the replacement is done before parsing stage
5240 */
5241 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5242 xmlParsePEReference(ctxt);
5243
5244 /*
5245 * Conditional sections are allowed from entities included
5246 * by PE References in the internal subset.
5247 */
5248 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5249 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5250 xmlParseConditionalSections(ctxt);
5251 }
5252 }
5253
5254 ctxt->instate = XML_PARSER_DTD;
5255}
5256
5257/**
5258 * xmlParseTextDecl:
5259 * @ctxt: an XML parser context
5260 *
5261 * parse an XML declaration header for external entities
5262 *
5263 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5264 *
5265 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5266 */
5267
5268void
5269xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5270 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005271 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005272
5273 /*
5274 * We know that '<?xml' is here.
5275 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005276 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005277 SKIP(5);
5278 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005279 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005280 return;
5281 }
5282
William M. Brack76e95df2003-10-18 16:20:14 +00005283 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005284 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5285 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005286 }
5287 SKIP_BLANKS;
5288
5289 /*
5290 * We may have the VersionInfo here.
5291 */
5292 version = xmlParseVersionInfo(ctxt);
5293 if (version == NULL)
5294 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005295 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005296 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5298 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005299 }
5300 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005301 ctxt->input->version = version;
5302
5303 /*
5304 * We must have the encoding declaration
5305 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005306 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005307 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5308 /*
5309 * The XML REC instructs us to stop parsing right here
5310 */
5311 return;
5312 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005313 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5314 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5315 "Missing encoding in text declaration\n");
5316 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005317
5318 SKIP_BLANKS;
5319 if ((RAW == '?') && (NXT(1) == '>')) {
5320 SKIP(2);
5321 } else if (RAW == '>') {
5322 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005323 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005324 NEXT;
5325 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005326 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005327 MOVETO_ENDTAG(CUR_PTR);
5328 NEXT;
5329 }
5330}
5331
5332/**
Owen Taylor3473f882001-02-23 17:55:21 +00005333 * xmlParseExternalSubset:
5334 * @ctxt: an XML parser context
5335 * @ExternalID: the external identifier
5336 * @SystemID: the system identifier (or URL)
5337 *
5338 * parse Markup declarations from an external subset
5339 *
5340 * [30] extSubset ::= textDecl? extSubsetDecl
5341 *
5342 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5343 */
5344void
5345xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5346 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005347 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005348 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005349 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005350 xmlParseTextDecl(ctxt);
5351 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5352 /*
5353 * The XML REC instructs us to stop parsing right here
5354 */
5355 ctxt->instate = XML_PARSER_EOF;
5356 return;
5357 }
5358 }
5359 if (ctxt->myDoc == NULL) {
5360 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5361 }
5362 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5363 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5364
5365 ctxt->instate = XML_PARSER_DTD;
5366 ctxt->external = 1;
5367 while (((RAW == '<') && (NXT(1) == '?')) ||
5368 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005369 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005370 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005371 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005372
5373 GROW;
5374 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5375 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005376 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005377 NEXT;
5378 } else if (RAW == '%') {
5379 xmlParsePEReference(ctxt);
5380 } else
5381 xmlParseMarkupDecl(ctxt);
5382
5383 /*
5384 * Pop-up of finished entities.
5385 */
5386 while ((RAW == 0) && (ctxt->inputNr > 1))
5387 xmlPopInput(ctxt);
5388
Daniel Veillardfdc91562002-07-01 21:52:03 +00005389 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005390 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005391 break;
5392 }
5393 }
5394
5395 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005396 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005397 }
5398
5399}
5400
5401/**
5402 * xmlParseReference:
5403 * @ctxt: an XML parser context
5404 *
5405 * parse and handle entity references in content, depending on the SAX
5406 * interface, this may end-up in a call to character() if this is a
5407 * CharRef, a predefined entity, if there is no reference() callback.
5408 * or if the parser was asked to switch to that mode.
5409 *
5410 * [67] Reference ::= EntityRef | CharRef
5411 */
5412void
5413xmlParseReference(xmlParserCtxtPtr ctxt) {
5414 xmlEntityPtr ent;
5415 xmlChar *val;
5416 if (RAW != '&') return;
5417
5418 if (NXT(1) == '#') {
5419 int i = 0;
5420 xmlChar out[10];
5421 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005422 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005423
5424 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5425 /*
5426 * So we are using non-UTF-8 buffers
5427 * Check that the char fit on 8bits, if not
5428 * generate a CharRef.
5429 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005430 if (value <= 0xFF) {
5431 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005432 out[1] = 0;
5433 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5434 (!ctxt->disableSAX))
5435 ctxt->sax->characters(ctxt->userData, out, 1);
5436 } else {
5437 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005438 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005439 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005440 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005441 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5442 (!ctxt->disableSAX))
5443 ctxt->sax->reference(ctxt->userData, out);
5444 }
5445 } else {
5446 /*
5447 * Just encode the value in UTF-8
5448 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005449 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005450 out[i] = 0;
5451 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5452 (!ctxt->disableSAX))
5453 ctxt->sax->characters(ctxt->userData, out, i);
5454 }
5455 } else {
5456 ent = xmlParseEntityRef(ctxt);
5457 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005458 if (!ctxt->wellFormed)
5459 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if ((ent->name != NULL) &&
5461 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5462 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005463 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005464
5465
5466 /*
5467 * The first reference to the entity trigger a parsing phase
5468 * where the ent->children is filled with the result from
5469 * the parsing.
5470 */
5471 if (ent->children == NULL) {
5472 xmlChar *value;
5473 value = ent->content;
5474
5475 /*
5476 * Check that this entity is well formed
5477 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005478 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005479 (value[1] == 0) && (value[0] == '<') &&
5480 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5481 /*
5482 * DONE: get definite answer on this !!!
5483 * Lots of entity decls are used to declare a single
5484 * char
5485 * <!ENTITY lt "<">
5486 * Which seems to be valid since
5487 * 2.4: The ampersand character (&) and the left angle
5488 * bracket (<) may appear in their literal form only
5489 * when used ... They are also legal within the literal
5490 * entity value of an internal entity declaration;i
5491 * see "4.3.2 Well-Formed Parsed Entities".
5492 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5493 * Looking at the OASIS test suite and James Clark
5494 * tests, this is broken. However the XML REC uses
5495 * it. Is the XML REC not well-formed ????
5496 * This is a hack to avoid this problem
5497 *
5498 * ANSWER: since lt gt amp .. are already defined,
5499 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005500 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005501 * is lousy but acceptable.
5502 */
5503 list = xmlNewDocText(ctxt->myDoc, value);
5504 if (list != NULL) {
5505 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5506 (ent->children == NULL)) {
5507 ent->children = list;
5508 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005509 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005510 list->parent = (xmlNodePtr) ent;
5511 } else {
5512 xmlFreeNodeList(list);
5513 }
5514 } else if (list != NULL) {
5515 xmlFreeNodeList(list);
5516 }
5517 } else {
5518 /*
5519 * 4.3.2: An internal general parsed entity is well-formed
5520 * if its replacement text matches the production labeled
5521 * content.
5522 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005523
5524 void *user_data;
5525 /*
5526 * This is a bit hackish but this seems the best
5527 * way to make sure both SAX and DOM entity support
5528 * behaves okay.
5529 */
5530 if (ctxt->userData == ctxt)
5531 user_data = NULL;
5532 else
5533 user_data = ctxt->userData;
5534
Owen Taylor3473f882001-02-23 17:55:21 +00005535 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5536 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005537 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5538 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005539 ctxt->depth--;
5540 } else if (ent->etype ==
5541 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5542 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005543 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005544 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005545 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005546 ctxt->depth--;
5547 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005548 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005549 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5550 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005551 }
5552 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005553 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005554 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005555 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005556 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5557 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005558 (ent->children == NULL)) {
5559 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005560 if (ctxt->replaceEntities) {
5561 /*
5562 * Prune it directly in the generated document
5563 * except for single text nodes.
5564 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005565 if (((list->type == XML_TEXT_NODE) &&
5566 (list->next == NULL)) ||
5567 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005568 list->parent = (xmlNodePtr) ent;
5569 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005570 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005571 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005572 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005573 while (list != NULL) {
5574 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005575 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005576 if (list->next == NULL)
5577 ent->last = list;
5578 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005579 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005580 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005581#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005582 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5583 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005584#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005585 }
5586 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005587 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 while (list != NULL) {
5589 list->parent = (xmlNodePtr) ent;
5590 if (list->next == NULL)
5591 ent->last = list;
5592 list = list->next;
5593 }
Owen Taylor3473f882001-02-23 17:55:21 +00005594 }
5595 } else {
5596 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005597 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005598 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005599 } else if ((ret != XML_ERR_OK) &&
5600 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005601 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005602 } else if (list != NULL) {
5603 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005604 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005605 }
5606 }
5607 }
5608 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5609 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5610 /*
5611 * Create a node.
5612 */
5613 ctxt->sax->reference(ctxt->userData, ent->name);
5614 return;
5615 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005616 /*
5617 * There is a problem on the handling of _private for entities
5618 * (bug 155816): Should we copy the content of the field from
5619 * the entity (possibly overwriting some value set by the user
5620 * when a copy is created), should we leave it alone, or should
5621 * we try to take care of different situations? The problem
5622 * is exacerbated by the usage of this field by the xmlReader.
5623 * To fix this bug, we look at _private on the created node
5624 * and, if it's NULL, we copy in whatever was in the entity.
5625 * If it's not NULL we leave it alone. This is somewhat of a
5626 * hack - maybe we should have further tests to determine
5627 * what to do.
5628 */
Owen Taylor3473f882001-02-23 17:55:21 +00005629 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5630 /*
5631 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005632 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005633 * In the first occurrence list contains the replacement.
5634 * progressive == 2 means we are operating on the Reader
5635 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005636 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005637 if (((list == NULL) && (ent->owner == 0)) ||
5638 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005639 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005640
5641 /*
5642 * when operating on a reader, the entities definitions
5643 * are always owning the entities subtree.
5644 if (ctxt->parseMode == XML_PARSE_READER)
5645 ent->owner = 1;
5646 */
5647
Daniel Veillard62f313b2001-07-04 19:49:14 +00005648 cur = ent->children;
5649 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005650 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005651 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005652 if (nw->_private == NULL)
5653 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005654 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005655 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005656 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005657 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005658 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005659 if (cur == ent->last) {
5660 /*
5661 * needed to detect some strange empty
5662 * node cases in the reader tests
5663 */
5664 if ((ctxt->parseMode == XML_PARSE_READER) &&
5665 (nw->type == XML_ELEMENT_NODE) &&
5666 (nw->children == NULL))
5667 nw->extra = 1;
5668
Daniel Veillard62f313b2001-07-04 19:49:14 +00005669 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005670 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005671 cur = cur->next;
5672 }
Daniel Veillard81273902003-09-30 00:43:48 +00005673#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005674 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005675 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005676#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005677 } else if (list == NULL) {
5678 xmlNodePtr nw = NULL, cur, next, last,
5679 firstChild = NULL;
5680 /*
5681 * Copy the entity child list and make it the new
5682 * entity child list. The goal is to make sure any
5683 * ID or REF referenced will be the one from the
5684 * document content and not the entity copy.
5685 */
5686 cur = ent->children;
5687 ent->children = NULL;
5688 last = ent->last;
5689 ent->last = NULL;
5690 while (cur != NULL) {
5691 next = cur->next;
5692 cur->next = NULL;
5693 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005694 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005695 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005696 if (nw->_private == NULL)
5697 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005698 if (firstChild == NULL){
5699 firstChild = cur;
5700 }
5701 xmlAddChild((xmlNodePtr) ent, nw);
5702 xmlAddChild(ctxt->node, cur);
5703 }
5704 if (cur == last)
5705 break;
5706 cur = next;
5707 }
5708 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005709#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005710 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5711 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005712#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005713 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005714 const xmlChar *nbktext;
5715
Daniel Veillard62f313b2001-07-04 19:49:14 +00005716 /*
5717 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005718 * node with a possible previous text one which
5719 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005720 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005721 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5722 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005723 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005724 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005725 if ((ent->last != ent->children) &&
5726 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005727 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005728 xmlAddChildList(ctxt->node, ent->children);
5729 }
5730
Owen Taylor3473f882001-02-23 17:55:21 +00005731 /*
5732 * This is to avoid a nasty side effect, see
5733 * characters() in SAX.c
5734 */
5735 ctxt->nodemem = 0;
5736 ctxt->nodelen = 0;
5737 return;
5738 } else {
5739 /*
5740 * Probably running in SAX mode
5741 */
5742 xmlParserInputPtr input;
5743
5744 input = xmlNewEntityInputStream(ctxt, ent);
5745 xmlPushInput(ctxt, input);
5746 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005747 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5748 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005749 xmlParseTextDecl(ctxt);
5750 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5751 /*
5752 * The XML REC instructs us to stop parsing right here
5753 */
5754 ctxt->instate = XML_PARSER_EOF;
5755 return;
5756 }
5757 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005758 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5759 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 }
5761 }
5762 return;
5763 }
5764 }
5765 } else {
5766 val = ent->content;
5767 if (val == NULL) return;
5768 /*
5769 * inline the entity.
5770 */
5771 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5772 (!ctxt->disableSAX))
5773 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5774 }
5775 }
5776}
5777
5778/**
5779 * xmlParseEntityRef:
5780 * @ctxt: an XML parser context
5781 *
5782 * parse ENTITY references declarations
5783 *
5784 * [68] EntityRef ::= '&' Name ';'
5785 *
5786 * [ WFC: Entity Declared ]
5787 * In a document without any DTD, a document with only an internal DTD
5788 * subset which contains no parameter entity references, or a document
5789 * with "standalone='yes'", the Name given in the entity reference
5790 * must match that in an entity declaration, except that well-formed
5791 * documents need not declare any of the following entities: amp, lt,
5792 * gt, apos, quot. The declaration of a parameter entity must precede
5793 * any reference to it. Similarly, the declaration of a general entity
5794 * must precede any reference to it which appears in a default value in an
5795 * attribute-list declaration. Note that if entities are declared in the
5796 * external subset or in external parameter entities, a non-validating
5797 * processor is not obligated to read and process their declarations;
5798 * for such documents, the rule that an entity must be declared is a
5799 * well-formedness constraint only if standalone='yes'.
5800 *
5801 * [ WFC: Parsed Entity ]
5802 * An entity reference must not contain the name of an unparsed entity
5803 *
5804 * Returns the xmlEntityPtr if found, or NULL otherwise.
5805 */
5806xmlEntityPtr
5807xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005808 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005809 xmlEntityPtr ent = NULL;
5810
5811 GROW;
5812
5813 if (RAW == '&') {
5814 NEXT;
5815 name = xmlParseName(ctxt);
5816 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005817 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5818 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005819 } else {
5820 if (RAW == ';') {
5821 NEXT;
5822 /*
5823 * Ask first SAX for entity resolution, otherwise try the
5824 * predefined set.
5825 */
5826 if (ctxt->sax != NULL) {
5827 if (ctxt->sax->getEntity != NULL)
5828 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005829 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005830 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005831 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5832 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005833 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005834 }
Owen Taylor3473f882001-02-23 17:55:21 +00005835 }
5836 /*
5837 * [ WFC: Entity Declared ]
5838 * In a document without any DTD, a document with only an
5839 * internal DTD subset which contains no parameter entity
5840 * references, or a document with "standalone='yes'", the
5841 * Name given in the entity reference must match that in an
5842 * entity declaration, except that well-formed documents
5843 * need not declare any of the following entities: amp, lt,
5844 * gt, apos, quot.
5845 * The declaration of a parameter entity must precede any
5846 * reference to it.
5847 * Similarly, the declaration of a general entity must
5848 * precede any reference to it which appears in a default
5849 * value in an attribute-list declaration. Note that if
5850 * entities are declared in the external subset or in
5851 * external parameter entities, a non-validating processor
5852 * is not obligated to read and process their declarations;
5853 * for such documents, the rule that an entity must be
5854 * declared is a well-formedness constraint only if
5855 * standalone='yes'.
5856 */
5857 if (ent == NULL) {
5858 if ((ctxt->standalone == 1) ||
5859 ((ctxt->hasExternalSubset == 0) &&
5860 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005861 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005862 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005863 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005864 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005865 "Entity '%s' not defined\n", name);
5866 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005867 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005868 }
5869
5870 /*
5871 * [ WFC: Parsed Entity ]
5872 * An entity reference must not contain the name of an
5873 * unparsed entity
5874 */
5875 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005876 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005877 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005878 }
5879
5880 /*
5881 * [ WFC: No External Entity References ]
5882 * Attribute values cannot contain direct or indirect
5883 * entity references to external entities.
5884 */
5885 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5886 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005887 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5888 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005889 }
5890 /*
5891 * [ WFC: No < in Attribute Values ]
5892 * The replacement text of any entity referred to directly or
5893 * indirectly in an attribute value (other than "&lt;") must
5894 * not contain a <.
5895 */
5896 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5897 (ent != NULL) &&
5898 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5899 (ent->content != NULL) &&
5900 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005901 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005902 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005903 }
5904
5905 /*
5906 * Internal check, no parameter entities here ...
5907 */
5908 else {
5909 switch (ent->etype) {
5910 case XML_INTERNAL_PARAMETER_ENTITY:
5911 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005912 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5913 "Attempt to reference the parameter entity '%s'\n",
5914 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005915 break;
5916 default:
5917 break;
5918 }
5919 }
5920
5921 /*
5922 * [ WFC: No Recursion ]
5923 * A parsed entity must not contain a recursive reference
5924 * to itself, either directly or indirectly.
5925 * Done somewhere else
5926 */
5927
5928 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005929 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005930 }
Owen Taylor3473f882001-02-23 17:55:21 +00005931 }
5932 }
5933 return(ent);
5934}
5935
5936/**
5937 * xmlParseStringEntityRef:
5938 * @ctxt: an XML parser context
5939 * @str: a pointer to an index in the string
5940 *
5941 * parse ENTITY references declarations, but this version parses it from
5942 * a string value.
5943 *
5944 * [68] EntityRef ::= '&' Name ';'
5945 *
5946 * [ WFC: Entity Declared ]
5947 * In a document without any DTD, a document with only an internal DTD
5948 * subset which contains no parameter entity references, or a document
5949 * with "standalone='yes'", the Name given in the entity reference
5950 * must match that in an entity declaration, except that well-formed
5951 * documents need not declare any of the following entities: amp, lt,
5952 * gt, apos, quot. The declaration of a parameter entity must precede
5953 * any reference to it. Similarly, the declaration of a general entity
5954 * must precede any reference to it which appears in a default value in an
5955 * attribute-list declaration. Note that if entities are declared in the
5956 * external subset or in external parameter entities, a non-validating
5957 * processor is not obligated to read and process their declarations;
5958 * for such documents, the rule that an entity must be declared is a
5959 * well-formedness constraint only if standalone='yes'.
5960 *
5961 * [ WFC: Parsed Entity ]
5962 * An entity reference must not contain the name of an unparsed entity
5963 *
5964 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5965 * is updated to the current location in the string.
5966 */
5967xmlEntityPtr
5968xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5969 xmlChar *name;
5970 const xmlChar *ptr;
5971 xmlChar cur;
5972 xmlEntityPtr ent = NULL;
5973
5974 if ((str == NULL) || (*str == NULL))
5975 return(NULL);
5976 ptr = *str;
5977 cur = *ptr;
5978 if (cur == '&') {
5979 ptr++;
5980 cur = *ptr;
5981 name = xmlParseStringName(ctxt, &ptr);
5982 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005983 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005985 } else {
5986 if (*ptr == ';') {
5987 ptr++;
5988 /*
5989 * Ask first SAX for entity resolution, otherwise try the
5990 * predefined set.
5991 */
5992 if (ctxt->sax != NULL) {
5993 if (ctxt->sax->getEntity != NULL)
5994 ent = ctxt->sax->getEntity(ctxt->userData, name);
5995 if (ent == NULL)
5996 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005997 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005998 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005999 }
Owen Taylor3473f882001-02-23 17:55:21 +00006000 }
6001 /*
6002 * [ WFC: Entity Declared ]
6003 * In a document without any DTD, a document with only an
6004 * internal DTD subset which contains no parameter entity
6005 * references, or a document with "standalone='yes'", the
6006 * Name given in the entity reference must match that in an
6007 * entity declaration, except that well-formed documents
6008 * need not declare any of the following entities: amp, lt,
6009 * gt, apos, quot.
6010 * The declaration of a parameter entity must precede any
6011 * reference to it.
6012 * Similarly, the declaration of a general entity must
6013 * precede any reference to it which appears in a default
6014 * value in an attribute-list declaration. Note that if
6015 * entities are declared in the external subset or in
6016 * external parameter entities, a non-validating processor
6017 * is not obligated to read and process their declarations;
6018 * for such documents, the rule that an entity must be
6019 * declared is a well-formedness constraint only if
6020 * standalone='yes'.
6021 */
6022 if (ent == NULL) {
6023 if ((ctxt->standalone == 1) ||
6024 ((ctxt->hasExternalSubset == 0) &&
6025 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006026 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006027 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006028 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006029 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006030 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006031 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006032 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006033 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006034 }
6035
6036 /*
6037 * [ WFC: Parsed Entity ]
6038 * An entity reference must not contain the name of an
6039 * unparsed entity
6040 */
6041 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006042 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006043 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006044 }
6045
6046 /*
6047 * [ WFC: No External Entity References ]
6048 * Attribute values cannot contain direct or indirect
6049 * entity references to external entities.
6050 */
6051 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6052 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006053 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006054 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006055 }
6056 /*
6057 * [ WFC: No < in Attribute Values ]
6058 * The replacement text of any entity referred to directly or
6059 * indirectly in an attribute value (other than "&lt;") must
6060 * not contain a <.
6061 */
6062 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6063 (ent != NULL) &&
6064 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6065 (ent->content != NULL) &&
6066 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006067 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6068 "'<' in entity '%s' is not allowed in attributes values\n",
6069 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006070 }
6071
6072 /*
6073 * Internal check, no parameter entities here ...
6074 */
6075 else {
6076 switch (ent->etype) {
6077 case XML_INTERNAL_PARAMETER_ENTITY:
6078 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006079 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6080 "Attempt to reference the parameter entity '%s'\n",
6081 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006082 break;
6083 default:
6084 break;
6085 }
6086 }
6087
6088 /*
6089 * [ WFC: No Recursion ]
6090 * A parsed entity must not contain a recursive reference
6091 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006092 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006093 */
6094
6095 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006096 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006097 }
6098 xmlFree(name);
6099 }
6100 }
6101 *str = ptr;
6102 return(ent);
6103}
6104
6105/**
6106 * xmlParsePEReference:
6107 * @ctxt: an XML parser context
6108 *
6109 * parse PEReference declarations
6110 * The entity content is handled directly by pushing it's content as
6111 * a new input stream.
6112 *
6113 * [69] PEReference ::= '%' Name ';'
6114 *
6115 * [ WFC: No Recursion ]
6116 * A parsed entity must not contain a recursive
6117 * reference to itself, either directly or indirectly.
6118 *
6119 * [ WFC: Entity Declared ]
6120 * In a document without any DTD, a document with only an internal DTD
6121 * subset which contains no parameter entity references, or a document
6122 * with "standalone='yes'", ... ... The declaration of a parameter
6123 * entity must precede any reference to it...
6124 *
6125 * [ VC: Entity Declared ]
6126 * In a document with an external subset or external parameter entities
6127 * with "standalone='no'", ... ... The declaration of a parameter entity
6128 * must precede any reference to it...
6129 *
6130 * [ WFC: In DTD ]
6131 * Parameter-entity references may only appear in the DTD.
6132 * NOTE: misleading but this is handled.
6133 */
6134void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006135xmlParsePEReference(xmlParserCtxtPtr ctxt)
6136{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006137 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006138 xmlEntityPtr entity = NULL;
6139 xmlParserInputPtr input;
6140
6141 if (RAW == '%') {
6142 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006143 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006144 if (name == NULL) {
6145 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6146 "xmlParsePEReference: no name\n");
6147 } else {
6148 if (RAW == ';') {
6149 NEXT;
6150 if ((ctxt->sax != NULL) &&
6151 (ctxt->sax->getParameterEntity != NULL))
6152 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6153 name);
6154 if (entity == NULL) {
6155 /*
6156 * [ WFC: Entity Declared ]
6157 * In a document without any DTD, a document with only an
6158 * internal DTD subset which contains no parameter entity
6159 * references, or a document with "standalone='yes'", ...
6160 * ... The declaration of a parameter entity must precede
6161 * any reference to it...
6162 */
6163 if ((ctxt->standalone == 1) ||
6164 ((ctxt->hasExternalSubset == 0) &&
6165 (ctxt->hasPErefs == 0))) {
6166 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6167 "PEReference: %%%s; not found\n",
6168 name);
6169 } else {
6170 /*
6171 * [ VC: Entity Declared ]
6172 * In a document with an external subset or external
6173 * parameter entities with "standalone='no'", ...
6174 * ... The declaration of a parameter entity must
6175 * precede any reference to it...
6176 */
6177 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6178 "PEReference: %%%s; not found\n",
6179 name, NULL);
6180 ctxt->valid = 0;
6181 }
6182 } else {
6183 /*
6184 * Internal checking in case the entity quest barfed
6185 */
6186 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6187 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6188 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6189 "Internal: %%%s; is not a parameter entity\n",
6190 name, NULL);
6191 } else if (ctxt->input->free != deallocblankswrapper) {
6192 input =
6193 xmlNewBlanksWrapperInputStream(ctxt, entity);
6194 xmlPushInput(ctxt, input);
6195 } else {
6196 /*
6197 * TODO !!!
6198 * handle the extra spaces added before and after
6199 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6200 */
6201 input = xmlNewEntityInputStream(ctxt, entity);
6202 xmlPushInput(ctxt, input);
6203 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006204 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006205 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006206 xmlParseTextDecl(ctxt);
6207 if (ctxt->errNo ==
6208 XML_ERR_UNSUPPORTED_ENCODING) {
6209 /*
6210 * The XML REC instructs us to stop parsing
6211 * right here
6212 */
6213 ctxt->instate = XML_PARSER_EOF;
6214 return;
6215 }
6216 }
6217 }
6218 }
6219 ctxt->hasPErefs = 1;
6220 } else {
6221 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6222 }
6223 }
Owen Taylor3473f882001-02-23 17:55:21 +00006224 }
6225}
6226
6227/**
6228 * xmlParseStringPEReference:
6229 * @ctxt: an XML parser context
6230 * @str: a pointer to an index in the string
6231 *
6232 * parse PEReference declarations
6233 *
6234 * [69] PEReference ::= '%' Name ';'
6235 *
6236 * [ WFC: No Recursion ]
6237 * A parsed entity must not contain a recursive
6238 * reference to itself, either directly or indirectly.
6239 *
6240 * [ WFC: Entity Declared ]
6241 * In a document without any DTD, a document with only an internal DTD
6242 * subset which contains no parameter entity references, or a document
6243 * with "standalone='yes'", ... ... The declaration of a parameter
6244 * entity must precede any reference to it...
6245 *
6246 * [ VC: Entity Declared ]
6247 * In a document with an external subset or external parameter entities
6248 * with "standalone='no'", ... ... The declaration of a parameter entity
6249 * must precede any reference to it...
6250 *
6251 * [ WFC: In DTD ]
6252 * Parameter-entity references may only appear in the DTD.
6253 * NOTE: misleading but this is handled.
6254 *
6255 * Returns the string of the entity content.
6256 * str is updated to the current value of the index
6257 */
6258xmlEntityPtr
6259xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6260 const xmlChar *ptr;
6261 xmlChar cur;
6262 xmlChar *name;
6263 xmlEntityPtr entity = NULL;
6264
6265 if ((str == NULL) || (*str == NULL)) return(NULL);
6266 ptr = *str;
6267 cur = *ptr;
6268 if (cur == '%') {
6269 ptr++;
6270 cur = *ptr;
6271 name = xmlParseStringName(ctxt, &ptr);
6272 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006273 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6274 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006275 } else {
6276 cur = *ptr;
6277 if (cur == ';') {
6278 ptr++;
6279 cur = *ptr;
6280 if ((ctxt->sax != NULL) &&
6281 (ctxt->sax->getParameterEntity != NULL))
6282 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6283 name);
6284 if (entity == NULL) {
6285 /*
6286 * [ WFC: Entity Declared ]
6287 * In a document without any DTD, a document with only an
6288 * internal DTD subset which contains no parameter entity
6289 * references, or a document with "standalone='yes'", ...
6290 * ... The declaration of a parameter entity must precede
6291 * any reference to it...
6292 */
6293 if ((ctxt->standalone == 1) ||
6294 ((ctxt->hasExternalSubset == 0) &&
6295 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006296 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006297 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006298 } else {
6299 /*
6300 * [ VC: Entity Declared ]
6301 * In a document with an external subset or external
6302 * parameter entities with "standalone='no'", ...
6303 * ... The declaration of a parameter entity must
6304 * precede any reference to it...
6305 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006306 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6307 "PEReference: %%%s; not found\n",
6308 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006309 ctxt->valid = 0;
6310 }
6311 } else {
6312 /*
6313 * Internal checking in case the entity quest barfed
6314 */
6315 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6316 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006317 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6318 "%%%s; is not a parameter entity\n",
6319 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006320 }
6321 }
6322 ctxt->hasPErefs = 1;
6323 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006324 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006325 }
6326 xmlFree(name);
6327 }
6328 }
6329 *str = ptr;
6330 return(entity);
6331}
6332
6333/**
6334 * xmlParseDocTypeDecl:
6335 * @ctxt: an XML parser context
6336 *
6337 * parse a DOCTYPE declaration
6338 *
6339 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6340 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6341 *
6342 * [ VC: Root Element Type ]
6343 * The Name in the document type declaration must match the element
6344 * type of the root element.
6345 */
6346
6347void
6348xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006349 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006350 xmlChar *ExternalID = NULL;
6351 xmlChar *URI = NULL;
6352
6353 /*
6354 * We know that '<!DOCTYPE' has been detected.
6355 */
6356 SKIP(9);
6357
6358 SKIP_BLANKS;
6359
6360 /*
6361 * Parse the DOCTYPE name.
6362 */
6363 name = xmlParseName(ctxt);
6364 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006365 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6366 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006367 }
6368 ctxt->intSubName = name;
6369
6370 SKIP_BLANKS;
6371
6372 /*
6373 * Check for SystemID and ExternalID
6374 */
6375 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6376
6377 if ((URI != NULL) || (ExternalID != NULL)) {
6378 ctxt->hasExternalSubset = 1;
6379 }
6380 ctxt->extSubURI = URI;
6381 ctxt->extSubSystem = ExternalID;
6382
6383 SKIP_BLANKS;
6384
6385 /*
6386 * Create and update the internal subset.
6387 */
6388 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6389 (!ctxt->disableSAX))
6390 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6391
6392 /*
6393 * Is there any internal subset declarations ?
6394 * they are handled separately in xmlParseInternalSubset()
6395 */
6396 if (RAW == '[')
6397 return;
6398
6399 /*
6400 * We should be at the end of the DOCTYPE declaration.
6401 */
6402 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006403 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006404 }
6405 NEXT;
6406}
6407
6408/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006409 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006410 * @ctxt: an XML parser context
6411 *
6412 * parse the internal subset declaration
6413 *
6414 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6415 */
6416
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006417static void
Owen Taylor3473f882001-02-23 17:55:21 +00006418xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6419 /*
6420 * Is there any DTD definition ?
6421 */
6422 if (RAW == '[') {
6423 ctxt->instate = XML_PARSER_DTD;
6424 NEXT;
6425 /*
6426 * Parse the succession of Markup declarations and
6427 * PEReferences.
6428 * Subsequence (markupdecl | PEReference | S)*
6429 */
6430 while (RAW != ']') {
6431 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006432 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006433
6434 SKIP_BLANKS;
6435 xmlParseMarkupDecl(ctxt);
6436 xmlParsePEReference(ctxt);
6437
6438 /*
6439 * Pop-up of finished entities.
6440 */
6441 while ((RAW == 0) && (ctxt->inputNr > 1))
6442 xmlPopInput(ctxt);
6443
6444 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006445 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006446 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006447 break;
6448 }
6449 }
6450 if (RAW == ']') {
6451 NEXT;
6452 SKIP_BLANKS;
6453 }
6454 }
6455
6456 /*
6457 * We should be at the end of the DOCTYPE declaration.
6458 */
6459 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006460 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006461 }
6462 NEXT;
6463}
6464
Daniel Veillard81273902003-09-30 00:43:48 +00006465#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006466/**
6467 * xmlParseAttribute:
6468 * @ctxt: an XML parser context
6469 * @value: a xmlChar ** used to store the value of the attribute
6470 *
6471 * parse an attribute
6472 *
6473 * [41] Attribute ::= Name Eq AttValue
6474 *
6475 * [ WFC: No External Entity References ]
6476 * Attribute values cannot contain direct or indirect entity references
6477 * to external entities.
6478 *
6479 * [ WFC: No < in Attribute Values ]
6480 * The replacement text of any entity referred to directly or indirectly in
6481 * an attribute value (other than "&lt;") must not contain a <.
6482 *
6483 * [ VC: Attribute Value Type ]
6484 * The attribute must have been declared; the value must be of the type
6485 * declared for it.
6486 *
6487 * [25] Eq ::= S? '=' S?
6488 *
6489 * With namespace:
6490 *
6491 * [NS 11] Attribute ::= QName Eq AttValue
6492 *
6493 * Also the case QName == xmlns:??? is handled independently as a namespace
6494 * definition.
6495 *
6496 * Returns the attribute name, and the value in *value.
6497 */
6498
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006499const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006500xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006501 const xmlChar *name;
6502 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006503
6504 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006505 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006506 name = xmlParseName(ctxt);
6507 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006508 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006509 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006510 return(NULL);
6511 }
6512
6513 /*
6514 * read the value
6515 */
6516 SKIP_BLANKS;
6517 if (RAW == '=') {
6518 NEXT;
6519 SKIP_BLANKS;
6520 val = xmlParseAttValue(ctxt);
6521 ctxt->instate = XML_PARSER_CONTENT;
6522 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006523 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006524 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006525 return(NULL);
6526 }
6527
6528 /*
6529 * Check that xml:lang conforms to the specification
6530 * No more registered as an error, just generate a warning now
6531 * since this was deprecated in XML second edition
6532 */
6533 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6534 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006535 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6536 "Malformed value for xml:lang : %s\n",
6537 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006538 }
6539 }
6540
6541 /*
6542 * Check that xml:space conforms to the specification
6543 */
6544 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6545 if (xmlStrEqual(val, BAD_CAST "default"))
6546 *(ctxt->space) = 0;
6547 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6548 *(ctxt->space) = 1;
6549 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006550 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006551"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006552 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006553 }
6554 }
6555
6556 *value = val;
6557 return(name);
6558}
6559
6560/**
6561 * xmlParseStartTag:
6562 * @ctxt: an XML parser context
6563 *
6564 * parse a start of tag either for rule element or
6565 * EmptyElement. In both case we don't parse the tag closing chars.
6566 *
6567 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6568 *
6569 * [ WFC: Unique Att Spec ]
6570 * No attribute name may appear more than once in the same start-tag or
6571 * empty-element tag.
6572 *
6573 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6574 *
6575 * [ WFC: Unique Att Spec ]
6576 * No attribute name may appear more than once in the same start-tag or
6577 * empty-element tag.
6578 *
6579 * With namespace:
6580 *
6581 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6582 *
6583 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6584 *
6585 * Returns the element name parsed
6586 */
6587
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006588const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006589xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006590 const xmlChar *name;
6591 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006592 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006593 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006594 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006595 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006596 int i;
6597
6598 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006599 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006600
6601 name = xmlParseName(ctxt);
6602 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006603 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006604 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006605 return(NULL);
6606 }
6607
6608 /*
6609 * Now parse the attributes, it ends up with the ending
6610 *
6611 * (S Attribute)* S?
6612 */
6613 SKIP_BLANKS;
6614 GROW;
6615
Daniel Veillard21a0f912001-02-25 19:54:14 +00006616 while ((RAW != '>') &&
6617 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006618 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006619 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006620 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006621
6622 attname = xmlParseAttribute(ctxt, &attvalue);
6623 if ((attname != NULL) && (attvalue != NULL)) {
6624 /*
6625 * [ WFC: Unique Att Spec ]
6626 * No attribute name may appear more than once in the same
6627 * start-tag or empty-element tag.
6628 */
6629 for (i = 0; i < nbatts;i += 2) {
6630 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006631 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006632 xmlFree(attvalue);
6633 goto failed;
6634 }
6635 }
Owen Taylor3473f882001-02-23 17:55:21 +00006636 /*
6637 * Add the pair to atts
6638 */
6639 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006640 maxatts = 22; /* allow for 10 attrs by default */
6641 atts = (const xmlChar **)
6642 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006643 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006644 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006645 if (attvalue != NULL)
6646 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006647 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006648 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006649 ctxt->atts = atts;
6650 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006651 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006652 const xmlChar **n;
6653
Owen Taylor3473f882001-02-23 17:55:21 +00006654 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006655 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006656 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006657 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006658 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006659 if (attvalue != NULL)
6660 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006661 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006662 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006663 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006664 ctxt->atts = atts;
6665 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006666 }
6667 atts[nbatts++] = attname;
6668 atts[nbatts++] = attvalue;
6669 atts[nbatts] = NULL;
6670 atts[nbatts + 1] = NULL;
6671 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006672 if (attvalue != NULL)
6673 xmlFree(attvalue);
6674 }
6675
6676failed:
6677
Daniel Veillard3772de32002-12-17 10:31:45 +00006678 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006679 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6680 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006681 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006682 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6683 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006684 }
6685 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006686 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6687 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006688 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6689 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006690 break;
6691 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006692 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006693 GROW;
6694 }
6695
6696 /*
6697 * SAX: Start of Element !
6698 */
6699 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006700 (!ctxt->disableSAX)) {
6701 if (nbatts > 0)
6702 ctxt->sax->startElement(ctxt->userData, name, atts);
6703 else
6704 ctxt->sax->startElement(ctxt->userData, name, NULL);
6705 }
Owen Taylor3473f882001-02-23 17:55:21 +00006706
6707 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006708 /* Free only the content strings */
6709 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006710 if (atts[i] != NULL)
6711 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006712 }
6713 return(name);
6714}
6715
6716/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006717 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006718 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006719 * @line: line of the start tag
6720 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006721 *
6722 * parse an end of tag
6723 *
6724 * [42] ETag ::= '</' Name S? '>'
6725 *
6726 * With namespace
6727 *
6728 * [NS 9] ETag ::= '</' QName S? '>'
6729 */
6730
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006731static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006732xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006733 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006734
6735 GROW;
6736 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006737 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006738 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006739 return;
6740 }
6741 SKIP(2);
6742
Daniel Veillard46de64e2002-05-29 08:21:33 +00006743 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006744
6745 /*
6746 * We should definitely be at the ending "S? '>'" part
6747 */
6748 GROW;
6749 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006750 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006751 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006752 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006753 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006754
6755 /*
6756 * [ WFC: Element Type Match ]
6757 * The Name in an element's end-tag must match the element type in the
6758 * start-tag.
6759 *
6760 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006761 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006762 if (name == NULL) name = BAD_CAST "unparseable";
6763 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006764 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006765 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006766 }
6767
6768 /*
6769 * SAX: End of Tag
6770 */
6771 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6772 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006773 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006774
Daniel Veillarde57ec792003-09-10 10:50:59 +00006775 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006776 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006777 return;
6778}
6779
6780/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006781 * xmlParseEndTag:
6782 * @ctxt: an XML parser context
6783 *
6784 * parse an end of tag
6785 *
6786 * [42] ETag ::= '</' Name S? '>'
6787 *
6788 * With namespace
6789 *
6790 * [NS 9] ETag ::= '</' QName S? '>'
6791 */
6792
6793void
6794xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006795 xmlParseEndTag1(ctxt, 0);
6796}
Daniel Veillard81273902003-09-30 00:43:48 +00006797#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006798
6799/************************************************************************
6800 * *
6801 * SAX 2 specific operations *
6802 * *
6803 ************************************************************************/
6804
6805static const xmlChar *
6806xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6807 int len = 0, l;
6808 int c;
6809 int count = 0;
6810
6811 /*
6812 * Handler for more complex cases
6813 */
6814 GROW;
6815 c = CUR_CHAR(l);
6816 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006817 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006818 return(NULL);
6819 }
6820
6821 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006822 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006823 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006824 (IS_COMBINING(c)) ||
6825 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006826 if (count++ > 100) {
6827 count = 0;
6828 GROW;
6829 }
6830 len += l;
6831 NEXTL(l);
6832 c = CUR_CHAR(l);
6833 }
6834 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6835}
6836
6837/*
6838 * xmlGetNamespace:
6839 * @ctxt: an XML parser context
6840 * @prefix: the prefix to lookup
6841 *
6842 * Lookup the namespace name for the @prefix (which ca be NULL)
6843 * The prefix must come from the @ctxt->dict dictionnary
6844 *
6845 * Returns the namespace name or NULL if not bound
6846 */
6847static const xmlChar *
6848xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6849 int i;
6850
Daniel Veillarde57ec792003-09-10 10:50:59 +00006851 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006852 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006853 if (ctxt->nsTab[i] == prefix) {
6854 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6855 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006856 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006857 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006858 return(NULL);
6859}
6860
6861/**
6862 * xmlParseNCName:
6863 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006864 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006865 *
6866 * parse an XML name.
6867 *
6868 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6869 * CombiningChar | Extender
6870 *
6871 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6872 *
6873 * Returns the Name parsed or NULL
6874 */
6875
6876static const xmlChar *
6877xmlParseNCName(xmlParserCtxtPtr ctxt) {
6878 const xmlChar *in;
6879 const xmlChar *ret;
6880 int count = 0;
6881
6882 /*
6883 * Accelerator for simple ASCII names
6884 */
6885 in = ctxt->input->cur;
6886 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6887 ((*in >= 0x41) && (*in <= 0x5A)) ||
6888 (*in == '_')) {
6889 in++;
6890 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6891 ((*in >= 0x41) && (*in <= 0x5A)) ||
6892 ((*in >= 0x30) && (*in <= 0x39)) ||
6893 (*in == '_') || (*in == '-') ||
6894 (*in == '.'))
6895 in++;
6896 if ((*in > 0) && (*in < 0x80)) {
6897 count = in - ctxt->input->cur;
6898 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6899 ctxt->input->cur = in;
6900 ctxt->nbChars += count;
6901 ctxt->input->col += count;
6902 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006903 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006904 }
6905 return(ret);
6906 }
6907 }
6908 return(xmlParseNCNameComplex(ctxt));
6909}
6910
6911/**
6912 * xmlParseQName:
6913 * @ctxt: an XML parser context
6914 * @prefix: pointer to store the prefix part
6915 *
6916 * parse an XML Namespace QName
6917 *
6918 * [6] QName ::= (Prefix ':')? LocalPart
6919 * [7] Prefix ::= NCName
6920 * [8] LocalPart ::= NCName
6921 *
6922 * Returns the Name parsed or NULL
6923 */
6924
6925static const xmlChar *
6926xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6927 const xmlChar *l, *p;
6928
6929 GROW;
6930
6931 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006932 if (l == NULL) {
6933 if (CUR == ':') {
6934 l = xmlParseName(ctxt);
6935 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006936 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6937 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006938 *prefix = NULL;
6939 return(l);
6940 }
6941 }
6942 return(NULL);
6943 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006944 if (CUR == ':') {
6945 NEXT;
6946 p = l;
6947 l = xmlParseNCName(ctxt);
6948 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006949 xmlChar *tmp;
6950
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006951 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6952 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006953 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6954 p = xmlDictLookup(ctxt->dict, tmp, -1);
6955 if (tmp != NULL) xmlFree(tmp);
6956 *prefix = NULL;
6957 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006958 }
6959 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006960 xmlChar *tmp;
6961
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006962 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6963 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006964 NEXT;
6965 tmp = (xmlChar *) xmlParseName(ctxt);
6966 if (tmp != NULL) {
6967 tmp = xmlBuildQName(tmp, l, NULL, 0);
6968 l = xmlDictLookup(ctxt->dict, tmp, -1);
6969 if (tmp != NULL) xmlFree(tmp);
6970 *prefix = p;
6971 return(l);
6972 }
6973 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6974 l = xmlDictLookup(ctxt->dict, tmp, -1);
6975 if (tmp != NULL) xmlFree(tmp);
6976 *prefix = p;
6977 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006978 }
6979 *prefix = p;
6980 } else
6981 *prefix = NULL;
6982 return(l);
6983}
6984
6985/**
6986 * xmlParseQNameAndCompare:
6987 * @ctxt: an XML parser context
6988 * @name: the localname
6989 * @prefix: the prefix, if any.
6990 *
6991 * parse an XML name and compares for match
6992 * (specialized for endtag parsing)
6993 *
6994 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6995 * and the name for mismatch
6996 */
6997
6998static const xmlChar *
6999xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7000 xmlChar const *prefix) {
7001 const xmlChar *cmp = name;
7002 const xmlChar *in;
7003 const xmlChar *ret;
7004 const xmlChar *prefix2;
7005
7006 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7007
7008 GROW;
7009 in = ctxt->input->cur;
7010
7011 cmp = prefix;
7012 while (*in != 0 && *in == *cmp) {
7013 ++in;
7014 ++cmp;
7015 }
7016 if ((*cmp == 0) && (*in == ':')) {
7017 in++;
7018 cmp = name;
7019 while (*in != 0 && *in == *cmp) {
7020 ++in;
7021 ++cmp;
7022 }
William M. Brack76e95df2003-10-18 16:20:14 +00007023 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007024 /* success */
7025 ctxt->input->cur = in;
7026 return((const xmlChar*) 1);
7027 }
7028 }
7029 /*
7030 * all strings coms from the dictionary, equality can be done directly
7031 */
7032 ret = xmlParseQName (ctxt, &prefix2);
7033 if ((ret == name) && (prefix == prefix2))
7034 return((const xmlChar*) 1);
7035 return ret;
7036}
7037
7038/**
7039 * xmlParseAttValueInternal:
7040 * @ctxt: an XML parser context
7041 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007042 * @alloc: whether the attribute was reallocated as a new string
7043 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007044 *
7045 * parse a value for an attribute.
7046 * NOTE: if no normalization is needed, the routine will return pointers
7047 * directly from the data buffer.
7048 *
7049 * 3.3.3 Attribute-Value Normalization:
7050 * Before the value of an attribute is passed to the application or
7051 * checked for validity, the XML processor must normalize it as follows:
7052 * - a character reference is processed by appending the referenced
7053 * character to the attribute value
7054 * - an entity reference is processed by recursively processing the
7055 * replacement text of the entity
7056 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7057 * appending #x20 to the normalized value, except that only a single
7058 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7059 * parsed entity or the literal entity value of an internal parsed entity
7060 * - other characters are processed by appending them to the normalized value
7061 * If the declared value is not CDATA, then the XML processor must further
7062 * process the normalized attribute value by discarding any leading and
7063 * trailing space (#x20) characters, and by replacing sequences of space
7064 * (#x20) characters by a single space (#x20) character.
7065 * All attributes for which no declaration has been read should be treated
7066 * by a non-validating parser as if declared CDATA.
7067 *
7068 * Returns the AttValue parsed or NULL. The value has to be freed by the
7069 * caller if it was copied, this can be detected by val[*len] == 0.
7070 */
7071
7072static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007073xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7074 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007075{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007076 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007077 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007078 xmlChar *ret = NULL;
7079
7080 GROW;
7081 in = (xmlChar *) CUR_PTR;
7082 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007083 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007084 return (NULL);
7085 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007086 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007087
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007088 /*
7089 * try to handle in this routine the most common case where no
7090 * allocation of a new string is required and where content is
7091 * pure ASCII.
7092 */
7093 limit = *in++;
7094 end = ctxt->input->end;
7095 start = in;
7096 if (in >= end) {
7097 const xmlChar *oldbase = ctxt->input->base;
7098 GROW;
7099 if (oldbase != ctxt->input->base) {
7100 long delta = ctxt->input->base - oldbase;
7101 start = start + delta;
7102 in = in + delta;
7103 }
7104 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007105 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007106 if (normalize) {
7107 /*
7108 * Skip any leading spaces
7109 */
7110 while ((in < end) && (*in != limit) &&
7111 ((*in == 0x20) || (*in == 0x9) ||
7112 (*in == 0xA) || (*in == 0xD))) {
7113 in++;
7114 start = in;
7115 if (in >= end) {
7116 const xmlChar *oldbase = ctxt->input->base;
7117 GROW;
7118 if (oldbase != ctxt->input->base) {
7119 long delta = ctxt->input->base - oldbase;
7120 start = start + delta;
7121 in = in + delta;
7122 }
7123 end = ctxt->input->end;
7124 }
7125 }
7126 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7127 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7128 if ((*in++ == 0x20) && (*in == 0x20)) break;
7129 if (in >= end) {
7130 const xmlChar *oldbase = ctxt->input->base;
7131 GROW;
7132 if (oldbase != ctxt->input->base) {
7133 long delta = ctxt->input->base - oldbase;
7134 start = start + delta;
7135 in = in + delta;
7136 }
7137 end = ctxt->input->end;
7138 }
7139 }
7140 last = in;
7141 /*
7142 * skip the trailing blanks
7143 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007144 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007145 while ((in < end) && (*in != limit) &&
7146 ((*in == 0x20) || (*in == 0x9) ||
7147 (*in == 0xA) || (*in == 0xD))) {
7148 in++;
7149 if (in >= end) {
7150 const xmlChar *oldbase = ctxt->input->base;
7151 GROW;
7152 if (oldbase != ctxt->input->base) {
7153 long delta = ctxt->input->base - oldbase;
7154 start = start + delta;
7155 in = in + delta;
7156 last = last + delta;
7157 }
7158 end = ctxt->input->end;
7159 }
7160 }
7161 if (*in != limit) goto need_complex;
7162 } else {
7163 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7164 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7165 in++;
7166 if (in >= end) {
7167 const xmlChar *oldbase = ctxt->input->base;
7168 GROW;
7169 if (oldbase != ctxt->input->base) {
7170 long delta = ctxt->input->base - oldbase;
7171 start = start + delta;
7172 in = in + delta;
7173 }
7174 end = ctxt->input->end;
7175 }
7176 }
7177 last = in;
7178 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007179 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007180 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007181 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007182 *len = last - start;
7183 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007184 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007185 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007186 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007187 }
7188 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007189 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007190 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007191need_complex:
7192 if (alloc) *alloc = 1;
7193 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007194}
7195
7196/**
7197 * xmlParseAttribute2:
7198 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007199 * @pref: the element prefix
7200 * @elem: the element name
7201 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007202 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007203 * @len: an int * to save the length of the attribute
7204 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007205 *
7206 * parse an attribute in the new SAX2 framework.
7207 *
7208 * Returns the attribute name, and the value in *value, .
7209 */
7210
7211static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007212xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7213 const xmlChar *pref, const xmlChar *elem,
7214 const xmlChar **prefix, xmlChar **value,
7215 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007216 const xmlChar *name;
7217 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007218 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007219
7220 *value = NULL;
7221 GROW;
7222 name = xmlParseQName(ctxt, prefix);
7223 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007224 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7225 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007226 return(NULL);
7227 }
7228
7229 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007230 * get the type if needed
7231 */
7232 if (ctxt->attsSpecial != NULL) {
7233 int type;
7234
7235 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7236 pref, elem, *prefix, name);
7237 if (type != 0) normalize = 1;
7238 }
7239
7240 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007241 * read the value
7242 */
7243 SKIP_BLANKS;
7244 if (RAW == '=') {
7245 NEXT;
7246 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007247 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007248 ctxt->instate = XML_PARSER_CONTENT;
7249 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007250 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007251 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252 return(NULL);
7253 }
7254
7255 /*
7256 * Check that xml:lang conforms to the specification
7257 * No more registered as an error, just generate a warning now
7258 * since this was deprecated in XML second edition
7259 */
7260 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7261 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007262 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7263 "Malformed value for xml:lang : %s\n",
7264 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007265 }
7266 }
7267
7268 /*
7269 * Check that xml:space conforms to the specification
7270 */
7271 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7272 if (xmlStrEqual(val, BAD_CAST "default"))
7273 *(ctxt->space) = 0;
7274 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7275 *(ctxt->space) = 1;
7276 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007277 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007278"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7279 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007280 }
7281 }
7282
7283 *value = val;
7284 return(name);
7285}
7286
7287/**
7288 * xmlParseStartTag2:
7289 * @ctxt: an XML parser context
7290 *
7291 * parse a start of tag either for rule element or
7292 * EmptyElement. In both case we don't parse the tag closing chars.
7293 * This routine is called when running SAX2 parsing
7294 *
7295 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7296 *
7297 * [ WFC: Unique Att Spec ]
7298 * No attribute name may appear more than once in the same start-tag or
7299 * empty-element tag.
7300 *
7301 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7302 *
7303 * [ WFC: Unique Att Spec ]
7304 * No attribute name may appear more than once in the same start-tag or
7305 * empty-element tag.
7306 *
7307 * With namespace:
7308 *
7309 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7310 *
7311 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7312 *
7313 * Returns the element name parsed
7314 */
7315
7316static const xmlChar *
7317xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007318 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007319 const xmlChar *localname;
7320 const xmlChar *prefix;
7321 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007322 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007323 const xmlChar *nsname;
7324 xmlChar *attvalue;
7325 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007326 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007327 int nratts, nbatts, nbdef;
7328 int i, j, nbNs, attval;
7329 const xmlChar *base;
7330 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007331
7332 if (RAW != '<') return(NULL);
7333 NEXT1;
7334
7335 /*
7336 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7337 * point since the attribute values may be stored as pointers to
7338 * the buffer and calling SHRINK would destroy them !
7339 * The Shrinking is only possible once the full set of attribute
7340 * callbacks have been done.
7341 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007342reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007343 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007344 base = ctxt->input->base;
7345 cur = ctxt->input->cur - ctxt->input->base;
7346 nbatts = 0;
7347 nratts = 0;
7348 nbdef = 0;
7349 nbNs = 0;
7350 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007351
7352 localname = xmlParseQName(ctxt, &prefix);
7353 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007354 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7355 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007356 return(NULL);
7357 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007358 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007359
7360 /*
7361 * Now parse the attributes, it ends up with the ending
7362 *
7363 * (S Attribute)* S?
7364 */
7365 SKIP_BLANKS;
7366 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007367 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007368
7369 while ((RAW != '>') &&
7370 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007371 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007372 const xmlChar *q = CUR_PTR;
7373 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007374 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007375
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007376 attname = xmlParseAttribute2(ctxt, prefix, localname,
7377 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007378 if ((attname != NULL) && (attvalue != NULL)) {
7379 if (len < 0) len = xmlStrlen(attvalue);
7380 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007381 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7382 xmlURIPtr uri;
7383
7384 if (*URL != 0) {
7385 uri = xmlParseURI((const char *) URL);
7386 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007387 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7388 "xmlns: %s not a valid URI\n",
7389 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007390 } else {
7391 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007392 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7393 "xmlns: URI %s is not absolute\n",
7394 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007395 }
7396 xmlFreeURI(uri);
7397 }
7398 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007399 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007400 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007401 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007402 for (j = 1;j <= nbNs;j++)
7403 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7404 break;
7405 if (j <= nbNs)
7406 xmlErrAttributeDup(ctxt, NULL, attname);
7407 else
7408 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007409 if (alloc != 0) xmlFree(attvalue);
7410 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007411 continue;
7412 }
7413 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007414 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7415 xmlURIPtr uri;
7416
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007417 if (attname == ctxt->str_xml) {
7418 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007419 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7420 "xml namespace prefix mapped to wrong URI\n",
7421 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007422 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007423 /*
7424 * Do not keep a namespace definition node
7425 */
7426 if (alloc != 0) xmlFree(attvalue);
7427 SKIP_BLANKS;
7428 continue;
7429 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007430 uri = xmlParseURI((const char *) URL);
7431 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007432 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7433 "xmlns:%s: '%s' is not a valid URI\n",
7434 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007435 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007436 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007437 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7438 "xmlns:%s: URI %s is not absolute\n",
7439 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007440 }
7441 xmlFreeURI(uri);
7442 }
7443
Daniel Veillard0fb18932003-09-07 09:14:37 +00007444 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007445 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007446 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007447 for (j = 1;j <= nbNs;j++)
7448 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7449 break;
7450 if (j <= nbNs)
7451 xmlErrAttributeDup(ctxt, aprefix, attname);
7452 else
7453 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007454 if (alloc != 0) xmlFree(attvalue);
7455 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007456 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007457 continue;
7458 }
7459
7460 /*
7461 * Add the pair to atts
7462 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007463 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7464 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007465 if (attvalue[len] == 0)
7466 xmlFree(attvalue);
7467 goto failed;
7468 }
7469 maxatts = ctxt->maxatts;
7470 atts = ctxt->atts;
7471 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007472 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007473 atts[nbatts++] = attname;
7474 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007475 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476 atts[nbatts++] = attvalue;
7477 attvalue += len;
7478 atts[nbatts++] = attvalue;
7479 /*
7480 * tag if some deallocation is needed
7481 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007482 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007483 } else {
7484 if ((attvalue != NULL) && (attvalue[len] == 0))
7485 xmlFree(attvalue);
7486 }
7487
7488failed:
7489
7490 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007491 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007492 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7493 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007494 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7496 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007497 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007498 }
7499 SKIP_BLANKS;
7500 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7501 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007502 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007503 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007504 break;
7505 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007507 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007508 }
7509
Daniel Veillard0fb18932003-09-07 09:14:37 +00007510 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007511 * The attributes defaulting
7512 */
7513 if (ctxt->attsDefault != NULL) {
7514 xmlDefAttrsPtr defaults;
7515
7516 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7517 if (defaults != NULL) {
7518 for (i = 0;i < defaults->nbAttrs;i++) {
7519 attname = defaults->values[4 * i];
7520 aprefix = defaults->values[4 * i + 1];
7521
7522 /*
7523 * special work for namespaces defaulted defs
7524 */
7525 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7526 /*
7527 * check that it's not a defined namespace
7528 */
7529 for (j = 1;j <= nbNs;j++)
7530 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7531 break;
7532 if (j <= nbNs) continue;
7533
7534 nsname = xmlGetNamespace(ctxt, NULL);
7535 if (nsname != defaults->values[4 * i + 2]) {
7536 if (nsPush(ctxt, NULL,
7537 defaults->values[4 * i + 2]) > 0)
7538 nbNs++;
7539 }
7540 } else if (aprefix == ctxt->str_xmlns) {
7541 /*
7542 * check that it's not a defined namespace
7543 */
7544 for (j = 1;j <= nbNs;j++)
7545 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7546 break;
7547 if (j <= nbNs) continue;
7548
7549 nsname = xmlGetNamespace(ctxt, attname);
7550 if (nsname != defaults->values[2]) {
7551 if (nsPush(ctxt, attname,
7552 defaults->values[4 * i + 2]) > 0)
7553 nbNs++;
7554 }
7555 } else {
7556 /*
7557 * check that it's not a defined attribute
7558 */
7559 for (j = 0;j < nbatts;j+=5) {
7560 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7561 break;
7562 }
7563 if (j < nbatts) continue;
7564
7565 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7566 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007567 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007568 }
7569 maxatts = ctxt->maxatts;
7570 atts = ctxt->atts;
7571 }
7572 atts[nbatts++] = attname;
7573 atts[nbatts++] = aprefix;
7574 if (aprefix == NULL)
7575 atts[nbatts++] = NULL;
7576 else
7577 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7578 atts[nbatts++] = defaults->values[4 * i + 2];
7579 atts[nbatts++] = defaults->values[4 * i + 3];
7580 nbdef++;
7581 }
7582 }
7583 }
7584 }
7585
Daniel Veillarde70c8772003-11-25 07:21:18 +00007586 /*
7587 * The attributes checkings
7588 */
7589 for (i = 0; i < nbatts;i += 5) {
7590 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7591 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7592 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7593 "Namespace prefix %s for %s on %s is not defined\n",
7594 atts[i + 1], atts[i], localname);
7595 }
7596 atts[i + 2] = nsname;
7597 /*
7598 * [ WFC: Unique Att Spec ]
7599 * No attribute name may appear more than once in the same
7600 * start-tag or empty-element tag.
7601 * As extended by the Namespace in XML REC.
7602 */
7603 for (j = 0; j < i;j += 5) {
7604 if (atts[i] == atts[j]) {
7605 if (atts[i+1] == atts[j+1]) {
7606 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7607 break;
7608 }
7609 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7610 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7611 "Namespaced Attribute %s in '%s' redefined\n",
7612 atts[i], nsname, NULL);
7613 break;
7614 }
7615 }
7616 }
7617 }
7618
Daniel Veillarde57ec792003-09-10 10:50:59 +00007619 nsname = xmlGetNamespace(ctxt, prefix);
7620 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007621 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7622 "Namespace prefix %s on %s is not defined\n",
7623 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007624 }
7625 *pref = prefix;
7626 *URI = nsname;
7627
7628 /*
7629 * SAX: Start of Element !
7630 */
7631 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7632 (!ctxt->disableSAX)) {
7633 if (nbNs > 0)
7634 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7635 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7636 nbatts / 5, nbdef, atts);
7637 else
7638 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7639 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7640 }
7641
7642 /*
7643 * Free up attribute allocated strings if needed
7644 */
7645 if (attval != 0) {
7646 for (i = 3,j = 0; j < nratts;i += 5,j++)
7647 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7648 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007649 }
7650
7651 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007652
7653base_changed:
7654 /*
7655 * the attribute strings are valid iif the base didn't changed
7656 */
7657 if (attval != 0) {
7658 for (i = 3,j = 0; j < nratts;i += 5,j++)
7659 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7660 xmlFree((xmlChar *) atts[i]);
7661 }
7662 ctxt->input->cur = ctxt->input->base + cur;
7663 if (ctxt->wellFormed == 1) {
7664 goto reparse;
7665 }
7666 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007667}
7668
7669/**
7670 * xmlParseEndTag2:
7671 * @ctxt: an XML parser context
7672 * @line: line of the start tag
7673 * @nsNr: number of namespaces on the start tag
7674 *
7675 * parse an end of tag
7676 *
7677 * [42] ETag ::= '</' Name S? '>'
7678 *
7679 * With namespace
7680 *
7681 * [NS 9] ETag ::= '</' QName S? '>'
7682 */
7683
7684static void
7685xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007686 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007687 const xmlChar *name;
7688
7689 GROW;
7690 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007691 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007692 return;
7693 }
7694 SKIP(2);
7695
William M. Brack13dfa872004-09-18 04:52:08 +00007696 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007697 if (ctxt->input->cur[tlen] == '>') {
7698 ctxt->input->cur += tlen + 1;
7699 goto done;
7700 }
7701 ctxt->input->cur += tlen;
7702 name = (xmlChar*)1;
7703 } else {
7704 if (prefix == NULL)
7705 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7706 else
7707 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7708 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007709
7710 /*
7711 * We should definitely be at the ending "S? '>'" part
7712 */
7713 GROW;
7714 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007715 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007716 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717 } else
7718 NEXT1;
7719
7720 /*
7721 * [ WFC: Element Type Match ]
7722 * The Name in an element's end-tag must match the element type in the
7723 * start-tag.
7724 *
7725 */
7726 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007727 if (name == NULL) name = BAD_CAST "unparseable";
7728 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007730 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007731 }
7732
7733 /*
7734 * SAX: End of Tag
7735 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007736done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7738 (!ctxt->disableSAX))
7739 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7740
Daniel Veillard0fb18932003-09-07 09:14:37 +00007741 spacePop(ctxt);
7742 if (nsNr != 0)
7743 nsPop(ctxt, nsNr);
7744 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007745}
7746
7747/**
Owen Taylor3473f882001-02-23 17:55:21 +00007748 * xmlParseCDSect:
7749 * @ctxt: an XML parser context
7750 *
7751 * Parse escaped pure raw content.
7752 *
7753 * [18] CDSect ::= CDStart CData CDEnd
7754 *
7755 * [19] CDStart ::= '<![CDATA['
7756 *
7757 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7758 *
7759 * [21] CDEnd ::= ']]>'
7760 */
7761void
7762xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7763 xmlChar *buf = NULL;
7764 int len = 0;
7765 int size = XML_PARSER_BUFFER_SIZE;
7766 int r, rl;
7767 int s, sl;
7768 int cur, l;
7769 int count = 0;
7770
Daniel Veillard8f597c32003-10-06 08:19:27 +00007771 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007772 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007773 SKIP(9);
7774 } else
7775 return;
7776
7777 ctxt->instate = XML_PARSER_CDATA_SECTION;
7778 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007779 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007780 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007781 ctxt->instate = XML_PARSER_CONTENT;
7782 return;
7783 }
7784 NEXTL(rl);
7785 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007786 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007787 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007788 ctxt->instate = XML_PARSER_CONTENT;
7789 return;
7790 }
7791 NEXTL(sl);
7792 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007793 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007794 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007795 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007796 return;
7797 }
William M. Brack871611b2003-10-18 04:53:14 +00007798 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007799 ((r != ']') || (s != ']') || (cur != '>'))) {
7800 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00007801 xmlChar *tmp;
7802
Owen Taylor3473f882001-02-23 17:55:21 +00007803 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00007804 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7805 if (tmp == NULL) {
7806 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007807 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007808 return;
7809 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00007810 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00007811 }
7812 COPY_BUF(rl,buf,len,r);
7813 r = s;
7814 rl = sl;
7815 s = cur;
7816 sl = l;
7817 count++;
7818 if (count > 50) {
7819 GROW;
7820 count = 0;
7821 }
7822 NEXTL(l);
7823 cur = CUR_CHAR(l);
7824 }
7825 buf[len] = 0;
7826 ctxt->instate = XML_PARSER_CONTENT;
7827 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007828 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007829 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007830 xmlFree(buf);
7831 return;
7832 }
7833 NEXTL(l);
7834
7835 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007836 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007837 */
7838 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7839 if (ctxt->sax->cdataBlock != NULL)
7840 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007841 else if (ctxt->sax->characters != NULL)
7842 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007843 }
7844 xmlFree(buf);
7845}
7846
7847/**
7848 * xmlParseContent:
7849 * @ctxt: an XML parser context
7850 *
7851 * Parse a content:
7852 *
7853 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7854 */
7855
7856void
7857xmlParseContent(xmlParserCtxtPtr ctxt) {
7858 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007859 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007860 ((RAW != '<') || (NXT(1) != '/'))) {
7861 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007862 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007863 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007864
7865 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007866 * First case : a Processing Instruction.
7867 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007868 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007869 xmlParsePI(ctxt);
7870 }
7871
7872 /*
7873 * Second case : a CDSection
7874 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007875 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007876 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007877 xmlParseCDSect(ctxt);
7878 }
7879
7880 /*
7881 * Third case : a comment
7882 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007883 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007884 (NXT(2) == '-') && (NXT(3) == '-')) {
7885 xmlParseComment(ctxt);
7886 ctxt->instate = XML_PARSER_CONTENT;
7887 }
7888
7889 /*
7890 * Fourth case : a sub-element.
7891 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007892 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007893 xmlParseElement(ctxt);
7894 }
7895
7896 /*
7897 * Fifth case : a reference. If if has not been resolved,
7898 * parsing returns it's Name, create the node
7899 */
7900
Daniel Veillard21a0f912001-02-25 19:54:14 +00007901 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007902 xmlParseReference(ctxt);
7903 }
7904
7905 /*
7906 * Last case, text. Note that References are handled directly.
7907 */
7908 else {
7909 xmlParseCharData(ctxt, 0);
7910 }
7911
7912 GROW;
7913 /*
7914 * Pop-up of finished entities.
7915 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007916 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007917 xmlPopInput(ctxt);
7918 SHRINK;
7919
Daniel Veillardfdc91562002-07-01 21:52:03 +00007920 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007921 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7922 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007923 ctxt->instate = XML_PARSER_EOF;
7924 break;
7925 }
7926 }
7927}
7928
7929/**
7930 * xmlParseElement:
7931 * @ctxt: an XML parser context
7932 *
7933 * parse an XML element, this is highly recursive
7934 *
7935 * [39] element ::= EmptyElemTag | STag content ETag
7936 *
7937 * [ WFC: Element Type Match ]
7938 * The Name in an element's end-tag must match the element type in the
7939 * start-tag.
7940 *
Owen Taylor3473f882001-02-23 17:55:21 +00007941 */
7942
7943void
7944xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007945 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007946 const xmlChar *prefix;
7947 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007948 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007949 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007950 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007951 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007952
7953 /* Capture start position */
7954 if (ctxt->record_info) {
7955 node_info.begin_pos = ctxt->input->consumed +
7956 (CUR_PTR - ctxt->input->base);
7957 node_info.begin_line = ctxt->input->line;
7958 }
7959
7960 if (ctxt->spaceNr == 0)
7961 spacePush(ctxt, -1);
7962 else
7963 spacePush(ctxt, *ctxt->space);
7964
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007965 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007966#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007967 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007968#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007969 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007970#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007971 else
7972 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007973#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007974 if (name == NULL) {
7975 spacePop(ctxt);
7976 return;
7977 }
7978 namePush(ctxt, name);
7979 ret = ctxt->node;
7980
Daniel Veillard4432df22003-09-28 18:58:27 +00007981#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007982 /*
7983 * [ VC: Root Element Type ]
7984 * The Name in the document type declaration must match the element
7985 * type of the root element.
7986 */
7987 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7988 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7989 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007990#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007991
7992 /*
7993 * Check for an Empty Element.
7994 */
7995 if ((RAW == '/') && (NXT(1) == '>')) {
7996 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007997 if (ctxt->sax2) {
7998 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7999 (!ctxt->disableSAX))
8000 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008001#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008002 } else {
8003 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8004 (!ctxt->disableSAX))
8005 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008006#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008007 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008008 namePop(ctxt);
8009 spacePop(ctxt);
8010 if (nsNr != ctxt->nsNr)
8011 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008012 if ( ret != NULL && ctxt->record_info ) {
8013 node_info.end_pos = ctxt->input->consumed +
8014 (CUR_PTR - ctxt->input->base);
8015 node_info.end_line = ctxt->input->line;
8016 node_info.node = ret;
8017 xmlParserAddNodeInfo(ctxt, &node_info);
8018 }
8019 return;
8020 }
8021 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008022 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008023 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008024 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8025 "Couldn't find end of Start Tag %s line %d\n",
8026 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008027
8028 /*
8029 * end of parsing of this node.
8030 */
8031 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008032 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008033 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008034 if (nsNr != ctxt->nsNr)
8035 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008036
8037 /*
8038 * Capture end position and add node
8039 */
8040 if ( ret != NULL && ctxt->record_info ) {
8041 node_info.end_pos = ctxt->input->consumed +
8042 (CUR_PTR - ctxt->input->base);
8043 node_info.end_line = ctxt->input->line;
8044 node_info.node = ret;
8045 xmlParserAddNodeInfo(ctxt, &node_info);
8046 }
8047 return;
8048 }
8049
8050 /*
8051 * Parse the content of the element:
8052 */
8053 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008054 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008055 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008056 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008057 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008058
8059 /*
8060 * end of parsing of this node.
8061 */
8062 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008063 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008064 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008065 if (nsNr != ctxt->nsNr)
8066 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008067 return;
8068 }
8069
8070 /*
8071 * parse the end of tag: '</' should be here.
8072 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008073 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008074 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008075 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008076 }
8077#ifdef LIBXML_SAX1_ENABLED
8078 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008079 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008080#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008081
8082 /*
8083 * Capture end position and add node
8084 */
8085 if ( ret != NULL && ctxt->record_info ) {
8086 node_info.end_pos = ctxt->input->consumed +
8087 (CUR_PTR - ctxt->input->base);
8088 node_info.end_line = ctxt->input->line;
8089 node_info.node = ret;
8090 xmlParserAddNodeInfo(ctxt, &node_info);
8091 }
8092}
8093
8094/**
8095 * xmlParseVersionNum:
8096 * @ctxt: an XML parser context
8097 *
8098 * parse the XML version value.
8099 *
8100 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8101 *
8102 * Returns the string giving the XML version number, or NULL
8103 */
8104xmlChar *
8105xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8106 xmlChar *buf = NULL;
8107 int len = 0;
8108 int size = 10;
8109 xmlChar cur;
8110
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008111 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008112 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008113 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008114 return(NULL);
8115 }
8116 cur = CUR;
8117 while (((cur >= 'a') && (cur <= 'z')) ||
8118 ((cur >= 'A') && (cur <= 'Z')) ||
8119 ((cur >= '0') && (cur <= '9')) ||
8120 (cur == '_') || (cur == '.') ||
8121 (cur == ':') || (cur == '-')) {
8122 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008123 xmlChar *tmp;
8124
Owen Taylor3473f882001-02-23 17:55:21 +00008125 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008126 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8127 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008128 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008129 return(NULL);
8130 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008131 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008132 }
8133 buf[len++] = cur;
8134 NEXT;
8135 cur=CUR;
8136 }
8137 buf[len] = 0;
8138 return(buf);
8139}
8140
8141/**
8142 * xmlParseVersionInfo:
8143 * @ctxt: an XML parser context
8144 *
8145 * parse the XML version.
8146 *
8147 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8148 *
8149 * [25] Eq ::= S? '=' S?
8150 *
8151 * Returns the version string, e.g. "1.0"
8152 */
8153
8154xmlChar *
8155xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8156 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008157
Daniel Veillarda07050d2003-10-19 14:46:32 +00008158 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008159 SKIP(7);
8160 SKIP_BLANKS;
8161 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008162 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008163 return(NULL);
8164 }
8165 NEXT;
8166 SKIP_BLANKS;
8167 if (RAW == '"') {
8168 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008169 version = xmlParseVersionNum(ctxt);
8170 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008171 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008172 } else
8173 NEXT;
8174 } else if (RAW == '\''){
8175 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008176 version = xmlParseVersionNum(ctxt);
8177 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008178 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008179 } else
8180 NEXT;
8181 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008182 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008183 }
8184 }
8185 return(version);
8186}
8187
8188/**
8189 * xmlParseEncName:
8190 * @ctxt: an XML parser context
8191 *
8192 * parse the XML encoding name
8193 *
8194 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8195 *
8196 * Returns the encoding name value or NULL
8197 */
8198xmlChar *
8199xmlParseEncName(xmlParserCtxtPtr ctxt) {
8200 xmlChar *buf = NULL;
8201 int len = 0;
8202 int size = 10;
8203 xmlChar cur;
8204
8205 cur = CUR;
8206 if (((cur >= 'a') && (cur <= 'z')) ||
8207 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008208 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008209 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008210 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008211 return(NULL);
8212 }
8213
8214 buf[len++] = cur;
8215 NEXT;
8216 cur = CUR;
8217 while (((cur >= 'a') && (cur <= 'z')) ||
8218 ((cur >= 'A') && (cur <= 'Z')) ||
8219 ((cur >= '0') && (cur <= '9')) ||
8220 (cur == '.') || (cur == '_') ||
8221 (cur == '-')) {
8222 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008223 xmlChar *tmp;
8224
Owen Taylor3473f882001-02-23 17:55:21 +00008225 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008226 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8227 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008228 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008229 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008230 return(NULL);
8231 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008232 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008233 }
8234 buf[len++] = cur;
8235 NEXT;
8236 cur = CUR;
8237 if (cur == 0) {
8238 SHRINK;
8239 GROW;
8240 cur = CUR;
8241 }
8242 }
8243 buf[len] = 0;
8244 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008245 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008246 }
8247 return(buf);
8248}
8249
8250/**
8251 * xmlParseEncodingDecl:
8252 * @ctxt: an XML parser context
8253 *
8254 * parse the XML encoding declaration
8255 *
8256 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8257 *
8258 * this setups the conversion filters.
8259 *
8260 * Returns the encoding value or NULL
8261 */
8262
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008263const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008264xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8265 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008266
8267 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008268 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008269 SKIP(8);
8270 SKIP_BLANKS;
8271 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008272 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008273 return(NULL);
8274 }
8275 NEXT;
8276 SKIP_BLANKS;
8277 if (RAW == '"') {
8278 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008279 encoding = xmlParseEncName(ctxt);
8280 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008281 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008282 } else
8283 NEXT;
8284 } else if (RAW == '\''){
8285 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008286 encoding = xmlParseEncName(ctxt);
8287 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008288 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008289 } else
8290 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008292 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008293 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008294 /*
8295 * UTF-16 encoding stwich has already taken place at this stage,
8296 * more over the little-endian/big-endian selection is already done
8297 */
8298 if ((encoding != NULL) &&
8299 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8300 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008301 if (ctxt->encoding != NULL)
8302 xmlFree((xmlChar *) ctxt->encoding);
8303 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008304 }
8305 /*
8306 * UTF-8 encoding is handled natively
8307 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008308 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008309 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8310 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008311 if (ctxt->encoding != NULL)
8312 xmlFree((xmlChar *) ctxt->encoding);
8313 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008314 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008315 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008316 xmlCharEncodingHandlerPtr handler;
8317
8318 if (ctxt->input->encoding != NULL)
8319 xmlFree((xmlChar *) ctxt->input->encoding);
8320 ctxt->input->encoding = encoding;
8321
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008322 handler = xmlFindCharEncodingHandler((const char *) encoding);
8323 if (handler != NULL) {
8324 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008325 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008326 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008327 "Unsupported encoding %s\n", encoding);
8328 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008329 }
8330 }
8331 }
8332 return(encoding);
8333}
8334
8335/**
8336 * xmlParseSDDecl:
8337 * @ctxt: an XML parser context
8338 *
8339 * parse the XML standalone declaration
8340 *
8341 * [32] SDDecl ::= S 'standalone' Eq
8342 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8343 *
8344 * [ VC: Standalone Document Declaration ]
8345 * TODO The standalone document declaration must have the value "no"
8346 * if any external markup declarations contain declarations of:
8347 * - attributes with default values, if elements to which these
8348 * attributes apply appear in the document without specifications
8349 * of values for these attributes, or
8350 * - entities (other than amp, lt, gt, apos, quot), if references
8351 * to those entities appear in the document, or
8352 * - attributes with values subject to normalization, where the
8353 * attribute appears in the document with a value which will change
8354 * as a result of normalization, or
8355 * - element types with element content, if white space occurs directly
8356 * within any instance of those types.
8357 *
8358 * Returns 1 if standalone, 0 otherwise
8359 */
8360
8361int
8362xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8363 int standalone = -1;
8364
8365 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008366 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008367 SKIP(10);
8368 SKIP_BLANKS;
8369 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008370 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008371 return(standalone);
8372 }
8373 NEXT;
8374 SKIP_BLANKS;
8375 if (RAW == '\''){
8376 NEXT;
8377 if ((RAW == 'n') && (NXT(1) == 'o')) {
8378 standalone = 0;
8379 SKIP(2);
8380 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8381 (NXT(2) == 's')) {
8382 standalone = 1;
8383 SKIP(3);
8384 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008385 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008386 }
8387 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008388 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008389 } else
8390 NEXT;
8391 } else if (RAW == '"'){
8392 NEXT;
8393 if ((RAW == 'n') && (NXT(1) == 'o')) {
8394 standalone = 0;
8395 SKIP(2);
8396 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8397 (NXT(2) == 's')) {
8398 standalone = 1;
8399 SKIP(3);
8400 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008401 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008402 }
8403 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008404 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008405 } else
8406 NEXT;
8407 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008408 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008409 }
8410 }
8411 return(standalone);
8412}
8413
8414/**
8415 * xmlParseXMLDecl:
8416 * @ctxt: an XML parser context
8417 *
8418 * parse an XML declaration header
8419 *
8420 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8421 */
8422
8423void
8424xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8425 xmlChar *version;
8426
8427 /*
8428 * We know that '<?xml' is here.
8429 */
8430 SKIP(5);
8431
William M. Brack76e95df2003-10-18 16:20:14 +00008432 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008433 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8434 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008435 }
8436 SKIP_BLANKS;
8437
8438 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008439 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008440 */
8441 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008442 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008443 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008444 } else {
8445 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8446 /*
8447 * TODO: Blueberry should be detected here
8448 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008449 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8450 "Unsupported version '%s'\n",
8451 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008452 }
8453 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008454 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008455 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008456 }
Owen Taylor3473f882001-02-23 17:55:21 +00008457
8458 /*
8459 * We may have the encoding declaration
8460 */
William M. Brack76e95df2003-10-18 16:20:14 +00008461 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008462 if ((RAW == '?') && (NXT(1) == '>')) {
8463 SKIP(2);
8464 return;
8465 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008466 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008467 }
8468 xmlParseEncodingDecl(ctxt);
8469 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8470 /*
8471 * The XML REC instructs us to stop parsing right here
8472 */
8473 return;
8474 }
8475
8476 /*
8477 * We may have the standalone status.
8478 */
William M. Brack76e95df2003-10-18 16:20:14 +00008479 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008480 if ((RAW == '?') && (NXT(1) == '>')) {
8481 SKIP(2);
8482 return;
8483 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008485 }
8486 SKIP_BLANKS;
8487 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8488
8489 SKIP_BLANKS;
8490 if ((RAW == '?') && (NXT(1) == '>')) {
8491 SKIP(2);
8492 } else if (RAW == '>') {
8493 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008494 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008495 NEXT;
8496 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008497 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008498 MOVETO_ENDTAG(CUR_PTR);
8499 NEXT;
8500 }
8501}
8502
8503/**
8504 * xmlParseMisc:
8505 * @ctxt: an XML parser context
8506 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008507 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008508 *
8509 * [27] Misc ::= Comment | PI | S
8510 */
8511
8512void
8513xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008514 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008515 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008516 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008517 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008518 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008519 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008520 NEXT;
8521 } else
8522 xmlParseComment(ctxt);
8523 }
8524}
8525
8526/**
8527 * xmlParseDocument:
8528 * @ctxt: an XML parser context
8529 *
8530 * parse an XML document (and build a tree if using the standard SAX
8531 * interface).
8532 *
8533 * [1] document ::= prolog element Misc*
8534 *
8535 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8536 *
8537 * Returns 0, -1 in case of error. the parser context is augmented
8538 * as a result of the parsing.
8539 */
8540
8541int
8542xmlParseDocument(xmlParserCtxtPtr ctxt) {
8543 xmlChar start[4];
8544 xmlCharEncoding enc;
8545
8546 xmlInitParser();
8547
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008548 if ((ctxt == NULL) || (ctxt->input == NULL))
8549 return(-1);
8550
Owen Taylor3473f882001-02-23 17:55:21 +00008551 GROW;
8552
8553 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008554 * SAX: detecting the level.
8555 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008556 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008557
8558 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008559 * SAX: beginning of the document processing.
8560 */
8561 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8562 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8563
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008564 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8565 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008566 /*
8567 * Get the 4 first bytes and decode the charset
8568 * if enc != XML_CHAR_ENCODING_NONE
8569 * plug some encoding conversion routines.
8570 */
8571 start[0] = RAW;
8572 start[1] = NXT(1);
8573 start[2] = NXT(2);
8574 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008575 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008576 if (enc != XML_CHAR_ENCODING_NONE) {
8577 xmlSwitchEncoding(ctxt, enc);
8578 }
Owen Taylor3473f882001-02-23 17:55:21 +00008579 }
8580
8581
8582 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008583 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008584 }
8585
8586 /*
8587 * Check for the XMLDecl in the Prolog.
8588 */
8589 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008590 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008591
8592 /*
8593 * Note that we will switch encoding on the fly.
8594 */
8595 xmlParseXMLDecl(ctxt);
8596 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8597 /*
8598 * The XML REC instructs us to stop parsing right here
8599 */
8600 return(-1);
8601 }
8602 ctxt->standalone = ctxt->input->standalone;
8603 SKIP_BLANKS;
8604 } else {
8605 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8606 }
8607 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8608 ctxt->sax->startDocument(ctxt->userData);
8609
8610 /*
8611 * The Misc part of the Prolog
8612 */
8613 GROW;
8614 xmlParseMisc(ctxt);
8615
8616 /*
8617 * Then possibly doc type declaration(s) and more Misc
8618 * (doctypedecl Misc*)?
8619 */
8620 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008621 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008622
8623 ctxt->inSubset = 1;
8624 xmlParseDocTypeDecl(ctxt);
8625 if (RAW == '[') {
8626 ctxt->instate = XML_PARSER_DTD;
8627 xmlParseInternalSubset(ctxt);
8628 }
8629
8630 /*
8631 * Create and update the external subset.
8632 */
8633 ctxt->inSubset = 2;
8634 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8635 (!ctxt->disableSAX))
8636 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8637 ctxt->extSubSystem, ctxt->extSubURI);
8638 ctxt->inSubset = 0;
8639
8640
8641 ctxt->instate = XML_PARSER_PROLOG;
8642 xmlParseMisc(ctxt);
8643 }
8644
8645 /*
8646 * Time to start parsing the tree itself
8647 */
8648 GROW;
8649 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008650 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8651 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008652 } else {
8653 ctxt->instate = XML_PARSER_CONTENT;
8654 xmlParseElement(ctxt);
8655 ctxt->instate = XML_PARSER_EPILOG;
8656
8657
8658 /*
8659 * The Misc part at the end
8660 */
8661 xmlParseMisc(ctxt);
8662
Daniel Veillard561b7f82002-03-20 21:55:57 +00008663 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008664 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008665 }
8666 ctxt->instate = XML_PARSER_EOF;
8667 }
8668
8669 /*
8670 * SAX: end of the document processing.
8671 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008672 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008673 ctxt->sax->endDocument(ctxt->userData);
8674
Daniel Veillard5997aca2002-03-18 18:36:20 +00008675 /*
8676 * Remove locally kept entity definitions if the tree was not built
8677 */
8678 if ((ctxt->myDoc != NULL) &&
8679 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8680 xmlFreeDoc(ctxt->myDoc);
8681 ctxt->myDoc = NULL;
8682 }
8683
Daniel Veillardc7612992002-02-17 22:47:37 +00008684 if (! ctxt->wellFormed) {
8685 ctxt->valid = 0;
8686 return(-1);
8687 }
Owen Taylor3473f882001-02-23 17:55:21 +00008688 return(0);
8689}
8690
8691/**
8692 * xmlParseExtParsedEnt:
8693 * @ctxt: an XML parser context
8694 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008695 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008696 * An external general parsed entity is well-formed if it matches the
8697 * production labeled extParsedEnt.
8698 *
8699 * [78] extParsedEnt ::= TextDecl? content
8700 *
8701 * Returns 0, -1 in case of error. the parser context is augmented
8702 * as a result of the parsing.
8703 */
8704
8705int
8706xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8707 xmlChar start[4];
8708 xmlCharEncoding enc;
8709
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008710 if ((ctxt == NULL) || (ctxt->input == NULL))
8711 return(-1);
8712
Owen Taylor3473f882001-02-23 17:55:21 +00008713 xmlDefaultSAXHandlerInit();
8714
Daniel Veillard309f81d2003-09-23 09:02:53 +00008715 xmlDetectSAX2(ctxt);
8716
Owen Taylor3473f882001-02-23 17:55:21 +00008717 GROW;
8718
8719 /*
8720 * SAX: beginning of the document processing.
8721 */
8722 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8723 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8724
8725 /*
8726 * Get the 4 first bytes and decode the charset
8727 * if enc != XML_CHAR_ENCODING_NONE
8728 * plug some encoding conversion routines.
8729 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008730 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8731 start[0] = RAW;
8732 start[1] = NXT(1);
8733 start[2] = NXT(2);
8734 start[3] = NXT(3);
8735 enc = xmlDetectCharEncoding(start, 4);
8736 if (enc != XML_CHAR_ENCODING_NONE) {
8737 xmlSwitchEncoding(ctxt, enc);
8738 }
Owen Taylor3473f882001-02-23 17:55:21 +00008739 }
8740
8741
8742 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008743 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008744 }
8745
8746 /*
8747 * Check for the XMLDecl in the Prolog.
8748 */
8749 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008750 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008751
8752 /*
8753 * Note that we will switch encoding on the fly.
8754 */
8755 xmlParseXMLDecl(ctxt);
8756 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8757 /*
8758 * The XML REC instructs us to stop parsing right here
8759 */
8760 return(-1);
8761 }
8762 SKIP_BLANKS;
8763 } else {
8764 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8765 }
8766 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8767 ctxt->sax->startDocument(ctxt->userData);
8768
8769 /*
8770 * Doing validity checking on chunk doesn't make sense
8771 */
8772 ctxt->instate = XML_PARSER_CONTENT;
8773 ctxt->validate = 0;
8774 ctxt->loadsubset = 0;
8775 ctxt->depth = 0;
8776
8777 xmlParseContent(ctxt);
8778
8779 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008780 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008781 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008782 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008783 }
8784
8785 /*
8786 * SAX: end of the document processing.
8787 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008788 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008789 ctxt->sax->endDocument(ctxt->userData);
8790
8791 if (! ctxt->wellFormed) return(-1);
8792 return(0);
8793}
8794
Daniel Veillard73b013f2003-09-30 12:36:01 +00008795#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008796/************************************************************************
8797 * *
8798 * Progressive parsing interfaces *
8799 * *
8800 ************************************************************************/
8801
8802/**
8803 * xmlParseLookupSequence:
8804 * @ctxt: an XML parser context
8805 * @first: the first char to lookup
8806 * @next: the next char to lookup or zero
8807 * @third: the next char to lookup or zero
8808 *
8809 * Try to find if a sequence (first, next, third) or just (first next) or
8810 * (first) is available in the input stream.
8811 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8812 * to avoid rescanning sequences of bytes, it DOES change the state of the
8813 * parser, do not use liberally.
8814 *
8815 * Returns the index to the current parsing point if the full sequence
8816 * is available, -1 otherwise.
8817 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008818static int
Owen Taylor3473f882001-02-23 17:55:21 +00008819xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8820 xmlChar next, xmlChar third) {
8821 int base, len;
8822 xmlParserInputPtr in;
8823 const xmlChar *buf;
8824
8825 in = ctxt->input;
8826 if (in == NULL) return(-1);
8827 base = in->cur - in->base;
8828 if (base < 0) return(-1);
8829 if (ctxt->checkIndex > base)
8830 base = ctxt->checkIndex;
8831 if (in->buf == NULL) {
8832 buf = in->base;
8833 len = in->length;
8834 } else {
8835 buf = in->buf->buffer->content;
8836 len = in->buf->buffer->use;
8837 }
8838 /* take into account the sequence length */
8839 if (third) len -= 2;
8840 else if (next) len --;
8841 for (;base < len;base++) {
8842 if (buf[base] == first) {
8843 if (third != 0) {
8844 if ((buf[base + 1] != next) ||
8845 (buf[base + 2] != third)) continue;
8846 } else if (next != 0) {
8847 if (buf[base + 1] != next) continue;
8848 }
8849 ctxt->checkIndex = 0;
8850#ifdef DEBUG_PUSH
8851 if (next == 0)
8852 xmlGenericError(xmlGenericErrorContext,
8853 "PP: lookup '%c' found at %d\n",
8854 first, base);
8855 else if (third == 0)
8856 xmlGenericError(xmlGenericErrorContext,
8857 "PP: lookup '%c%c' found at %d\n",
8858 first, next, base);
8859 else
8860 xmlGenericError(xmlGenericErrorContext,
8861 "PP: lookup '%c%c%c' found at %d\n",
8862 first, next, third, base);
8863#endif
8864 return(base - (in->cur - in->base));
8865 }
8866 }
8867 ctxt->checkIndex = base;
8868#ifdef DEBUG_PUSH
8869 if (next == 0)
8870 xmlGenericError(xmlGenericErrorContext,
8871 "PP: lookup '%c' failed\n", first);
8872 else if (third == 0)
8873 xmlGenericError(xmlGenericErrorContext,
8874 "PP: lookup '%c%c' failed\n", first, next);
8875 else
8876 xmlGenericError(xmlGenericErrorContext,
8877 "PP: lookup '%c%c%c' failed\n", first, next, third);
8878#endif
8879 return(-1);
8880}
8881
8882/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008883 * xmlParseGetLasts:
8884 * @ctxt: an XML parser context
8885 * @lastlt: pointer to store the last '<' from the input
8886 * @lastgt: pointer to store the last '>' from the input
8887 *
8888 * Lookup the last < and > in the current chunk
8889 */
8890static void
8891xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8892 const xmlChar **lastgt) {
8893 const xmlChar *tmp;
8894
8895 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8896 xmlGenericError(xmlGenericErrorContext,
8897 "Internal error: xmlParseGetLasts\n");
8898 return;
8899 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008900 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008901 tmp = ctxt->input->end;
8902 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008903 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008904 if (tmp < ctxt->input->base) {
8905 *lastlt = NULL;
8906 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008907 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008908 *lastlt = tmp;
8909 tmp++;
8910 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8911 if (*tmp == '\'') {
8912 tmp++;
8913 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8914 if (tmp < ctxt->input->end) tmp++;
8915 } else if (*tmp == '"') {
8916 tmp++;
8917 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8918 if (tmp < ctxt->input->end) tmp++;
8919 } else
8920 tmp++;
8921 }
8922 if (tmp < ctxt->input->end)
8923 *lastgt = tmp;
8924 else {
8925 tmp = *lastlt;
8926 tmp--;
8927 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8928 if (tmp >= ctxt->input->base)
8929 *lastgt = tmp;
8930 else
8931 *lastgt = NULL;
8932 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008933 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008934 } else {
8935 *lastlt = NULL;
8936 *lastgt = NULL;
8937 }
8938}
8939/**
Owen Taylor3473f882001-02-23 17:55:21 +00008940 * xmlParseTryOrFinish:
8941 * @ctxt: an XML parser context
8942 * @terminate: last chunk indicator
8943 *
8944 * Try to progress on parsing
8945 *
 * This is a state machine: an endless loop dispatches on ctxt->instate and
 * each state either consumes input and selects the next state, or jumps to
 * the exit label when the construct it expects is not complete in the
 * buffered data. When @terminate is zero, the PI, comment, DOCTYPE,
 * reference, character-data and start/end tag branches first check that
 * the end of the construct is buffered; when @terminate is non-zero these
 * checks are skipped and the construct is parsed with what is available.
 *
 * NOTE(review): the only statement that changes the returned value from
 * its initial 0 is "ret += 5" in the XML declaration branch of the
 * XML_PARSER_START state, so the sentence below does not describe every
 * path -- confirm what callers expect.
 *
8946 * Returns zero if no parsing was possible
8947 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008948static int
Owen Taylor3473f882001-02-23 17:55:21 +00008949xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8950 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008951 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008952 xmlChar cur, next;
    /* last '<' and '>' of the buffered data, filled by xmlParseGetLasts() */
Daniel Veillarda880b122003-04-21 21:36:41 +00008953 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008954
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008955 if (ctxt->input == NULL)
8956 return(0);
8957
    /* Debug builds only: trace the state the parser is in on entry. */
Owen Taylor3473f882001-02-23 17:55:21 +00008958#ifdef DEBUG_PUSH
8959 switch (ctxt->instate) {
8960 case XML_PARSER_EOF:
8961 xmlGenericError(xmlGenericErrorContext,
8962 "PP: try EOF\n"); break;
8963 case XML_PARSER_START:
8964 xmlGenericError(xmlGenericErrorContext,
8965 "PP: try START\n"); break;
8966 case XML_PARSER_MISC:
8967 xmlGenericError(xmlGenericErrorContext,
8968 "PP: try MISC\n");break;
8969 case XML_PARSER_COMMENT:
8970 xmlGenericError(xmlGenericErrorContext,
8971 "PP: try COMMENT\n");break;
8972 case XML_PARSER_PROLOG:
8973 xmlGenericError(xmlGenericErrorContext,
8974 "PP: try PROLOG\n");break;
8975 case XML_PARSER_START_TAG:
8976 xmlGenericError(xmlGenericErrorContext,
8977 "PP: try START_TAG\n");break;
8978 case XML_PARSER_CONTENT:
8979 xmlGenericError(xmlGenericErrorContext,
8980 "PP: try CONTENT\n");break;
8981 case XML_PARSER_CDATA_SECTION:
8982 xmlGenericError(xmlGenericErrorContext,
8983 "PP: try CDATA_SECTION\n");break;
8984 case XML_PARSER_END_TAG:
8985 xmlGenericError(xmlGenericErrorContext,
8986 "PP: try END_TAG\n");break;
8987 case XML_PARSER_ENTITY_DECL:
8988 xmlGenericError(xmlGenericErrorContext,
8989 "PP: try ENTITY_DECL\n");break;
8990 case XML_PARSER_ENTITY_VALUE:
8991 xmlGenericError(xmlGenericErrorContext,
8992 "PP: try ENTITY_VALUE\n");break;
8993 case XML_PARSER_ATTRIBUTE_VALUE:
8994 xmlGenericError(xmlGenericErrorContext,
8995 "PP: try ATTRIBUTE_VALUE\n");break;
8996 case XML_PARSER_DTD:
8997 xmlGenericError(xmlGenericErrorContext,
8998 "PP: try DTD\n");break;
8999 case XML_PARSER_EPILOG:
9000 xmlGenericError(xmlGenericErrorContext,
9001 "PP: try EPILOG\n");break;
9002 case XML_PARSER_PI:
9003 xmlGenericError(xmlGenericErrorContext,
9004 "PP: try PI\n");break;
9005 case XML_PARSER_IGNORE:
9006 xmlGenericError(xmlGenericErrorContext,
9007 "PP: try IGNORE\n");break;
9008 }
9009#endif
9010
    /*
     * Once more than 4096 bytes lie between the buffer base and the
     * current position, shrink the input. checkIndex is reset because it
     * is an offset into the buffer being shrunk.
     */
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009011 if ((ctxt->input != NULL) &&
9012 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009013 xmlSHRINK(ctxt);
9014 ctxt->checkIndex = 0;
9015 }
    /* Computed after the possible shrink, which is done first above. */
9016 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009017
Daniel Veillarda880b122003-04-21 21:36:41 +00009018 while (1) {
    /* Give up once an error was recorded and SAX callbacks are disabled. */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009019 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9020 return(0);
9021
9022
Owen Taylor3473f882001-02-23 17:55:21 +00009023 /*
9024 * Pop-up of finished entities.
9025 */
9026 while ((RAW == 0) && (ctxt->inputNr > 1))
9027 xmlPopInput(ctxt);
9028
    /*
     * avail = number of bytes between the current position and the end of
     * the data: taken from input->length when there is no input buffer,
     * from the buffer's "use" count otherwise.
     */
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009029 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009030 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009031 avail = ctxt->input->length -
9032 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009033 else {
9034 /*
9035 * If we are operating on converted input, try to flush
9036 * remaining chars to avoid them stalling in the non-converted
9037 * buffer.
9038 */
9039 if ((ctxt->input->buf->raw != NULL) &&
9040 (ctxt->input->buf->raw->use > 0)) {
    /*
     * base/cur are saved as offsets and rebuilt after the push, together
     * with end, from the buffer's content pointer and use count.
     */
9041 int base = ctxt->input->base -
9042 ctxt->input->buf->buffer->content;
9043 int current = ctxt->input->cur - ctxt->input->base;
9044
9045 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9046 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9047 ctxt->input->cur = ctxt->input->base + current;
9048 ctxt->input->end =
9049 &ctxt->input->buf->buffer->content[
9050 ctxt->input->buf->buffer->use];
9051 }
9052 avail = ctxt->input->buf->buffer->use -
9053 (ctxt->input->cur - ctxt->input->base);
9054 }
Owen Taylor3473f882001-02-23 17:55:21 +00009055 if (avail < 1)
9056 goto done;
9057 switch (ctxt->instate) {
9058 case XML_PARSER_EOF:
9059 /*
9060 * Document parsing is done !
9061 */
9062 goto done;
9063 case XML_PARSER_START:
    /*
     * First visit (no charset known yet): wait for 4 bytes, detect the
     * encoding from them and go round the loop again.
     * NOTE(review): when the detection yields XML_CHAR_ENCODING_NONE
     * nothing in this branch changes ctxt->charset, so re-entering this
     * state would take the same branch again -- confirm charset is
     * updated elsewhere.
     */
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009064 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9065 xmlChar start[4];
9066 xmlCharEncoding enc;
9067
9068 /*
9069 * Very first chars read from the document flow.
9070 */
9071 if (avail < 4)
9072 goto done;
9073
9074 /*
9075 * Get the 4 first bytes and decode the charset
9076 * if enc != XML_CHAR_ENCODING_NONE
9077 * plug some encoding conversion routines.
9078 */
9079 start[0] = RAW;
9080 start[1] = NXT(1);
9081 start[2] = NXT(2);
9082 start[3] = NXT(3);
9083 enc = xmlDetectCharEncoding(start, 4);
9084 if (enc != XML_CHAR_ENCODING_NONE) {
9085 xmlSwitchEncoding(ctxt, enc);
9086 }
9087 break;
9088 }
Owen Taylor3473f882001-02-23 17:55:21 +00009089
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009090 if (avail < 2)
9091 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009092 cur = ctxt->input->cur[0];
9093 next = ctxt->input->cur[1];
    /* A zero byte as first character is reported as an empty document. */
9094 if (cur == 0) {
9095 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9096 ctxt->sax->setDocumentLocator(ctxt->userData,
9097 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009098 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009099 ctxt->instate = XML_PARSER_EOF;
9100#ifdef DEBUG_PUSH
9101 xmlGenericError(xmlGenericErrorContext,
9102 "PP: entering EOF\n");
9103#endif
9104 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9105 ctxt->sax->endDocument(ctxt->userData);
9106 goto done;
9107 }
9108 if ((cur == '<') && (next == '?')) {
9109 /* PI or XML decl */
9110 if (avail < 5) return(ret);
9111 if ((!terminate) &&
9112 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9113 return(ret);
9114 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9115 ctxt->sax->setDocumentLocator(ctxt->userData,
9116 &xmlDefaultSAXLocator);
    /* "<?xml" followed by a blank is an XML declaration, not a plain PI. */
9117 if ((ctxt->input->cur[2] == 'x') &&
9118 (ctxt->input->cur[3] == 'm') &&
9119 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009120 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009121 ret += 5;
9122#ifdef DEBUG_PUSH
9123 xmlGenericError(xmlGenericErrorContext,
9124 "PP: Parsing XML Decl\n");
9125#endif
9126 xmlParseXMLDecl(ctxt);
9127 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9128 /*
9129 * The XML REC instructs us to stop parsing right
9130 * here
9131 */
9132 ctxt->instate = XML_PARSER_EOF;
9133 return(0);
9134 }
9135 ctxt->standalone = ctxt->input->standalone;
9136 if ((ctxt->encoding == NULL) &&
9137 (ctxt->input->encoding != NULL))
9138 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9139 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9140 (!ctxt->disableSAX))
9141 ctxt->sax->startDocument(ctxt->userData);
9142 ctxt->instate = XML_PARSER_MISC;
9143#ifdef DEBUG_PUSH
9144 xmlGenericError(xmlGenericErrorContext,
9145 "PP: entering MISC\n");
9146#endif
9147 } else {
    /*
     * Some other PI: it is not consumed here, the MISC state will see it.
     * NOTE(review): unlike the non-PI branch further down, the result of
     * xmlCharStrdup() is not checked for NULL here.
     */
9148 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9149 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9150 (!ctxt->disableSAX))
9151 ctxt->sax->startDocument(ctxt->userData);
9152 ctxt->instate = XML_PARSER_MISC;
9153#ifdef DEBUG_PUSH
9154 xmlGenericError(xmlGenericErrorContext,
9155 "PP: entering MISC\n");
9156#endif
9157 }
9158 } else {
    /* No XML declaration: use the default version and move on to MISC. */
9159 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9160 ctxt->sax->setDocumentLocator(ctxt->userData,
9161 &xmlDefaultSAXLocator);
9162 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009163 if (ctxt->version == NULL) {
9164 xmlErrMemory(ctxt, NULL);
9165 break;
9166 }
Owen Taylor3473f882001-02-23 17:55:21 +00009167 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9168 (!ctxt->disableSAX))
9169 ctxt->sax->startDocument(ctxt->userData);
9170 ctxt->instate = XML_PARSER_MISC;
9171#ifdef DEBUG_PUSH
9172 xmlGenericError(xmlGenericErrorContext,
9173 "PP: entering MISC\n");
9174#endif
9175 }
9176 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009177 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009178 const xmlChar *name;
9179 const xmlChar *prefix;
9180 const xmlChar *URI;
    /* namespace count before the tag; the difference is pushed with the name */
9181 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009182
9183 if ((avail < 2) && (ctxt->inputNr == 1))
9184 goto done;
9185 cur = ctxt->input->cur[0];
9186 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009187 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009188 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009189 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9190 ctxt->sax->endDocument(ctxt->userData);
9191 goto done;
9192 }
    /*
     * Unless this is the last chunk, wait until the tag is complete: in
     * progressive mode that means the current position is before the '>'
     * recorded in lastgt, otherwise a '>' must be found by lookup.
     */
9193 if (!terminate) {
9194 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009195 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009196 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009197 goto done;
9198 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9199 goto done;
9200 }
9201 }
9202 if (ctxt->spaceNr == 0)
9203 spacePush(ctxt, -1);
9204 else
9205 spacePush(ctxt, *ctxt->space);
    /*
     * With SAX1 support compiled in, the SAX2 parser is used only when
     * ctxt->sax2 is set; without it, the SAX2 parser is used always.
     */
Daniel Veillard81273902003-09-30 00:43:48 +00009206#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009207 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009208#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009209 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009210#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009211 else
9212 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009213#endif /* LIBXML_SAX1_ENABLED */
    /* No element name could be parsed: stop the document here. */
Daniel Veillarda880b122003-04-21 21:36:41 +00009214 if (name == NULL) {
9215 spacePop(ctxt);
9216 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009217 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9218 ctxt->sax->endDocument(ctxt->userData);
9219 goto done;
9220 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009221#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009222 /*
9223 * [ VC: Root Element Type ]
9224 * The Name in the document type declaration must match
9225 * the element type of the root element.
9226 */
9227 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9228 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9229 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009230#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009231
9232 /*
9233 * Check for an Empty Element.
9234 */
9235 if ((RAW == '/') && (NXT(1) == '>')) {
9236 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009237
9238 if (ctxt->sax2) {
9239 if ((ctxt->sax != NULL) &&
9240 (ctxt->sax->endElementNs != NULL) &&
9241 (!ctxt->disableSAX))
9242 ctxt->sax->endElementNs(ctxt->userData, name,
9243 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009244#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009245 } else {
9246 if ((ctxt->sax != NULL) &&
9247 (ctxt->sax->endElement != NULL) &&
9248 (!ctxt->disableSAX))
9249 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009250#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009251 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009252 spacePop(ctxt);
    /* An empty name stack means the root element was just closed. */
9253 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009254 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009255 } else {
9256 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009257 }
9258 break;
9259 }
9260 if (RAW == '>') {
9261 NEXT;
9262 } else {
    /*
     * NOTE(review): after this error the code still falls through and
     * pushes the element name below -- confirm this is intended.
     */
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009263 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009264 "Couldn't find end of Start Tag %s\n",
9265 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009266 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009267 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009268 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009269 if (ctxt->sax2)
9270 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009271#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009272 else
9273 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009274#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009275
Daniel Veillarda880b122003-04-21 21:36:41 +00009276 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009277 break;
9278 }
9279 case XML_PARSER_CONTENT: {
    /*
     * test/cons snapshot the position and consumed count so that, at the
     * end of this state, a branch that consumed nothing can be detected.
     */
9280 const xmlChar *test;
9281 unsigned int cons;
9282 if ((avail < 2) && (ctxt->inputNr == 1))
9283 goto done;
9284 cur = ctxt->input->cur[0];
9285 next = ctxt->input->cur[1];
9286
9287 test = CUR_PTR;
9288 cons = ctxt->input->consumed;
    /*
     * Dispatch on the first characters. The end-tag, start-tag and CDATA
     * branches only switch state and leave the switch without reaching the
     * progress check at the bottom.
     */
9289 if ((cur == '<') && (next == '/')) {
9290 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009291 break;
9292 } else if ((cur == '<') && (next == '?')) {
9293 if ((!terminate) &&
9294 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9295 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009296 xmlParsePI(ctxt);
9297 } else if ((cur == '<') && (next != '!')) {
9298 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009299 break;
9300 } else if ((cur == '<') && (next == '!') &&
9301 (ctxt->input->cur[2] == '-') &&
9302 (ctxt->input->cur[3] == '-')) {
9303 if ((!terminate) &&
9304 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9305 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009306 xmlParseComment(ctxt);
9307 ctxt->instate = XML_PARSER_CONTENT;
9308 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9309 (ctxt->input->cur[2] == '[') &&
9310 (ctxt->input->cur[3] == 'C') &&
9311 (ctxt->input->cur[4] == 'D') &&
9312 (ctxt->input->cur[5] == 'A') &&
9313 (ctxt->input->cur[6] == 'T') &&
9314 (ctxt->input->cur[7] == 'A') &&
9315 (ctxt->input->cur[8] == '[')) {
    /* Skip the 9 characters of "<![CDATA[". */
9316 SKIP(9);
9317 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009318 break;
9319 } else if ((cur == '<') && (next == '!') &&
9320 (avail < 9)) {
    /* "<!" with fewer than 9 bytes buffered: cannot classify it yet. */
9321 goto done;
9322 } else if (cur == '&') {
9323 if ((!terminate) &&
9324 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9325 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009326 xmlParseReference(ctxt);
9327 } else {
9328 /* TODO Avoid the extra copy, handle directly !!! */
9329 /*
9330 * Goal of the following test is:
9331 * - minimize calls to the SAX 'character' callback
9332 * when they are mergeable
9333 * - handle a problem for isBlank when we only parse
9334 * a sequence of blank chars and the next one is
9335 * not available to check against '<' presence.
9336 * - tries to homogenize the differences in SAX
9337 * callbacks between the push and pull versions
9338 * of the parser.
9339 */
9340 if ((ctxt->inputNr == 1) &&
9341 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9342 if (!terminate) {
9343 if (ctxt->progressive) {
9344 if ((lastlt == NULL) ||
9345 (ctxt->input->cur > lastlt))
9346 goto done;
9347 } else if (xmlParseLookupSequence(ctxt,
9348 '<', 0, 0) < 0) {
9349 goto done;
9350 }
9351 }
9352 }
9353 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009354 xmlParseCharData(ctxt, 0);
9355 }
9356 /*
9357 * Pop-up of finished entities.
9358 */
9359 while ((RAW == 0) && (ctxt->inputNr > 1))
9360 xmlPopInput(ctxt);
    /* Nothing was consumed by the branch taken: report it and stop. */
9361 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009362 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9363 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009364 ctxt->instate = XML_PARSER_EOF;
9365 break;
9366 }
9367 break;
9368 }
9369 case XML_PARSER_END_TAG:
9370 if (avail < 2)
9371 goto done;
    /* Same completeness test as for start tags. */
9372 if (!terminate) {
9373 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009374 /* > can be found unescaped in attribute values */
9375 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009376 goto done;
9377 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9378 goto done;
9379 }
9380 }
    /*
     * pushTab is read three slots per open element, the last triple being
     * the innermost element.
     * NOTE(review): the index is negative if nameNr is 0 -- this assumes
     * the state is only entered with at least one open element; confirm.
     */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009381 if (ctxt->sax2) {
9382 xmlParseEndTag2(ctxt,
9383 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9384 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009385 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009386 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009387 }
9388#ifdef LIBXML_SAX1_ENABLED
9389 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009390 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009391#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009392 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009393 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009394 } else {
9395 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009396 }
9397 break;
9398 case XML_PARSER_CDATA_SECTION: {
9399 /*
9400 * The Push mode need to have the SAX callback for
9401 * cdataBlock merge back contiguous callbacks.
9402 */
9403 int base;
9404
    /*
     * The lookup is done whatever the value of terminate. If "]]>" is not
     * buffered yet, one XML_PARSER_BIG_BUFFER_SIZE block is delivered, but
     * only when at least two more bytes than that are available.
     */
9405 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9406 if (base < 0) {
9407 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9408 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9409 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009410 ctxt->sax->cdataBlock(ctxt->userData,
9411 ctxt->input->cur,
9412 XML_PARSER_BIG_BUFFER_SIZE);
9413 else if (ctxt->sax->characters != NULL)
9414 ctxt->sax->characters(ctxt->userData,
9415 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009416 XML_PARSER_BIG_BUFFER_SIZE);
9417 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009418 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009419 ctxt->checkIndex = 0;
9420 }
9421 goto done;
9422 } else {
    /*
     * Terminator found: deliver the "base" bytes that precede it (if any),
     * then skip them plus the 3 characters of "]]>".
     */
9423 if ((ctxt->sax != NULL) && (base > 0) &&
9424 (!ctxt->disableSAX)) {
9425 if (ctxt->sax->cdataBlock != NULL)
9426 ctxt->sax->cdataBlock(ctxt->userData,
9427 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009428 else if (ctxt->sax->characters != NULL)
9429 ctxt->sax->characters(ctxt->userData,
9430 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009431 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009432 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009433 ctxt->checkIndex = 0;
9434 ctxt->instate = XML_PARSER_CONTENT;
9435#ifdef DEBUG_PUSH
9436 xmlGenericError(xmlGenericErrorContext,
9437 "PP: entering CONTENT\n");
9438#endif
9439 }
9440 break;
9441 }
Owen Taylor3473f882001-02-23 17:55:21 +00009442 case XML_PARSER_MISC:
    /* avail is recomputed because SKIP_BLANKS may have moved the cursor. */
9443 SKIP_BLANKS;
9444 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009445 avail = ctxt->input->length -
9446 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009447 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009448 avail = ctxt->input->buf->buffer->use -
9449 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009450 if (avail < 2)
9451 goto done;
9452 cur = ctxt->input->cur[0];
9453 next = ctxt->input->cur[1];
9454 if ((cur == '<') && (next == '?')) {
9455 if ((!terminate) &&
9456 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9457 goto done;
9458#ifdef DEBUG_PUSH
9459 xmlGenericError(xmlGenericErrorContext,
9460 "PP: Parsing PI\n");
9461#endif
9462 xmlParsePI(ctxt);
9463 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009464 (ctxt->input->cur[2] == '-') &&
9465 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009466 if ((!terminate) &&
9467 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9468 goto done;
9469#ifdef DEBUG_PUSH
9470 xmlGenericError(xmlGenericErrorContext,
9471 "PP: Parsing Comment\n");
9472#endif
9473 xmlParseComment(ctxt);
9474 ctxt->instate = XML_PARSER_MISC;
9475 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009476 (ctxt->input->cur[2] == 'D') &&
9477 (ctxt->input->cur[3] == 'O') &&
9478 (ctxt->input->cur[4] == 'C') &&
9479 (ctxt->input->cur[5] == 'T') &&
9480 (ctxt->input->cur[6] == 'Y') &&
9481 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009482 (ctxt->input->cur[8] == 'E')) {
9483 if ((!terminate) &&
9484 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9485 goto done;
9486#ifdef DEBUG_PUSH
9487 xmlGenericError(xmlGenericErrorContext,
9488 "PP: Parsing internal subset\n");
9489#endif
9490 ctxt->inSubset = 1;
9491 xmlParseDocTypeDecl(ctxt);
    /* A '[' after the DOCTYPE declaration opens an internal subset. */
9492 if (RAW == '[') {
9493 ctxt->instate = XML_PARSER_DTD;
9494#ifdef DEBUG_PUSH
9495 xmlGenericError(xmlGenericErrorContext,
9496 "PP: entering DTD\n");
9497#endif
9498 } else {
9499 /*
9500 * Create and update the external subset.
9501 */
9502 ctxt->inSubset = 2;
9503 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9504 (ctxt->sax->externalSubset != NULL))
9505 ctxt->sax->externalSubset(ctxt->userData,
9506 ctxt->intSubName, ctxt->extSubSystem,
9507 ctxt->extSubURI);
9508 ctxt->inSubset = 0;
9509 ctxt->instate = XML_PARSER_PROLOG;
9510#ifdef DEBUG_PUSH
9511 xmlGenericError(xmlGenericErrorContext,
9512 "PP: entering PROLOG\n");
9513#endif
9514 }
9515 } else if ((cur == '<') && (next == '!') &&
9516 (avail < 9)) {
9517 goto done;
9518 } else {
    /*
     * Anything else is taken as the root element. progressive is forced to
     * 1 here (the PROLOG state only sets it when it is 0) and lastlt and
     * lastgt are recomputed.
     */
9519 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009520 ctxt->progressive = 1;
9521 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009522#ifdef DEBUG_PUSH
9523 xmlGenericError(xmlGenericErrorContext,
9524 "PP: entering START_TAG\n");
9525#endif
9526 }
9527 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009528 case XML_PARSER_PROLOG:
    /* Like MISC, but no DOCTYPE branch: only PIs and comments are handled. */
9529 SKIP_BLANKS;
9530 if (ctxt->input->buf == NULL)
9531 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9532 else
9533 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9534 if (avail < 2)
9535 goto done;
9536 cur = ctxt->input->cur[0];
9537 next = ctxt->input->cur[1];
9538 if ((cur == '<') && (next == '?')) {
9539 if ((!terminate) &&
9540 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9541 goto done;
9542#ifdef DEBUG_PUSH
9543 xmlGenericError(xmlGenericErrorContext,
9544 "PP: Parsing PI\n");
9545#endif
9546 xmlParsePI(ctxt);
9547 } else if ((cur == '<') && (next == '!') &&
9548 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9549 if ((!terminate) &&
9550 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9551 goto done;
9552#ifdef DEBUG_PUSH
9553 xmlGenericError(xmlGenericErrorContext,
9554 "PP: Parsing Comment\n");
9555#endif
9556 xmlParseComment(ctxt);
9557 ctxt->instate = XML_PARSER_PROLOG;
9558 } else if ((cur == '<') && (next == '!') &&
9559 (avail < 4)) {
9560 goto done;
9561 } else {
9562 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009563 if (ctxt->progressive == 0)
9564 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009565 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009566#ifdef DEBUG_PUSH
9567 xmlGenericError(xmlGenericErrorContext,
9568 "PP: entering START_TAG\n");
9569#endif
9570 }
9571 break;
9572 case XML_PARSER_EPILOG:
    /*
     * Reached once the name stack is empty again: only PIs and comments
     * are accepted, anything else raises XML_ERR_DOCUMENT_END.
     */
9573 SKIP_BLANKS;
9574 if (ctxt->input->buf == NULL)
9575 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9576 else
9577 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9578 if (avail < 2)
9579 goto done;
9580 cur = ctxt->input->cur[0];
9581 next = ctxt->input->cur[1];
9582 if ((cur == '<') && (next == '?')) {
9583 if ((!terminate) &&
9584 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9585 goto done;
9586#ifdef DEBUG_PUSH
9587 xmlGenericError(xmlGenericErrorContext,
9588 "PP: Parsing PI\n");
9589#endif
9590 xmlParsePI(ctxt);
9591 ctxt->instate = XML_PARSER_EPILOG;
9592 } else if ((cur == '<') && (next == '!') &&
9593 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9594 if ((!terminate) &&
9595 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9596 goto done;
9597#ifdef DEBUG_PUSH
9598 xmlGenericError(xmlGenericErrorContext,
9599 "PP: Parsing Comment\n");
9600#endif
9601 xmlParseComment(ctxt);
9602 ctxt->instate = XML_PARSER_EPILOG;
9603 } else if ((cur == '<') && (next == '!') &&
9604 (avail < 4)) {
9605 goto done;
9606 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009607 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009608 ctxt->instate = XML_PARSER_EOF;
9609#ifdef DEBUG_PUSH
9610 xmlGenericError(xmlGenericErrorContext,
9611 "PP: entering EOF\n");
9612#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009613 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009614 ctxt->sax->endDocument(ctxt->userData);
9615 goto done;
9616 }
9617 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009618 case XML_PARSER_DTD: {
9619 /*
9620 * Sorry but progressive parsing of the internal subset
9621 * is not expected to be supported. We first check that
9622 * the full content of the internal subset is available and
9623 * the parsing is launched only at that point.
9624 * Internal subset ends up with "']' S? '>'" in an unescaped
9625 * section and not in a ']]>' sequence which are conditional
9626 * sections (whoever argued to keep that crap in XML deserve
9627 * a place in hell !).
9628 */
9629 int base, i;
9630 xmlChar *buf;
    /* currently open quote character, 0 when outside a quoted string */
9631 xmlChar quote = 0;
9632
    /*
     * Resume the scan at checkIndex when a previous call already went
     * further than the current position.
     * NOTE(review): base is an offset from input->base but is used to
     * index buffer->content, and input->buf is dereferenced without the
     * NULL test done when avail is computed -- this presumably assumes a
     * buffered input whose base equals the buffer content; confirm.
     */
9633 base = ctxt->input->cur - ctxt->input->base;
9634 if (base < 0) return(0);
9635 if (ctxt->checkIndex > base)
9636 base = ctxt->checkIndex;
9637 buf = ctxt->input->buf->buffer->content;
9638 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9639 base++) {
    /* Inside a quoted string only the matching quote is significant. */
9640 if (quote != 0) {
9641 if (buf[base] == quote)
9642 quote = 0;
9643 continue;
9644 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009645 if ((quote == 0) && (buf[base] == '<')) {
9646 int found = 0;
9647 /* special handling of comments */
9648 if (((unsigned int) base + 4 <
9649 ctxt->input->buf->buffer->use) &&
9650 (buf[base + 1] == '!') &&
9651 (buf[base + 2] == '-') &&
9652 (buf[base + 3] == '-')) {
    /*
     * Scan for "-->"; when the comment is not closed in the buffer the
     * outer loop is left and the end of the subset is not found.
     */
9653 for (;(unsigned int) base + 3 <
9654 ctxt->input->buf->buffer->use; base++) {
9655 if ((buf[base] == '-') &&
9656 (buf[base + 1] == '-') &&
9657 (buf[base + 2] == '>')) {
9658 found = 1;
9659 base += 2;
9660 break;
9661 }
9662 }
9663 if (!found)
9664 break;
9665 continue;
9666 }
9667 }
Owen Taylor3473f882001-02-23 17:55:21 +00009668 if (buf[base] == '"') {
9669 quote = '"';
9670 continue;
9671 }
9672 if (buf[base] == '\'') {
9673 quote = '\'';
9674 continue;
9675 }
9676 if (buf[base] == ']') {
9677 if ((unsigned int) base +1 >=
9678 ctxt->input->buf->buffer->use)
9679 break;
9680 if (buf[base + 1] == ']') {
9681 /* conditional crap, skip both ']' ! */
9682 base++;
9683 continue;
9684 }
    /*
     * NOTE(review): this accepts the first '>' found anywhere after the
     * ']', whatever characters lie in between, while the comment at the
     * top of this state describes "']' S? '>'" -- confirm this is
     * intentional.
     */
9685 for (i = 0;
9686 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9687 i++) {
9688 if (buf[base + i] == '>')
9689 goto found_end_int_subset;
9690 }
9691 break;
9692 }
9693 }
9694 /*
9695 * We did not find the end of the Internal subset
9696 */
    /*
     * The scan position is only remembered when no quote is open; the next
     * scan starts with quote reset to 0.
     */
9697 if (quote == 0)
9698 ctxt->checkIndex = base;
    /*
     * NOTE(review): "next" is only assigned in the START, CONTENT, MISC,
     * PROLOG and EPILOG states, so it may be read uninitialized here.
     */
9699#ifdef DEBUG_PUSH
9700 if (next == 0)
9701 xmlGenericError(xmlGenericErrorContext,
9702 "PP: lookup of int subset end filed\n");
9703#endif
9704 goto done;
9705
9706found_end_int_subset:
    /*
     * The end of the internal subset is buffered: parse it in one go, then
     * notify the external subset and continue with the prolog.
     */
9707 xmlParseInternalSubset(ctxt);
9708 ctxt->inSubset = 2;
9709 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9710 (ctxt->sax->externalSubset != NULL))
9711 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9712 ctxt->extSubSystem, ctxt->extSubURI);
9713 ctxt->inSubset = 0;
9714 ctxt->instate = XML_PARSER_PROLOG;
9715 ctxt->checkIndex = 0;
9716#ifdef DEBUG_PUSH
9717 xmlGenericError(xmlGenericErrorContext,
9718 "PP: entering PROLOG\n");
9719#endif
9720 break;
9721 }
    /*
     * The remaining states are never selected by this function; each one
     * is reported as an internal error and replaced by a state handled
     * above.
     */
9722 case XML_PARSER_COMMENT:
9723 xmlGenericError(xmlGenericErrorContext,
9724 "PP: internal error, state == COMMENT\n");
9725 ctxt->instate = XML_PARSER_CONTENT;
9726#ifdef DEBUG_PUSH
9727 xmlGenericError(xmlGenericErrorContext,
9728 "PP: entering CONTENT\n");
9729#endif
9730 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009731 case XML_PARSER_IGNORE:
    /* NOTE(review): unlike its siblings this message has no trailing "\n". */
9732 xmlGenericError(xmlGenericErrorContext,
9733 "PP: internal error, state == IGNORE");
9734 ctxt->instate = XML_PARSER_DTD;
9735#ifdef DEBUG_PUSH
9736 xmlGenericError(xmlGenericErrorContext,
9737 "PP: entering DTD\n");
9738#endif
9739 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009740 case XML_PARSER_PI:
9741 xmlGenericError(xmlGenericErrorContext,
9742 "PP: internal error, state == PI\n");
9743 ctxt->instate = XML_PARSER_CONTENT;
9744#ifdef DEBUG_PUSH
9745 xmlGenericError(xmlGenericErrorContext,
9746 "PP: entering CONTENT\n");
9747#endif
9748 break;
9749 case XML_PARSER_ENTITY_DECL:
9750 xmlGenericError(xmlGenericErrorContext,
9751 "PP: internal error, state == ENTITY_DECL\n");
9752 ctxt->instate = XML_PARSER_DTD;
9753#ifdef DEBUG_PUSH
9754 xmlGenericError(xmlGenericErrorContext,
9755 "PP: entering DTD\n");
9756#endif
9757 break;
9758 case XML_PARSER_ENTITY_VALUE:
    /*
     * NOTE(review): the state selected is CONTENT but the debug trace
     * below announces DTD -- one of the two is presumably wrong.
     */
9759 xmlGenericError(xmlGenericErrorContext,
9760 "PP: internal error, state == ENTITY_VALUE\n");
9761 ctxt->instate = XML_PARSER_CONTENT;
9762#ifdef DEBUG_PUSH
9763 xmlGenericError(xmlGenericErrorContext,
9764 "PP: entering DTD\n");
9765#endif
9766 break;
9767 case XML_PARSER_ATTRIBUTE_VALUE:
9768 xmlGenericError(xmlGenericErrorContext,
9769 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9770 ctxt->instate = XML_PARSER_START_TAG;
9771#ifdef DEBUG_PUSH
9772 xmlGenericError(xmlGenericErrorContext,
9773 "PP: entering START_TAG\n");
9774#endif
9775 break;
9776 case XML_PARSER_SYSTEM_LITERAL:
9777 xmlGenericError(xmlGenericErrorContext,
9778 "PP: internal error, state == SYSTEM_LITERAL\n");
9779 ctxt->instate = XML_PARSER_START_TAG;
9780#ifdef DEBUG_PUSH
9781 xmlGenericError(xmlGenericErrorContext,
9782 "PP: entering START_TAG\n");
9783#endif
9784 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009785 case XML_PARSER_PUBLIC_LITERAL:
9786 xmlGenericError(xmlGenericErrorContext,
9787 "PP: internal error, state == PUBLIC_LITERAL\n");
9788 ctxt->instate = XML_PARSER_START_TAG;
9789#ifdef DEBUG_PUSH
9790 xmlGenericError(xmlGenericErrorContext,
9791 "PP: entering START_TAG\n");
9792#endif
9793 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009794 }
9795 }
    /* Common exit for every "need more data" or "finished" path. */
9796done:
9797#ifdef DEBUG_PUSH
9798 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9799#endif
9800 return(ret);
9801}
9802
9803/**
Owen Taylor3473f882001-02-23 17:55:21 +00009804 * xmlParseChunk:
9805 * @ctxt: an XML parser context
9806 * @chunk: an char array
9807 * @size: the size in byte of the chunk
9808 * @terminate: last chunk indicator
9809 *
9810 * Parse a Chunk of memory
9811 *
9812 * Returns zero if no error, the xmlParserErrors otherwise.
9813 */
9814int
9815xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9816 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009817 if (ctxt == NULL)
9818 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009819 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9820 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009821 if (ctxt->instate == XML_PARSER_START)
9822 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009823 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9824 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9825 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9826 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +00009827 int res;
Owen Taylor3473f882001-02-23 17:55:21 +00009828
William M. Bracka3215c72004-07-31 16:24:01 +00009829 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9830 if (res < 0) {
9831 ctxt->errNo = XML_PARSER_EOF;
9832 ctxt->disableSAX = 1;
9833 return (XML_PARSER_EOF);
9834 }
Owen Taylor3473f882001-02-23 17:55:21 +00009835 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9836 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009837 ctxt->input->end =
9838 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009839#ifdef DEBUG_PUSH
9840 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9841#endif
9842
Owen Taylor3473f882001-02-23 17:55:21 +00009843 } else if (ctxt->instate != XML_PARSER_EOF) {
9844 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9845 xmlParserInputBufferPtr in = ctxt->input->buf;
9846 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9847 (in->raw != NULL)) {
9848 int nbchars;
9849
9850 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9851 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009852 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009853 xmlGenericError(xmlGenericErrorContext,
9854 "xmlParseChunk: encoder error\n");
9855 return(XML_ERR_INVALID_ENCODING);
9856 }
9857 }
9858 }
9859 }
9860 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009861 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9862 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009863 if (terminate) {
9864 /*
9865 * Check for termination
9866 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009867 int avail = 0;
9868
9869 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009870 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009871 avail = ctxt->input->length -
9872 (ctxt->input->cur - ctxt->input->base);
9873 else
9874 avail = ctxt->input->buf->buffer->use -
9875 (ctxt->input->cur - ctxt->input->base);
9876 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009877
Owen Taylor3473f882001-02-23 17:55:21 +00009878 if ((ctxt->instate != XML_PARSER_EOF) &&
9879 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009880 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009881 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009882 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009883 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009884 }
Owen Taylor3473f882001-02-23 17:55:21 +00009885 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009886 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009887 ctxt->sax->endDocument(ctxt->userData);
9888 }
9889 ctxt->instate = XML_PARSER_EOF;
9890 }
9891 return((xmlParserErrors) ctxt->errNo);
9892}
9893
9894/************************************************************************
9895 * *
9896 * I/O front end functions to the parser *
9897 * *
9898 ************************************************************************/
9899
9900/**
9901 * xmlStopParser:
9902 * @ctxt: an XML parser context
9903 *
9904 * Blocks further parser processing
9905 */
9906void
9907xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009908 if (ctxt == NULL)
9909 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009910 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009911 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009912 if (ctxt->input != NULL)
9913 ctxt->input->cur = BAD_CAST"";
9914}
9915
9916/**
9917 * xmlCreatePushParserCtxt:
9918 * @sax: a SAX handler
9919 * @user_data: The user data returned on SAX callbacks
9920 * @chunk: a pointer to an array of chars
9921 * @size: number of chars in the array
9922 * @filename: an optional file name or URI
9923 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009924 * Create a parser context for using the XML parser in push mode.
9925 * If @buffer and @size are non-NULL, the data is used to detect
9926 * the encoding. The remaining characters will be parsed so they
9927 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009928 * To allow content encoding detection, @size should be >= 4
9929 * The value of @filename is used for fetching external entities
9930 * and error/warning reports.
9931 *
9932 * Returns the new parser context or NULL
9933 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009934
Owen Taylor3473f882001-02-23 17:55:21 +00009935xmlParserCtxtPtr
9936xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9937 const char *chunk, int size, const char *filename) {
9938 xmlParserCtxtPtr ctxt;
9939 xmlParserInputPtr inputStream;
9940 xmlParserInputBufferPtr buf;
9941 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9942
9943 /*
9944 * plug some encoding conversion routines
9945 */
9946 if ((chunk != NULL) && (size >= 4))
9947 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9948
9949 buf = xmlAllocParserInputBuffer(enc);
9950 if (buf == NULL) return(NULL);
9951
9952 ctxt = xmlNewParserCtxt();
9953 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009954 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009955 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009956 return(NULL);
9957 }
Daniel Veillard03a53c32004-10-26 16:06:51 +00009958 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009959 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9960 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009961 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009962 xmlFreeParserInputBuffer(buf);
9963 xmlFreeParserCtxt(ctxt);
9964 return(NULL);
9965 }
Owen Taylor3473f882001-02-23 17:55:21 +00009966 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009967#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009968 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009969#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009970 xmlFree(ctxt->sax);
9971 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9972 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009973 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009974 xmlFreeParserInputBuffer(buf);
9975 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009976 return(NULL);
9977 }
9978 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9979 if (user_data != NULL)
9980 ctxt->userData = user_data;
9981 }
9982 if (filename == NULL) {
9983 ctxt->directory = NULL;
9984 } else {
9985 ctxt->directory = xmlParserGetDirectory(filename);
9986 }
9987
9988 inputStream = xmlNewInputStream(ctxt);
9989 if (inputStream == NULL) {
9990 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009991 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009992 return(NULL);
9993 }
9994
9995 if (filename == NULL)
9996 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +00009997 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +00009998 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009999 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010000 if (inputStream->filename == NULL) {
10001 xmlFreeParserCtxt(ctxt);
10002 xmlFreeParserInputBuffer(buf);
10003 return(NULL);
10004 }
10005 }
Owen Taylor3473f882001-02-23 17:55:21 +000010006 inputStream->buf = buf;
10007 inputStream->base = inputStream->buf->buffer->content;
10008 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010009 inputStream->end =
10010 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010011
10012 inputPush(ctxt, inputStream);
10013
10014 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10015 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010016 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10017 int cur = ctxt->input->cur - ctxt->input->base;
10018
Owen Taylor3473f882001-02-23 17:55:21 +000010019 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010020
10021 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10022 ctxt->input->cur = ctxt->input->base + cur;
10023 ctxt->input->end =
10024 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010025#ifdef DEBUG_PUSH
10026 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10027#endif
10028 }
10029
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010030 if (enc != XML_CHAR_ENCODING_NONE) {
10031 xmlSwitchEncoding(ctxt, enc);
10032 }
10033
Owen Taylor3473f882001-02-23 17:55:21 +000010034 return(ctxt);
10035}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010036#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010037
10038/**
10039 * xmlCreateIOParserCtxt:
10040 * @sax: a SAX handler
10041 * @user_data: The user data returned on SAX callbacks
10042 * @ioread: an I/O read function
10043 * @ioclose: an I/O close function
10044 * @ioctx: an I/O handler
10045 * @enc: the charset encoding if known
10046 *
10047 * Create a parser context for using the XML parser with an existing
10048 * I/O stream
10049 *
10050 * Returns the new parser context or NULL
10051 */
10052xmlParserCtxtPtr
10053xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10054 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10055 void *ioctx, xmlCharEncoding enc) {
10056 xmlParserCtxtPtr ctxt;
10057 xmlParserInputPtr inputStream;
10058 xmlParserInputBufferPtr buf;
10059
10060 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10061 if (buf == NULL) return(NULL);
10062
10063 ctxt = xmlNewParserCtxt();
10064 if (ctxt == NULL) {
10065 xmlFree(buf);
10066 return(NULL);
10067 }
10068 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010069#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010070 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010071#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010072 xmlFree(ctxt->sax);
10073 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10074 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010075 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010076 xmlFree(ctxt);
10077 return(NULL);
10078 }
10079 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10080 if (user_data != NULL)
10081 ctxt->userData = user_data;
10082 }
10083
10084 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10085 if (inputStream == NULL) {
10086 xmlFreeParserCtxt(ctxt);
10087 return(NULL);
10088 }
10089 inputPush(ctxt, inputStream);
10090
10091 return(ctxt);
10092}
10093
Daniel Veillard4432df22003-09-28 18:58:27 +000010094#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010095/************************************************************************
10096 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010097 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010098 * *
10099 ************************************************************************/
10100
10101/**
10102 * xmlIOParseDTD:
10103 * @sax: the SAX handler block or NULL
10104 * @input: an Input Buffer
10105 * @enc: the charset encoding if known
10106 *
10107 * Load and parse a DTD
10108 *
10109 * Returns the resulting xmlDtdPtr or NULL in case of error.
10110 * @input will be freed at parsing end.
10111 */
10112
10113xmlDtdPtr
10114xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10115 xmlCharEncoding enc) {
10116 xmlDtdPtr ret = NULL;
10117 xmlParserCtxtPtr ctxt;
10118 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010119 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010120
10121 if (input == NULL)
10122 return(NULL);
10123
10124 ctxt = xmlNewParserCtxt();
10125 if (ctxt == NULL) {
10126 return(NULL);
10127 }
10128
10129 /*
10130 * Set-up the SAX context
10131 */
10132 if (sax != NULL) {
10133 if (ctxt->sax != NULL)
10134 xmlFree(ctxt->sax);
10135 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010136 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010137 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010138 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010139
10140 /*
10141 * generate a parser input from the I/O handler
10142 */
10143
Daniel Veillard43caefb2003-12-07 19:32:22 +000010144 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010145 if (pinput == NULL) {
10146 if (sax != NULL) ctxt->sax = NULL;
10147 xmlFreeParserCtxt(ctxt);
10148 return(NULL);
10149 }
10150
10151 /*
10152 * plug some encoding conversion routines here.
10153 */
10154 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010155 if (enc != XML_CHAR_ENCODING_NONE) {
10156 xmlSwitchEncoding(ctxt, enc);
10157 }
Owen Taylor3473f882001-02-23 17:55:21 +000010158
10159 pinput->filename = NULL;
10160 pinput->line = 1;
10161 pinput->col = 1;
10162 pinput->base = ctxt->input->cur;
10163 pinput->cur = ctxt->input->cur;
10164 pinput->free = NULL;
10165
10166 /*
10167 * let's parse that entity knowing it's an external subset.
10168 */
10169 ctxt->inSubset = 2;
10170 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10171 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10172 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010173
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010174 if ((enc == XML_CHAR_ENCODING_NONE) &&
10175 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010176 /*
10177 * Get the 4 first bytes and decode the charset
10178 * if enc != XML_CHAR_ENCODING_NONE
10179 * plug some encoding conversion routines.
10180 */
10181 start[0] = RAW;
10182 start[1] = NXT(1);
10183 start[2] = NXT(2);
10184 start[3] = NXT(3);
10185 enc = xmlDetectCharEncoding(start, 4);
10186 if (enc != XML_CHAR_ENCODING_NONE) {
10187 xmlSwitchEncoding(ctxt, enc);
10188 }
10189 }
10190
Owen Taylor3473f882001-02-23 17:55:21 +000010191 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10192
10193 if (ctxt->myDoc != NULL) {
10194 if (ctxt->wellFormed) {
10195 ret = ctxt->myDoc->extSubset;
10196 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010197 if (ret != NULL) {
10198 xmlNodePtr tmp;
10199
10200 ret->doc = NULL;
10201 tmp = ret->children;
10202 while (tmp != NULL) {
10203 tmp->doc = NULL;
10204 tmp = tmp->next;
10205 }
10206 }
Owen Taylor3473f882001-02-23 17:55:21 +000010207 } else {
10208 ret = NULL;
10209 }
10210 xmlFreeDoc(ctxt->myDoc);
10211 ctxt->myDoc = NULL;
10212 }
10213 if (sax != NULL) ctxt->sax = NULL;
10214 xmlFreeParserCtxt(ctxt);
10215
10216 return(ret);
10217}
10218
10219/**
10220 * xmlSAXParseDTD:
10221 * @sax: the SAX handler block
10222 * @ExternalID: a NAME* containing the External ID of the DTD
10223 * @SystemID: a NAME* containing the URL to the DTD
10224 *
10225 * Load and parse an external subset.
10226 *
10227 * Returns the resulting xmlDtdPtr or NULL in case of error.
10228 */
10229
10230xmlDtdPtr
10231xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10232 const xmlChar *SystemID) {
10233 xmlDtdPtr ret = NULL;
10234 xmlParserCtxtPtr ctxt;
10235 xmlParserInputPtr input = NULL;
10236 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010237 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010238
10239 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10240
10241 ctxt = xmlNewParserCtxt();
10242 if (ctxt == NULL) {
10243 return(NULL);
10244 }
10245
10246 /*
10247 * Set-up the SAX context
10248 */
10249 if (sax != NULL) {
10250 if (ctxt->sax != NULL)
10251 xmlFree(ctxt->sax);
10252 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010253 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010254 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010255
10256 /*
10257 * Canonicalise the system ID
10258 */
10259 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010260 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010261 xmlFreeParserCtxt(ctxt);
10262 return(NULL);
10263 }
Owen Taylor3473f882001-02-23 17:55:21 +000010264
10265 /*
10266 * Ask the Entity resolver to load the damn thing
10267 */
10268
10269 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010270 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010271 if (input == NULL) {
10272 if (sax != NULL) ctxt->sax = NULL;
10273 xmlFreeParserCtxt(ctxt);
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010274 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010275 return(NULL);
10276 }
10277
10278 /*
10279 * plug some encoding conversion routines here.
10280 */
10281 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010282 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10283 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10284 xmlSwitchEncoding(ctxt, enc);
10285 }
Owen Taylor3473f882001-02-23 17:55:21 +000010286
10287 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010288 input->filename = (char *) systemIdCanonic;
10289 else
10290 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010291 input->line = 1;
10292 input->col = 1;
10293 input->base = ctxt->input->cur;
10294 input->cur = ctxt->input->cur;
10295 input->free = NULL;
10296
10297 /*
10298 * let's parse that entity knowing it's an external subset.
10299 */
10300 ctxt->inSubset = 2;
10301 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10302 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10303 ExternalID, SystemID);
10304 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10305
10306 if (ctxt->myDoc != NULL) {
10307 if (ctxt->wellFormed) {
10308 ret = ctxt->myDoc->extSubset;
10309 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010310 if (ret != NULL) {
10311 xmlNodePtr tmp;
10312
10313 ret->doc = NULL;
10314 tmp = ret->children;
10315 while (tmp != NULL) {
10316 tmp->doc = NULL;
10317 tmp = tmp->next;
10318 }
10319 }
Owen Taylor3473f882001-02-23 17:55:21 +000010320 } else {
10321 ret = NULL;
10322 }
10323 xmlFreeDoc(ctxt->myDoc);
10324 ctxt->myDoc = NULL;
10325 }
10326 if (sax != NULL) ctxt->sax = NULL;
10327 xmlFreeParserCtxt(ctxt);
10328
10329 return(ret);
10330}
10331
Daniel Veillard4432df22003-09-28 18:58:27 +000010332
Owen Taylor3473f882001-02-23 17:55:21 +000010333/**
10334 * xmlParseDTD:
10335 * @ExternalID: a NAME* containing the External ID of the DTD
10336 * @SystemID: a NAME* containing the URL to the DTD
10337 *
10338 * Load and parse an external subset.
10339 *
10340 * Returns the resulting xmlDtdPtr or NULL in case of error.
10341 */
10342
10343xmlDtdPtr
10344xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10345 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10346}
Daniel Veillard4432df22003-09-28 18:58:27 +000010347#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010348
10349/************************************************************************
10350 * *
10351 * Front ends when parsing an Entity *
10352 * *
10353 ************************************************************************/
10354
10355/**
Owen Taylor3473f882001-02-23 17:55:21 +000010356 * xmlParseCtxtExternalEntity:
10357 * @ctx: the existing parsing context
10358 * @URL: the URL for the entity to load
10359 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010360 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010361 *
10362 * Parse an external general entity within an existing parsing context
10363 * An external general parsed entity is well-formed if it matches the
10364 * production labeled extParsedEnt.
10365 *
10366 * [78] extParsedEnt ::= TextDecl? content
10367 *
10368 * Returns 0 if the entity is well formed, -1 in case of args problem and
10369 * the parser error code otherwise
10370 */
10371
10372int
10373xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010374 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010375 xmlParserCtxtPtr ctxt;
10376 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010377 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010378 xmlSAXHandlerPtr oldsax = NULL;
10379 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010380 xmlChar start[4];
10381 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010382
10383 if (ctx->depth > 40) {
10384 return(XML_ERR_ENTITY_LOOP);
10385 }
10386
Daniel Veillardcda96922001-08-21 10:56:31 +000010387 if (lst != NULL)
10388 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010389 if ((URL == NULL) && (ID == NULL))
10390 return(-1);
10391 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10392 return(-1);
10393
10394
10395 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10396 if (ctxt == NULL) return(-1);
10397 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010398 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010399 oldsax = ctxt->sax;
10400 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010401 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010402 newDoc = xmlNewDoc(BAD_CAST "1.0");
10403 if (newDoc == NULL) {
10404 xmlFreeParserCtxt(ctxt);
10405 return(-1);
10406 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010407 if (ctx->myDoc->dict) {
10408 newDoc->dict = ctx->myDoc->dict;
10409 xmlDictReference(newDoc->dict);
10410 }
Owen Taylor3473f882001-02-23 17:55:21 +000010411 if (ctx->myDoc != NULL) {
10412 newDoc->intSubset = ctx->myDoc->intSubset;
10413 newDoc->extSubset = ctx->myDoc->extSubset;
10414 }
10415 if (ctx->myDoc->URL != NULL) {
10416 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10417 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010418 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10419 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010420 ctxt->sax = oldsax;
10421 xmlFreeParserCtxt(ctxt);
10422 newDoc->intSubset = NULL;
10423 newDoc->extSubset = NULL;
10424 xmlFreeDoc(newDoc);
10425 return(-1);
10426 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010427 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010428 nodePush(ctxt, newDoc->children);
10429 if (ctx->myDoc == NULL) {
10430 ctxt->myDoc = newDoc;
10431 } else {
10432 ctxt->myDoc = ctx->myDoc;
10433 newDoc->children->doc = ctx->myDoc;
10434 }
10435
Daniel Veillard87a764e2001-06-20 17:41:10 +000010436 /*
10437 * Get the 4 first bytes and decode the charset
10438 * if enc != XML_CHAR_ENCODING_NONE
10439 * plug some encoding conversion routines.
10440 */
10441 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010442 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10443 start[0] = RAW;
10444 start[1] = NXT(1);
10445 start[2] = NXT(2);
10446 start[3] = NXT(3);
10447 enc = xmlDetectCharEncoding(start, 4);
10448 if (enc != XML_CHAR_ENCODING_NONE) {
10449 xmlSwitchEncoding(ctxt, enc);
10450 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010451 }
10452
Owen Taylor3473f882001-02-23 17:55:21 +000010453 /*
10454 * Parse a possible text declaration first
10455 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010456 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010457 xmlParseTextDecl(ctxt);
10458 }
10459
10460 /*
10461 * Doing validity checking on chunk doesn't make sense
10462 */
10463 ctxt->instate = XML_PARSER_CONTENT;
10464 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010465 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010466 ctxt->loadsubset = ctx->loadsubset;
10467 ctxt->depth = ctx->depth + 1;
10468 ctxt->replaceEntities = ctx->replaceEntities;
10469 if (ctxt->validate) {
10470 ctxt->vctxt.error = ctx->vctxt.error;
10471 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010472 } else {
10473 ctxt->vctxt.error = NULL;
10474 ctxt->vctxt.warning = NULL;
10475 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010476 ctxt->vctxt.nodeTab = NULL;
10477 ctxt->vctxt.nodeNr = 0;
10478 ctxt->vctxt.nodeMax = 0;
10479 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010480 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10481 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010482 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10483 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10484 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010485 ctxt->dictNames = ctx->dictNames;
10486 ctxt->attsDefault = ctx->attsDefault;
10487 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010488 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010489
10490 xmlParseContent(ctxt);
10491
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010492 ctx->validate = ctxt->validate;
10493 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010494 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010495 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010496 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010497 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010498 }
10499 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010500 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010501 }
10502
10503 if (!ctxt->wellFormed) {
10504 if (ctxt->errNo == 0)
10505 ret = 1;
10506 else
10507 ret = ctxt->errNo;
10508 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010509 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010510 xmlNodePtr cur;
10511
10512 /*
10513 * Return the newly created nodeset after unlinking it from
10514 * they pseudo parent.
10515 */
10516 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010517 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010518 while (cur != NULL) {
10519 cur->parent = NULL;
10520 cur = cur->next;
10521 }
10522 newDoc->children->children = NULL;
10523 }
10524 ret = 0;
10525 }
10526 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010527 ctxt->dict = NULL;
10528 ctxt->attsDefault = NULL;
10529 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010530 xmlFreeParserCtxt(ctxt);
10531 newDoc->intSubset = NULL;
10532 newDoc->extSubset = NULL;
10533 xmlFreeDoc(newDoc);
10534
10535 return(ret);
10536}
10537
10538/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010539 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010540 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010541 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010542 * @sax: the SAX handler bloc (possibly NULL)
10543 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10544 * @depth: Used for loop detection, use 0
10545 * @URL: the URL for the entity to load
10546 * @ID: the System ID for the entity to load
10547 * @list: the return value for the set of parsed nodes
10548 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010549 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010550 *
10551 * Returns 0 if the entity is well formed, -1 in case of args problem and
10552 * the parser error code otherwise
10553 */
10554
Daniel Veillard7d515752003-09-26 19:12:37 +000010555static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010556xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10557 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010558 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010559 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010560 xmlParserCtxtPtr ctxt;
10561 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010562 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010563 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010564 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010565 xmlChar start[4];
10566 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010567
10568 if (depth > 40) {
10569 return(XML_ERR_ENTITY_LOOP);
10570 }
10571
10572
10573
10574 if (list != NULL)
10575 *list = NULL;
10576 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010577 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010578 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010579 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010580
10581
10582 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010583 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010584 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010585 if (oldctxt != NULL) {
10586 ctxt->_private = oldctxt->_private;
10587 ctxt->loadsubset = oldctxt->loadsubset;
10588 ctxt->validate = oldctxt->validate;
10589 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010590 ctxt->record_info = oldctxt->record_info;
10591 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10592 ctxt->node_seq.length = oldctxt->node_seq.length;
10593 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010594 } else {
10595 /*
10596 * Doing validity checking on chunk without context
10597 * doesn't make sense
10598 */
10599 ctxt->_private = NULL;
10600 ctxt->validate = 0;
10601 ctxt->external = 2;
10602 ctxt->loadsubset = 0;
10603 }
Owen Taylor3473f882001-02-23 17:55:21 +000010604 if (sax != NULL) {
10605 oldsax = ctxt->sax;
10606 ctxt->sax = sax;
10607 if (user_data != NULL)
10608 ctxt->userData = user_data;
10609 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010610 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010611 newDoc = xmlNewDoc(BAD_CAST "1.0");
10612 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010613 ctxt->node_seq.maximum = 0;
10614 ctxt->node_seq.length = 0;
10615 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010616 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010617 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010618 }
10619 if (doc != NULL) {
10620 newDoc->intSubset = doc->intSubset;
10621 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010622 newDoc->dict = doc->dict;
10623 } else if (oldctxt != NULL) {
10624 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010625 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010626 xmlDictReference(newDoc->dict);
10627
Owen Taylor3473f882001-02-23 17:55:21 +000010628 if (doc->URL != NULL) {
10629 newDoc->URL = xmlStrdup(doc->URL);
10630 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010631 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10632 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010633 if (sax != NULL)
10634 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010635 ctxt->node_seq.maximum = 0;
10636 ctxt->node_seq.length = 0;
10637 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010638 xmlFreeParserCtxt(ctxt);
10639 newDoc->intSubset = NULL;
10640 newDoc->extSubset = NULL;
10641 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010642 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010643 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010644 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010645 nodePush(ctxt, newDoc->children);
10646 if (doc == NULL) {
10647 ctxt->myDoc = newDoc;
10648 } else {
10649 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010650 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010651 }
10652
Daniel Veillard87a764e2001-06-20 17:41:10 +000010653 /*
10654 * Get the 4 first bytes and decode the charset
10655 * if enc != XML_CHAR_ENCODING_NONE
10656 * plug some encoding conversion routines.
10657 */
10658 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010659 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10660 start[0] = RAW;
10661 start[1] = NXT(1);
10662 start[2] = NXT(2);
10663 start[3] = NXT(3);
10664 enc = xmlDetectCharEncoding(start, 4);
10665 if (enc != XML_CHAR_ENCODING_NONE) {
10666 xmlSwitchEncoding(ctxt, enc);
10667 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010668 }
10669
Owen Taylor3473f882001-02-23 17:55:21 +000010670 /*
10671 * Parse a possible text declaration first
10672 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010673 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010674 xmlParseTextDecl(ctxt);
10675 }
10676
Owen Taylor3473f882001-02-23 17:55:21 +000010677 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010678 ctxt->depth = depth;
10679
10680 xmlParseContent(ctxt);
10681
Daniel Veillard561b7f82002-03-20 21:55:57 +000010682 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010683 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010684 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010685 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010686 }
10687 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010688 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010689 }
10690
10691 if (!ctxt->wellFormed) {
10692 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010693 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010694 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010695 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010696 } else {
10697 if (list != NULL) {
10698 xmlNodePtr cur;
10699
10700 /*
10701 * Return the newly created nodeset after unlinking it from
10702 * they pseudo parent.
10703 */
10704 cur = newDoc->children->children;
10705 *list = cur;
10706 while (cur != NULL) {
10707 cur->parent = NULL;
10708 cur = cur->next;
10709 }
10710 newDoc->children->children = NULL;
10711 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010712 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010713 }
10714 if (sax != NULL)
10715 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010716 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10717 oldctxt->node_seq.length = ctxt->node_seq.length;
10718 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010719 ctxt->node_seq.maximum = 0;
10720 ctxt->node_seq.length = 0;
10721 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010722 xmlFreeParserCtxt(ctxt);
10723 newDoc->intSubset = NULL;
10724 newDoc->extSubset = NULL;
10725 xmlFreeDoc(newDoc);
10726
10727 return(ret);
10728}
10729
Daniel Veillard81273902003-09-30 00:43:48 +000010730#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010731/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010732 * xmlParseExternalEntity:
10733 * @doc: the document the chunk pertains to
10734 * @sax: the SAX handler bloc (possibly NULL)
10735 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10736 * @depth: Used for loop detection, use 0
10737 * @URL: the URL for the entity to load
10738 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010739 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010740 *
10741 * Parse an external general entity
10742 * An external general parsed entity is well-formed if it matches the
10743 * production labeled extParsedEnt.
10744 *
10745 * [78] extParsedEnt ::= TextDecl? content
10746 *
10747 * Returns 0 if the entity is well formed, -1 in case of args problem and
10748 * the parser error code otherwise
10749 */
10750
10751int
10752xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010753 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010754 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010755 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010756}
10757
10758/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010759 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010760 * @doc: the document the chunk pertains to
10761 * @sax: the SAX handler bloc (possibly NULL)
10762 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10763 * @depth: Used for loop detection, use 0
10764 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010765 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010766 *
10767 * Parse a well-balanced chunk of an XML document
10768 * called by the parser
10769 * The allowed sequence for the Well Balanced Chunk is the one defined by
10770 * the content production in the XML grammar:
10771 *
10772 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10773 *
10774 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10775 * the parser error code otherwise
10776 */
10777
10778int
10779xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010780 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010781 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10782 depth, string, lst, 0 );
10783}
Daniel Veillard81273902003-09-30 00:43:48 +000010784#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010785
10786/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010787 * xmlParseBalancedChunkMemoryInternal:
10788 * @oldctxt: the existing parsing context
10789 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10790 * @user_data: the user data field for the parser context
10791 * @lst: the return value for the set of parsed nodes
10792 *
10793 *
10794 * Parse a well-balanced chunk of an XML document
10795 * called by the parser
10796 * The allowed sequence for the Well Balanced Chunk is the one defined by
10797 * the content production in the XML grammar:
10798 *
10799 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10800 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010801 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10802 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010803 *
10804 * In case recover is set to 1, the nodelist will not be empty even if
10805 * the parsed chunk is not well balanced.
10806 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010807static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010808xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10809 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10810 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010811 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010812 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010813 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010814 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010815 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010816 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010817 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010818
10819 if (oldctxt->depth > 40) {
10820 return(XML_ERR_ENTITY_LOOP);
10821 }
10822
10823
10824 if (lst != NULL)
10825 *lst = NULL;
10826 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010827 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010828
10829 size = xmlStrlen(string);
10830
10831 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010832 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010833 if (user_data != NULL)
10834 ctxt->userData = user_data;
10835 else
10836 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010837 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10838 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010839 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10840 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10841 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010842
10843 oldsax = ctxt->sax;
10844 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010845 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010846 ctxt->replaceEntities = oldctxt->replaceEntities;
10847 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010848
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010849 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010850 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010851 newDoc = xmlNewDoc(BAD_CAST "1.0");
10852 if (newDoc == NULL) {
10853 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010854 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010855 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010856 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010857 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010858 newDoc->dict = ctxt->dict;
10859 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010860 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010861 } else {
10862 ctxt->myDoc = oldctxt->myDoc;
10863 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010864 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010865 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010866 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
10867 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010868 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010869 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010870 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010871 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010872 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010873 }
William M. Brack7b9154b2003-09-27 19:23:50 +000010874 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010875 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010876 ctxt->myDoc->children = NULL;
10877 ctxt->myDoc->last = NULL;
10878 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010879 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010880 ctxt->instate = XML_PARSER_CONTENT;
10881 ctxt->depth = oldctxt->depth + 1;
10882
Daniel Veillard328f48c2002-11-15 15:24:34 +000010883 ctxt->validate = 0;
10884 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010885 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10886 /*
10887 * ID/IDREF registration will be done in xmlValidateElement below
10888 */
10889 ctxt->loadsubset |= XML_SKIP_IDS;
10890 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010891 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010892 ctxt->attsDefault = oldctxt->attsDefault;
10893 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010894
Daniel Veillard68e9e742002-11-16 15:35:11 +000010895 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010896 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010897 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010898 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010899 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010900 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010901 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010902 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010903 }
10904
10905 if (!ctxt->wellFormed) {
10906 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010907 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010908 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010909 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010910 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010911 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010912 }
10913
William M. Brack7b9154b2003-09-27 19:23:50 +000010914 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010915 xmlNodePtr cur;
10916
10917 /*
10918 * Return the newly created nodeset after unlinking it from
10919 * they pseudo parent.
10920 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010921 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010922 *lst = cur;
10923 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010924#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010925 if (oldctxt->validate && oldctxt->wellFormed &&
10926 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10927 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10928 oldctxt->myDoc, cur);
10929 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010930#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010931 cur->parent = NULL;
10932 cur = cur->next;
10933 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010934 ctxt->myDoc->children->children = NULL;
10935 }
10936 if (ctxt->myDoc != NULL) {
10937 xmlFreeNode(ctxt->myDoc->children);
10938 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010939 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010940 }
10941
10942 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010943 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010944 ctxt->attsDefault = NULL;
10945 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010946 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010947 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010948 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010949 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010950
10951 return(ret);
10952}
10953
Daniel Veillard29b17482004-08-16 00:39:03 +000010954/**
10955 * xmlParseInNodeContext:
10956 * @node: the context node
10957 * @data: the input string
10958 * @datalen: the input string length in bytes
10959 * @options: a combination of xmlParserOption
10960 * @lst: the return value for the set of parsed nodes
10961 *
10962 * Parse a well-balanced chunk of an XML document
10963 * within the context (DTD, namespaces, etc ...) of the given node.
10964 *
10965 * The allowed sequence for the data is a Well Balanced Chunk defined by
10966 * the content production in the XML grammar:
10967 *
10968 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10969 *
10970 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10971 * error code otherwise
10972 */
10973xmlParserErrors
10974xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
10975 int options, xmlNodePtr *lst) {
10976#ifdef SAX2
10977 xmlParserCtxtPtr ctxt;
10978 xmlDocPtr doc = NULL;
10979 xmlNodePtr fake, cur;
10980 int nsnr = 0;
10981
10982 xmlParserErrors ret = XML_ERR_OK;
10983
10984 /*
10985 * check all input parameters, grab the document
10986 */
10987 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
10988 return(XML_ERR_INTERNAL_ERROR);
10989 switch (node->type) {
10990 case XML_ELEMENT_NODE:
10991 case XML_ATTRIBUTE_NODE:
10992 case XML_TEXT_NODE:
10993 case XML_CDATA_SECTION_NODE:
10994 case XML_ENTITY_REF_NODE:
10995 case XML_PI_NODE:
10996 case XML_COMMENT_NODE:
10997 case XML_DOCUMENT_NODE:
10998 case XML_HTML_DOCUMENT_NODE:
10999 break;
11000 default:
11001 return(XML_ERR_INTERNAL_ERROR);
11002
11003 }
11004 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11005 (node->type != XML_DOCUMENT_NODE) &&
11006 (node->type != XML_HTML_DOCUMENT_NODE))
11007 node = node->parent;
11008 if (node == NULL)
11009 return(XML_ERR_INTERNAL_ERROR);
11010 if (node->type == XML_ELEMENT_NODE)
11011 doc = node->doc;
11012 else
11013 doc = (xmlDocPtr) node;
11014 if (doc == NULL)
11015 return(XML_ERR_INTERNAL_ERROR);
11016
11017 /*
11018 * allocate a context and set-up everything not related to the
11019 * node position in the tree
11020 */
11021 if (doc->type == XML_DOCUMENT_NODE)
11022 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11023#ifdef LIBXML_HTML_ENABLED
11024 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11025 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11026#endif
11027 else
11028 return(XML_ERR_INTERNAL_ERROR);
11029
11030 if (ctxt == NULL)
11031 return(XML_ERR_NO_MEMORY);
11032 fake = xmlNewComment(NULL);
11033 if (fake == NULL) {
11034 xmlFreeParserCtxt(ctxt);
11035 return(XML_ERR_NO_MEMORY);
11036 }
11037 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011038
11039 /*
11040 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11041 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11042 * we must wait until the last moment to free the original one.
11043 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011044 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011045 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011046 xmlDictFree(ctxt->dict);
11047 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011048 } else
11049 options |= XML_PARSE_NODICT;
11050
11051 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011052 xmlDetectSAX2(ctxt);
11053 ctxt->myDoc = doc;
11054
11055 if (node->type == XML_ELEMENT_NODE) {
11056 nodePush(ctxt, node);
11057 /*
11058 * initialize the SAX2 namespaces stack
11059 */
11060 cur = node;
11061 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11062 xmlNsPtr ns = cur->nsDef;
11063 const xmlChar *iprefix, *ihref;
11064
11065 while (ns != NULL) {
11066 if (ctxt->dict) {
11067 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11068 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11069 } else {
11070 iprefix = ns->prefix;
11071 ihref = ns->href;
11072 }
11073
11074 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11075 nsPush(ctxt, iprefix, ihref);
11076 nsnr++;
11077 }
11078 ns = ns->next;
11079 }
11080 cur = cur->parent;
11081 }
11082 ctxt->instate = XML_PARSER_CONTENT;
11083 }
11084
11085 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11086 /*
11087 * ID/IDREF registration will be done in xmlValidateElement below
11088 */
11089 ctxt->loadsubset |= XML_SKIP_IDS;
11090 }
11091
11092 xmlParseContent(ctxt);
11093 nsPop(ctxt, nsnr);
11094 if ((RAW == '<') && (NXT(1) == '/')) {
11095 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11096 } else if (RAW != 0) {
11097 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11098 }
11099 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11100 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11101 ctxt->wellFormed = 0;
11102 }
11103
11104 if (!ctxt->wellFormed) {
11105 if (ctxt->errNo == 0)
11106 ret = XML_ERR_INTERNAL_ERROR;
11107 else
11108 ret = (xmlParserErrors)ctxt->errNo;
11109 } else {
11110 ret = XML_ERR_OK;
11111 }
11112
11113 /*
11114 * Return the newly created nodeset after unlinking it from
11115 * the pseudo sibling.
11116 */
11117
11118 cur = fake->next;
11119 fake->next = NULL;
11120 node->last = fake;
11121
11122 if (cur != NULL) {
11123 cur->prev = NULL;
11124 }
11125
11126 *lst = cur;
11127
11128 while (cur != NULL) {
11129 cur->parent = NULL;
11130 cur = cur->next;
11131 }
11132
11133 xmlUnlinkNode(fake);
11134 xmlFreeNode(fake);
11135
11136
11137 if (ret != XML_ERR_OK) {
11138 xmlFreeNodeList(*lst);
11139 *lst = NULL;
11140 }
William M. Brackc3f81342004-10-03 01:22:44 +000011141
William M. Brackb7b54de2004-10-06 16:38:01 +000011142 if (doc->dict != NULL)
11143 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011144 xmlFreeParserCtxt(ctxt);
11145
11146 return(ret);
11147#else /* !SAX2 */
11148 return(XML_ERR_INTERNAL_ERROR);
11149#endif
11150}
11151
Daniel Veillard81273902003-09-30 00:43:48 +000011152#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011153/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011154 * xmlParseBalancedChunkMemoryRecover:
11155 * @doc: the document the chunk pertains to
11156 * @sax: the SAX handler bloc (possibly NULL)
11157 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11158 * @depth: Used for loop detection, use 0
11159 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11160 * @lst: the return value for the set of parsed nodes
11161 * @recover: return nodes even if the data is broken (use 0)
11162 *
11163 *
11164 * Parse a well-balanced chunk of an XML document
11165 * called by the parser
11166 * The allowed sequence for the Well Balanced Chunk is the one defined by
11167 * the content production in the XML grammar:
11168 *
11169 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11170 *
11171 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11172 * the parser error code otherwise
11173 *
11174 * In case recover is set to 1, the nodelist will not be empty even if
11175 * the parsed chunk is not well balanced.
11176 */
11177int
11178xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11179 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11180 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011181 xmlParserCtxtPtr ctxt;
11182 xmlDocPtr newDoc;
11183 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011184 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011185 int size;
11186 int ret = 0;
11187
11188 if (depth > 40) {
11189 return(XML_ERR_ENTITY_LOOP);
11190 }
11191
11192
Daniel Veillardcda96922001-08-21 10:56:31 +000011193 if (lst != NULL)
11194 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011195 if (string == NULL)
11196 return(-1);
11197
11198 size = xmlStrlen(string);
11199
11200 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11201 if (ctxt == NULL) return(-1);
11202 ctxt->userData = ctxt;
11203 if (sax != NULL) {
11204 oldsax = ctxt->sax;
11205 ctxt->sax = sax;
11206 if (user_data != NULL)
11207 ctxt->userData = user_data;
11208 }
11209 newDoc = xmlNewDoc(BAD_CAST "1.0");
11210 if (newDoc == NULL) {
11211 xmlFreeParserCtxt(ctxt);
11212 return(-1);
11213 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011214 if ((doc != NULL) && (doc->dict != NULL)) {
11215 xmlDictFree(ctxt->dict);
11216 ctxt->dict = doc->dict;
11217 xmlDictReference(ctxt->dict);
11218 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11219 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11220 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11221 ctxt->dictNames = 1;
11222 } else {
11223 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11224 }
Owen Taylor3473f882001-02-23 17:55:21 +000011225 if (doc != NULL) {
11226 newDoc->intSubset = doc->intSubset;
11227 newDoc->extSubset = doc->extSubset;
11228 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011229 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11230 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011231 if (sax != NULL)
11232 ctxt->sax = oldsax;
11233 xmlFreeParserCtxt(ctxt);
11234 newDoc->intSubset = NULL;
11235 newDoc->extSubset = NULL;
11236 xmlFreeDoc(newDoc);
11237 return(-1);
11238 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011239 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11240 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011241 if (doc == NULL) {
11242 ctxt->myDoc = newDoc;
11243 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011244 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011245 newDoc->children->doc = doc;
11246 }
11247 ctxt->instate = XML_PARSER_CONTENT;
11248 ctxt->depth = depth;
11249
11250 /*
11251 * Doing validity checking on chunk doesn't make sense
11252 */
11253 ctxt->validate = 0;
11254 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011255 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011256
Daniel Veillardb39bc392002-10-26 19:29:51 +000011257 if ( doc != NULL ){
11258 content = doc->children;
11259 doc->children = NULL;
11260 xmlParseContent(ctxt);
11261 doc->children = content;
11262 }
11263 else {
11264 xmlParseContent(ctxt);
11265 }
Owen Taylor3473f882001-02-23 17:55:21 +000011266 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011267 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011268 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011269 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011270 }
11271 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011272 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011273 }
11274
11275 if (!ctxt->wellFormed) {
11276 if (ctxt->errNo == 0)
11277 ret = 1;
11278 else
11279 ret = ctxt->errNo;
11280 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011281 ret = 0;
11282 }
11283
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011284 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11285 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011286
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011287 /*
11288 * Return the newly created nodeset after unlinking it from
11289 * they pseudo parent.
11290 */
11291 cur = newDoc->children->children;
11292 *lst = cur;
11293 while (cur != NULL) {
11294 xmlSetTreeDoc(cur, doc);
11295 cur->parent = NULL;
11296 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011297 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011298 newDoc->children->children = NULL;
11299 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011300
Owen Taylor3473f882001-02-23 17:55:21 +000011301 if (sax != NULL)
11302 ctxt->sax = oldsax;
11303 xmlFreeParserCtxt(ctxt);
11304 newDoc->intSubset = NULL;
11305 newDoc->extSubset = NULL;
11306 xmlFreeDoc(newDoc);
11307
11308 return(ret);
11309}
11310
11311/**
11312 * xmlSAXParseEntity:
11313 * @sax: the SAX handler block
11314 * @filename: the filename
11315 *
11316 * parse an XML external entity out of context and build a tree.
11317 * It use the given SAX function block to handle the parsing callback.
11318 * If sax is NULL, fallback to the default DOM tree building routines.
11319 *
11320 * [78] extParsedEnt ::= TextDecl? content
11321 *
11322 * This correspond to a "Well Balanced" chunk
11323 *
11324 * Returns the resulting document tree
11325 */
11326
11327xmlDocPtr
11328xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11329 xmlDocPtr ret;
11330 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011331
11332 ctxt = xmlCreateFileParserCtxt(filename);
11333 if (ctxt == NULL) {
11334 return(NULL);
11335 }
11336 if (sax != NULL) {
11337 if (ctxt->sax != NULL)
11338 xmlFree(ctxt->sax);
11339 ctxt->sax = sax;
11340 ctxt->userData = NULL;
11341 }
11342
Owen Taylor3473f882001-02-23 17:55:21 +000011343 xmlParseExtParsedEnt(ctxt);
11344
11345 if (ctxt->wellFormed)
11346 ret = ctxt->myDoc;
11347 else {
11348 ret = NULL;
11349 xmlFreeDoc(ctxt->myDoc);
11350 ctxt->myDoc = NULL;
11351 }
11352 if (sax != NULL)
11353 ctxt->sax = NULL;
11354 xmlFreeParserCtxt(ctxt);
11355
11356 return(ret);
11357}
11358
11359/**
11360 * xmlParseEntity:
11361 * @filename: the filename
11362 *
11363 * parse an XML external entity out of context and build a tree.
11364 *
11365 * [78] extParsedEnt ::= TextDecl? content
11366 *
11367 * This correspond to a "Well Balanced" chunk
11368 *
11369 * Returns the resulting document tree
11370 */
11371
11372xmlDocPtr
11373xmlParseEntity(const char *filename) {
11374 return(xmlSAXParseEntity(NULL, filename));
11375}
Daniel Veillard81273902003-09-30 00:43:48 +000011376#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011377
11378/**
11379 * xmlCreateEntityParserCtxt:
11380 * @URL: the entity URL
11381 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011382 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011383 *
11384 * Create a parser context for an external entity
11385 * Automatic support for ZLIB/Compress compressed document is provided
11386 * by default if found at compile-time.
11387 *
11388 * Returns the new parser context or NULL
11389 */
11390xmlParserCtxtPtr
11391xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11392 const xmlChar *base) {
11393 xmlParserCtxtPtr ctxt;
11394 xmlParserInputPtr inputStream;
11395 char *directory = NULL;
11396 xmlChar *uri;
11397
11398 ctxt = xmlNewParserCtxt();
11399 if (ctxt == NULL) {
11400 return(NULL);
11401 }
11402
11403 uri = xmlBuildURI(URL, base);
11404
11405 if (uri == NULL) {
11406 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11407 if (inputStream == NULL) {
11408 xmlFreeParserCtxt(ctxt);
11409 return(NULL);
11410 }
11411
11412 inputPush(ctxt, inputStream);
11413
11414 if ((ctxt->directory == NULL) && (directory == NULL))
11415 directory = xmlParserGetDirectory((char *)URL);
11416 if ((ctxt->directory == NULL) && (directory != NULL))
11417 ctxt->directory = directory;
11418 } else {
11419 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11420 if (inputStream == NULL) {
11421 xmlFree(uri);
11422 xmlFreeParserCtxt(ctxt);
11423 return(NULL);
11424 }
11425
11426 inputPush(ctxt, inputStream);
11427
11428 if ((ctxt->directory == NULL) && (directory == NULL))
11429 directory = xmlParserGetDirectory((char *)uri);
11430 if ((ctxt->directory == NULL) && (directory != NULL))
11431 ctxt->directory = directory;
11432 xmlFree(uri);
11433 }
Owen Taylor3473f882001-02-23 17:55:21 +000011434 return(ctxt);
11435}
11436
11437/************************************************************************
11438 * *
11439 * Front ends when parsing from a file *
11440 * *
11441 ************************************************************************/
11442
11443/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011444 * xmlCreateURLParserCtxt:
11445 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011446 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011447 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011448 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011449 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011450 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011451 *
11452 * Returns the new parser context or NULL
11453 */
11454xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011455xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011456{
11457 xmlParserCtxtPtr ctxt;
11458 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011459 char *directory = NULL;
11460
Owen Taylor3473f882001-02-23 17:55:21 +000011461 ctxt = xmlNewParserCtxt();
11462 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011463 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011464 return(NULL);
11465 }
11466
Daniel Veillard61b93382003-11-03 14:28:31 +000011467 if (options != 0)
11468 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011469
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011470 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011471 if (inputStream == NULL) {
11472 xmlFreeParserCtxt(ctxt);
11473 return(NULL);
11474 }
11475
Owen Taylor3473f882001-02-23 17:55:21 +000011476 inputPush(ctxt, inputStream);
11477 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011478 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011479 if ((ctxt->directory == NULL) && (directory != NULL))
11480 ctxt->directory = directory;
11481
11482 return(ctxt);
11483}
11484
Daniel Veillard61b93382003-11-03 14:28:31 +000011485/**
11486 * xmlCreateFileParserCtxt:
11487 * @filename: the filename
11488 *
11489 * Create a parser context for a file content.
11490 * Automatic support for ZLIB/Compress compressed document is provided
11491 * by default if found at compile-time.
11492 *
11493 * Returns the new parser context or NULL
11494 */
11495xmlParserCtxtPtr
11496xmlCreateFileParserCtxt(const char *filename)
11497{
11498 return(xmlCreateURLParserCtxt(filename, 0));
11499}
11500
Daniel Veillard81273902003-09-30 00:43:48 +000011501#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011502/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011503 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011504 * @sax: the SAX handler block
11505 * @filename: the filename
11506 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11507 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011508 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011509 *
11510 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11511 * compressed document is provided by default if found at compile-time.
11512 * It use the given SAX function block to handle the parsing callback.
11513 * If sax is NULL, fallback to the default DOM tree building routines.
11514 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011515 * User data (void *) is stored within the parser context in the
11516 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011517 *
Owen Taylor3473f882001-02-23 17:55:21 +000011518 * Returns the resulting document tree
11519 */
11520
11521xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011522xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11523 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011524 xmlDocPtr ret;
11525 xmlParserCtxtPtr ctxt;
11526 char *directory = NULL;
11527
Daniel Veillard635ef722001-10-29 11:48:19 +000011528 xmlInitParser();
11529
Owen Taylor3473f882001-02-23 17:55:21 +000011530 ctxt = xmlCreateFileParserCtxt(filename);
11531 if (ctxt == NULL) {
11532 return(NULL);
11533 }
11534 if (sax != NULL) {
11535 if (ctxt->sax != NULL)
11536 xmlFree(ctxt->sax);
11537 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011538 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011539 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011540 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011541 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011542 }
Owen Taylor3473f882001-02-23 17:55:21 +000011543
11544 if ((ctxt->directory == NULL) && (directory == NULL))
11545 directory = xmlParserGetDirectory(filename);
11546 if ((ctxt->directory == NULL) && (directory != NULL))
11547 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11548
Daniel Veillarddad3f682002-11-17 16:47:27 +000011549 ctxt->recovery = recovery;
11550
Owen Taylor3473f882001-02-23 17:55:21 +000011551 xmlParseDocument(ctxt);
11552
William M. Brackc07329e2003-09-08 01:57:30 +000011553 if ((ctxt->wellFormed) || recovery) {
11554 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011555 if (ret != NULL) {
11556 if (ctxt->input->buf->compressed > 0)
11557 ret->compression = 9;
11558 else
11559 ret->compression = ctxt->input->buf->compressed;
11560 }
William M. Brackc07329e2003-09-08 01:57:30 +000011561 }
Owen Taylor3473f882001-02-23 17:55:21 +000011562 else {
11563 ret = NULL;
11564 xmlFreeDoc(ctxt->myDoc);
11565 ctxt->myDoc = NULL;
11566 }
11567 if (sax != NULL)
11568 ctxt->sax = NULL;
11569 xmlFreeParserCtxt(ctxt);
11570
11571 return(ret);
11572}
11573
11574/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011575 * xmlSAXParseFile:
11576 * @sax: the SAX handler block
11577 * @filename: the filename
11578 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11579 * documents
11580 *
11581 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11582 * compressed document is provided by default if found at compile-time.
11583 * It use the given SAX function block to handle the parsing callback.
11584 * If sax is NULL, fallback to the default DOM tree building routines.
11585 *
11586 * Returns the resulting document tree
11587 */
11588
11589xmlDocPtr
11590xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11591 int recovery) {
11592 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11593}
11594
11595/**
Owen Taylor3473f882001-02-23 17:55:21 +000011596 * xmlRecoverDoc:
11597 * @cur: a pointer to an array of xmlChar
11598 *
11599 * parse an XML in-memory document and build a tree.
11600 * In the case the document is not Well Formed, a tree is built anyway
11601 *
11602 * Returns the resulting document tree
11603 */
11604
11605xmlDocPtr
11606xmlRecoverDoc(xmlChar *cur) {
11607 return(xmlSAXParseDoc(NULL, cur, 1));
11608}
11609
11610/**
11611 * xmlParseFile:
11612 * @filename: the filename
11613 *
11614 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11615 * compressed document is provided by default if found at compile-time.
11616 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011617 * Returns the resulting document tree if the file was wellformed,
11618 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011619 */
11620
11621xmlDocPtr
11622xmlParseFile(const char *filename) {
11623 return(xmlSAXParseFile(NULL, filename, 0));
11624}
11625
11626/**
11627 * xmlRecoverFile:
11628 * @filename: the filename
11629 *
11630 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11631 * compressed document is provided by default if found at compile-time.
11632 * In the case the document is not Well Formed, a tree is built anyway
11633 *
11634 * Returns the resulting document tree
11635 */
11636
11637xmlDocPtr
11638xmlRecoverFile(const char *filename) {
11639 return(xmlSAXParseFile(NULL, filename, 1));
11640}
11641
11642
11643/**
11644 * xmlSetupParserForBuffer:
11645 * @ctxt: an XML parser context
11646 * @buffer: a xmlChar * buffer
11647 * @filename: a file name
11648 *
11649 * Setup the parser context to parse a new buffer; Clears any prior
11650 * contents from the parser context. The buffer parameter must not be
11651 * NULL, but the filename parameter can be
11652 */
11653void
11654xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11655 const char* filename)
11656{
11657 xmlParserInputPtr input;
11658
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011659 if ((ctxt == NULL) || (buffer == NULL))
11660 return;
11661
Owen Taylor3473f882001-02-23 17:55:21 +000011662 input = xmlNewInputStream(ctxt);
11663 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011664 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011665 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011666 return;
11667 }
11668
11669 xmlClearParserCtxt(ctxt);
11670 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011671 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011672 input->base = buffer;
11673 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011674 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011675 inputPush(ctxt, input);
11676}
11677
11678/**
11679 * xmlSAXUserParseFile:
11680 * @sax: a SAX handler
11681 * @user_data: The user data returned on SAX callbacks
11682 * @filename: a file name
11683 *
11684 * parse an XML file and call the given SAX handler routines.
11685 * Automatic support for ZLIB/Compress compressed document is provided
11686 *
11687 * Returns 0 in case of success or a error number otherwise
11688 */
11689int
11690xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11691 const char *filename) {
11692 int ret = 0;
11693 xmlParserCtxtPtr ctxt;
11694
11695 ctxt = xmlCreateFileParserCtxt(filename);
11696 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011697#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011698 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011699#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011700 xmlFree(ctxt->sax);
11701 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011702 xmlDetectSAX2(ctxt);
11703
Owen Taylor3473f882001-02-23 17:55:21 +000011704 if (user_data != NULL)
11705 ctxt->userData = user_data;
11706
11707 xmlParseDocument(ctxt);
11708
11709 if (ctxt->wellFormed)
11710 ret = 0;
11711 else {
11712 if (ctxt->errNo != 0)
11713 ret = ctxt->errNo;
11714 else
11715 ret = -1;
11716 }
11717 if (sax != NULL)
11718 ctxt->sax = NULL;
11719 xmlFreeParserCtxt(ctxt);
11720
11721 return ret;
11722}
Daniel Veillard81273902003-09-30 00:43:48 +000011723#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011724
11725/************************************************************************
11726 * *
11727 * Front ends when parsing from memory *
11728 * *
11729 ************************************************************************/
11730
11731/**
11732 * xmlCreateMemoryParserCtxt:
11733 * @buffer: a pointer to a char array
11734 * @size: the size of the array
11735 *
11736 * Create a parser context for an XML in-memory document.
11737 *
11738 * Returns the new parser context or NULL
11739 */
11740xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011741xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011742 xmlParserCtxtPtr ctxt;
11743 xmlParserInputPtr input;
11744 xmlParserInputBufferPtr buf;
11745
11746 if (buffer == NULL)
11747 return(NULL);
11748 if (size <= 0)
11749 return(NULL);
11750
11751 ctxt = xmlNewParserCtxt();
11752 if (ctxt == NULL)
11753 return(NULL);
11754
Daniel Veillard53350552003-09-18 13:35:51 +000011755 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011756 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011757 if (buf == NULL) {
11758 xmlFreeParserCtxt(ctxt);
11759 return(NULL);
11760 }
Owen Taylor3473f882001-02-23 17:55:21 +000011761
11762 input = xmlNewInputStream(ctxt);
11763 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011764 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011765 xmlFreeParserCtxt(ctxt);
11766 return(NULL);
11767 }
11768
11769 input->filename = NULL;
11770 input->buf = buf;
11771 input->base = input->buf->buffer->content;
11772 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011773 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011774
11775 inputPush(ctxt, input);
11776 return(ctxt);
11777}
11778
Daniel Veillard81273902003-09-30 00:43:48 +000011779#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011780/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011781 * xmlSAXParseMemoryWithData:
11782 * @sax: the SAX handler block
11783 * @buffer: an pointer to a char array
11784 * @size: the size of the array
11785 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11786 * documents
11787 * @data: the userdata
11788 *
11789 * parse an XML in-memory block and use the given SAX function block
11790 * to handle the parsing callback. If sax is NULL, fallback to the default
11791 * DOM tree building routines.
11792 *
11793 * User data (void *) is stored within the parser context in the
11794 * context's _private member, so it is available nearly everywhere in libxml
11795 *
11796 * Returns the resulting document tree
11797 */
11798
11799xmlDocPtr
11800xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11801 int size, int recovery, void *data) {
11802 xmlDocPtr ret;
11803 xmlParserCtxtPtr ctxt;
11804
11805 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11806 if (ctxt == NULL) return(NULL);
11807 if (sax != NULL) {
11808 if (ctxt->sax != NULL)
11809 xmlFree(ctxt->sax);
11810 ctxt->sax = sax;
11811 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011812 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011813 if (data!=NULL) {
11814 ctxt->_private=data;
11815 }
11816
Daniel Veillardadba5f12003-04-04 16:09:01 +000011817 ctxt->recovery = recovery;
11818
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011819 xmlParseDocument(ctxt);
11820
11821 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11822 else {
11823 ret = NULL;
11824 xmlFreeDoc(ctxt->myDoc);
11825 ctxt->myDoc = NULL;
11826 }
11827 if (sax != NULL)
11828 ctxt->sax = NULL;
11829 xmlFreeParserCtxt(ctxt);
11830
11831 return(ret);
11832}
11833
11834/**
Owen Taylor3473f882001-02-23 17:55:21 +000011835 * xmlSAXParseMemory:
11836 * @sax: the SAX handler block
11837 * @buffer: an pointer to a char array
11838 * @size: the size of the array
11839 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11840 * documents
11841 *
11842 * parse an XML in-memory block and use the given SAX function block
11843 * to handle the parsing callback. If sax is NULL, fallback to the default
11844 * DOM tree building routines.
11845 *
11846 * Returns the resulting document tree
11847 */
11848xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011849xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11850 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011851 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011852}
11853
11854/**
11855 * xmlParseMemory:
11856 * @buffer: an pointer to a char array
11857 * @size: the size of the array
11858 *
11859 * parse an XML in-memory block and build a tree.
11860 *
11861 * Returns the resulting document tree
11862 */
11863
Daniel Veillard50822cb2001-07-26 20:05:51 +000011864xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011865 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11866}
11867
11868/**
11869 * xmlRecoverMemory:
11870 * @buffer: an pointer to a char array
11871 * @size: the size of the array
11872 *
11873 * parse an XML in-memory block and build a tree.
11874 * In the case the document is not Well Formed, a tree is built anyway
11875 *
11876 * Returns the resulting document tree
11877 */
11878
Daniel Veillard50822cb2001-07-26 20:05:51 +000011879xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011880 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11881}
11882
11883/**
11884 * xmlSAXUserParseMemory:
11885 * @sax: a SAX handler
11886 * @user_data: The user data returned on SAX callbacks
11887 * @buffer: an in-memory XML document input
11888 * @size: the length of the XML document in bytes
11889 *
11890 * A better SAX parsing routine.
11891 * parse an XML in-memory buffer and call the given SAX handler routines.
11892 *
11893 * Returns 0 in case of success or a error number otherwise
11894 */
11895int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011896 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011897 int ret = 0;
11898 xmlParserCtxtPtr ctxt;
11899 xmlSAXHandlerPtr oldsax = NULL;
11900
Daniel Veillard9e923512002-08-14 08:48:52 +000011901 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011902 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11903 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011904 oldsax = ctxt->sax;
11905 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011906 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011907 if (user_data != NULL)
11908 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011909
11910 xmlParseDocument(ctxt);
11911
11912 if (ctxt->wellFormed)
11913 ret = 0;
11914 else {
11915 if (ctxt->errNo != 0)
11916 ret = ctxt->errNo;
11917 else
11918 ret = -1;
11919 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011920 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011921 xmlFreeParserCtxt(ctxt);
11922
11923 return ret;
11924}
Daniel Veillard81273902003-09-30 00:43:48 +000011925#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011926
11927/**
11928 * xmlCreateDocParserCtxt:
11929 * @cur: a pointer to an array of xmlChar
11930 *
11931 * Creates a parser context for an XML in-memory document.
11932 *
11933 * Returns the new parser context or NULL
11934 */
11935xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011936xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011937 int len;
11938
11939 if (cur == NULL)
11940 return(NULL);
11941 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011942 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011943}
11944
Daniel Veillard81273902003-09-30 00:43:48 +000011945#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011946/**
11947 * xmlSAXParseDoc:
11948 * @sax: the SAX handler block
11949 * @cur: a pointer to an array of xmlChar
11950 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11951 * documents
11952 *
11953 * parse an XML in-memory document and build a tree.
11954 * It use the given SAX function block to handle the parsing callback.
11955 * If sax is NULL, fallback to the default DOM tree building routines.
11956 *
11957 * Returns the resulting document tree
11958 */
11959
11960xmlDocPtr
11961xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11962 xmlDocPtr ret;
11963 xmlParserCtxtPtr ctxt;
11964
11965 if (cur == NULL) return(NULL);
11966
11967
11968 ctxt = xmlCreateDocParserCtxt(cur);
11969 if (ctxt == NULL) return(NULL);
11970 if (sax != NULL) {
11971 ctxt->sax = sax;
11972 ctxt->userData = NULL;
11973 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011974 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011975
11976 xmlParseDocument(ctxt);
11977 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11978 else {
11979 ret = NULL;
11980 xmlFreeDoc(ctxt->myDoc);
11981 ctxt->myDoc = NULL;
11982 }
11983 if (sax != NULL)
11984 ctxt->sax = NULL;
11985 xmlFreeParserCtxt(ctxt);
11986
11987 return(ret);
11988}
11989
11990/**
11991 * xmlParseDoc:
11992 * @cur: a pointer to an array of xmlChar
11993 *
11994 * parse an XML in-memory document and build a tree.
11995 *
11996 * Returns the resulting document tree
11997 */
11998
11999xmlDocPtr
12000xmlParseDoc(xmlChar *cur) {
12001 return(xmlSAXParseDoc(NULL, cur, 0));
12002}
Daniel Veillard81273902003-09-30 00:43:48 +000012003#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012004
Daniel Veillard81273902003-09-30 00:43:48 +000012005#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012006/************************************************************************
12007 * *
12008 * Specific function to keep track of entities references *
12009 * and used by the XSLT debugger *
12010 * *
12011 ************************************************************************/
12012
12013static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12014
12015/**
12016 * xmlAddEntityReference:
12017 * @ent : A valid entity
12018 * @firstNode : A valid first node for children of entity
12019 * @lastNode : A valid last node of children entity
12020 *
12021 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12022 */
12023static void
12024xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12025 xmlNodePtr lastNode)
12026{
12027 if (xmlEntityRefFunc != NULL) {
12028 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12029 }
12030}
12031
12032
12033/**
12034 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012035 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012036 *
12037 * Set the function to call call back when a xml reference has been made
12038 */
12039void
12040xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12041{
12042 xmlEntityRefFunc = func;
12043}
Daniel Veillard81273902003-09-30 00:43:48 +000012044#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012045
12046/************************************************************************
12047 * *
12048 * Miscellaneous *
12049 * *
12050 ************************************************************************/
12051
12052#ifdef LIBXML_XPATH_ENABLED
12053#include <libxml/xpath.h>
12054#endif
12055
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012056extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012057static int xmlParserInitialized = 0;
12058
12059/**
12060 * xmlInitParser:
12061 *
12062 * Initialization function for the XML parser.
12063 * This is not reentrant. Call once before processing in case of
12064 * use in multithreaded programs.
12065 */
12066
12067void
12068xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012069 if (xmlParserInitialized != 0)
12070 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012071
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012072 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12073 (xmlGenericError == NULL))
12074 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012075 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012076 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012077 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012078 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012079 xmlDefaultSAXHandlerInit();
12080 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012081#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012082 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012083#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012084#ifdef LIBXML_HTML_ENABLED
12085 htmlInitAutoClose();
12086 htmlDefaultSAXHandlerInit();
12087#endif
12088#ifdef LIBXML_XPATH_ENABLED
12089 xmlXPathInit();
12090#endif
12091 xmlParserInitialized = 1;
12092}
12093
12094/**
12095 * xmlCleanupParser:
12096 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012097 * Cleanup function for the XML library. It tries to reclaim all
12098 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012099 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012100 * function should not prevent reusing the library but one should
12101 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012102 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012103 */
12104
12105void
12106xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012107 if (!xmlParserInitialized)
12108 return;
12109
Owen Taylor3473f882001-02-23 17:55:21 +000012110 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012111#ifdef LIBXML_CATALOG_ENABLED
12112 xmlCatalogCleanup();
12113#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012114 xmlCleanupInputCallbacks();
12115#ifdef LIBXML_OUTPUT_ENABLED
12116 xmlCleanupOutputCallbacks();
12117#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012118#ifdef LIBXML_SCHEMAS_ENABLED
12119 xmlSchemaCleanupTypes();
12120#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012121 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012122 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012123 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012124 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012125 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012126}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012127
12128/************************************************************************
12129 * *
12130 * New set (2.6.0) of simpler and more flexible APIs *
12131 * *
12132 ************************************************************************/
12133
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts as
 * a single statement and can safely be used in an if/else without
 * braces. It must be followed by a semicolon. @str is evaluated more
 * than once, so it must not have side effects.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
12145
12146/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012147 * xmlCtxtReset:
12148 * @ctxt: an XML parser context
12149 *
12150 * Reset a parser context
12151 */
12152void
12153xmlCtxtReset(xmlParserCtxtPtr ctxt)
12154{
12155 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012156 xmlDictPtr dict;
12157
12158 if (ctxt == NULL)
12159 return;
12160
12161 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012162
12163 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12164 xmlFreeInputStream(input);
12165 }
12166 ctxt->inputNr = 0;
12167 ctxt->input = NULL;
12168
12169 ctxt->spaceNr = 0;
12170 ctxt->spaceTab[0] = -1;
12171 ctxt->space = &ctxt->spaceTab[0];
12172
12173
12174 ctxt->nodeNr = 0;
12175 ctxt->node = NULL;
12176
12177 ctxt->nameNr = 0;
12178 ctxt->name = NULL;
12179
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012180 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012181 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012182 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012183 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012184 DICT_FREE(ctxt->directory);
12185 ctxt->directory = NULL;
12186 DICT_FREE(ctxt->extSubURI);
12187 ctxt->extSubURI = NULL;
12188 DICT_FREE(ctxt->extSubSystem);
12189 ctxt->extSubSystem = NULL;
12190 if (ctxt->myDoc != NULL)
12191 xmlFreeDoc(ctxt->myDoc);
12192 ctxt->myDoc = NULL;
12193
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012194 ctxt->standalone = -1;
12195 ctxt->hasExternalSubset = 0;
12196 ctxt->hasPErefs = 0;
12197 ctxt->html = 0;
12198 ctxt->external = 0;
12199 ctxt->instate = XML_PARSER_START;
12200 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012201
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012202 ctxt->wellFormed = 1;
12203 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012204 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012205 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012206#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012207 ctxt->vctxt.userData = ctxt;
12208 ctxt->vctxt.error = xmlParserValidityError;
12209 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012210#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012211 ctxt->record_info = 0;
12212 ctxt->nbChars = 0;
12213 ctxt->checkIndex = 0;
12214 ctxt->inSubset = 0;
12215 ctxt->errNo = XML_ERR_OK;
12216 ctxt->depth = 0;
12217 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12218 ctxt->catalogs = NULL;
12219 xmlInitNodeInfoSeq(&ctxt->node_seq);
12220
12221 if (ctxt->attsDefault != NULL) {
12222 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12223 ctxt->attsDefault = NULL;
12224 }
12225 if (ctxt->attsSpecial != NULL) {
12226 xmlHashFree(ctxt->attsSpecial, NULL);
12227 ctxt->attsSpecial = NULL;
12228 }
12229
Daniel Veillard4432df22003-09-28 18:58:27 +000012230#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012231 if (ctxt->catalogs != NULL)
12232 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012233#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012234 if (ctxt->lastError.code != XML_ERR_OK)
12235 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012236}
12237
12238/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012239 * xmlCtxtResetPush:
12240 * @ctxt: an XML parser context
12241 * @chunk: a pointer to an array of chars
12242 * @size: number of chars in the array
12243 * @filename: an optional file name or URI
12244 * @encoding: the document encoding, or NULL
12245 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012246 * Reset a push parser context
12247 *
12248 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012249 */
12250int
12251xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12252 int size, const char *filename, const char *encoding)
12253{
12254 xmlParserInputPtr inputStream;
12255 xmlParserInputBufferPtr buf;
12256 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12257
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012258 if (ctxt == NULL)
12259 return(1);
12260
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012261 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12262 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12263
12264 buf = xmlAllocParserInputBuffer(enc);
12265 if (buf == NULL)
12266 return(1);
12267
12268 if (ctxt == NULL) {
12269 xmlFreeParserInputBuffer(buf);
12270 return(1);
12271 }
12272
12273 xmlCtxtReset(ctxt);
12274
12275 if (ctxt->pushTab == NULL) {
12276 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12277 sizeof(xmlChar *));
12278 if (ctxt->pushTab == NULL) {
12279 xmlErrMemory(ctxt, NULL);
12280 xmlFreeParserInputBuffer(buf);
12281 return(1);
12282 }
12283 }
12284
12285 if (filename == NULL) {
12286 ctxt->directory = NULL;
12287 } else {
12288 ctxt->directory = xmlParserGetDirectory(filename);
12289 }
12290
12291 inputStream = xmlNewInputStream(ctxt);
12292 if (inputStream == NULL) {
12293 xmlFreeParserInputBuffer(buf);
12294 return(1);
12295 }
12296
12297 if (filename == NULL)
12298 inputStream->filename = NULL;
12299 else
12300 inputStream->filename = (char *)
12301 xmlCanonicPath((const xmlChar *) filename);
12302 inputStream->buf = buf;
12303 inputStream->base = inputStream->buf->buffer->content;
12304 inputStream->cur = inputStream->buf->buffer->content;
12305 inputStream->end =
12306 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12307
12308 inputPush(ctxt, inputStream);
12309
12310 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12311 (ctxt->input->buf != NULL)) {
12312 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12313 int cur = ctxt->input->cur - ctxt->input->base;
12314
12315 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12316
12317 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12318 ctxt->input->cur = ctxt->input->base + cur;
12319 ctxt->input->end =
12320 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12321 use];
12322#ifdef DEBUG_PUSH
12323 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12324#endif
12325 }
12326
12327 if (encoding != NULL) {
12328 xmlCharEncodingHandlerPtr hdlr;
12329
12330 hdlr = xmlFindCharEncodingHandler(encoding);
12331 if (hdlr != NULL) {
12332 xmlSwitchToEncoding(ctxt, hdlr);
12333 } else {
12334 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12335 "Unsupported encoding %s\n", BAD_CAST encoding);
12336 }
12337 } else if (enc != XML_CHAR_ENCODING_NONE) {
12338 xmlSwitchEncoding(ctxt, enc);
12339 }
12340
12341 return(0);
12342}
12343
12344/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012345 * xmlCtxtUseOptions:
12346 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012347 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012348 *
12349 * Applies the options to the parser context
12350 *
12351 * Returns 0 in case of success, the set of unknown or unimplemented options
12352 * in case of error.
12353 */
12354int
12355xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12356{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012357 if (ctxt == NULL)
12358 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012359 if (options & XML_PARSE_RECOVER) {
12360 ctxt->recovery = 1;
12361 options -= XML_PARSE_RECOVER;
12362 } else
12363 ctxt->recovery = 0;
12364 if (options & XML_PARSE_DTDLOAD) {
12365 ctxt->loadsubset = XML_DETECT_IDS;
12366 options -= XML_PARSE_DTDLOAD;
12367 } else
12368 ctxt->loadsubset = 0;
12369 if (options & XML_PARSE_DTDATTR) {
12370 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12371 options -= XML_PARSE_DTDATTR;
12372 }
12373 if (options & XML_PARSE_NOENT) {
12374 ctxt->replaceEntities = 1;
12375 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12376 options -= XML_PARSE_NOENT;
12377 } else
12378 ctxt->replaceEntities = 0;
12379 if (options & XML_PARSE_NOWARNING) {
12380 ctxt->sax->warning = NULL;
12381 options -= XML_PARSE_NOWARNING;
12382 }
12383 if (options & XML_PARSE_NOERROR) {
12384 ctxt->sax->error = NULL;
12385 ctxt->sax->fatalError = NULL;
12386 options -= XML_PARSE_NOERROR;
12387 }
12388 if (options & XML_PARSE_PEDANTIC) {
12389 ctxt->pedantic = 1;
12390 options -= XML_PARSE_PEDANTIC;
12391 } else
12392 ctxt->pedantic = 0;
12393 if (options & XML_PARSE_NOBLANKS) {
12394 ctxt->keepBlanks = 0;
12395 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12396 options -= XML_PARSE_NOBLANKS;
12397 } else
12398 ctxt->keepBlanks = 1;
12399 if (options & XML_PARSE_DTDVALID) {
12400 ctxt->validate = 1;
12401 if (options & XML_PARSE_NOWARNING)
12402 ctxt->vctxt.warning = NULL;
12403 if (options & XML_PARSE_NOERROR)
12404 ctxt->vctxt.error = NULL;
12405 options -= XML_PARSE_DTDVALID;
12406 } else
12407 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012408#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012409 if (options & XML_PARSE_SAX1) {
12410 ctxt->sax->startElement = xmlSAX2StartElement;
12411 ctxt->sax->endElement = xmlSAX2EndElement;
12412 ctxt->sax->startElementNs = NULL;
12413 ctxt->sax->endElementNs = NULL;
12414 ctxt->sax->initialized = 1;
12415 options -= XML_PARSE_SAX1;
12416 }
Daniel Veillard81273902003-09-30 00:43:48 +000012417#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012418 if (options & XML_PARSE_NODICT) {
12419 ctxt->dictNames = 0;
12420 options -= XML_PARSE_NODICT;
12421 } else {
12422 ctxt->dictNames = 1;
12423 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012424 if (options & XML_PARSE_NOCDATA) {
12425 ctxt->sax->cdataBlock = NULL;
12426 options -= XML_PARSE_NOCDATA;
12427 }
12428 if (options & XML_PARSE_NSCLEAN) {
12429 ctxt->options |= XML_PARSE_NSCLEAN;
12430 options -= XML_PARSE_NSCLEAN;
12431 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012432 if (options & XML_PARSE_NONET) {
12433 ctxt->options |= XML_PARSE_NONET;
12434 options -= XML_PARSE_NONET;
12435 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012436 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012437 return (options);
12438}
12439
12440/**
12441 * xmlDoRead:
12442 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012443 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012444 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012445 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012446 * @reuse: keep the context for reuse
12447 *
12448 * Common front-end for the xmlRead functions
12449 *
12450 * Returns the resulting document tree or NULL
12451 */
12452static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012453xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12454 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012455{
12456 xmlDocPtr ret;
12457
12458 xmlCtxtUseOptions(ctxt, options);
12459 if (encoding != NULL) {
12460 xmlCharEncodingHandlerPtr hdlr;
12461
12462 hdlr = xmlFindCharEncodingHandler(encoding);
12463 if (hdlr != NULL)
12464 xmlSwitchToEncoding(ctxt, hdlr);
12465 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012466 if ((URL != NULL) && (ctxt->input != NULL) &&
12467 (ctxt->input->filename == NULL))
12468 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012469 xmlParseDocument(ctxt);
12470 if ((ctxt->wellFormed) || ctxt->recovery)
12471 ret = ctxt->myDoc;
12472 else {
12473 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012474 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012475 xmlFreeDoc(ctxt->myDoc);
12476 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012477 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012478 ctxt->myDoc = NULL;
12479 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012480 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012481 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012482
12483 return (ret);
12484}
12485
12486/**
12487 * xmlReadDoc:
12488 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012489 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012490 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012491 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012492 *
12493 * parse an XML in-memory document and build a tree.
12494 *
12495 * Returns the resulting document tree
12496 */
12497xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012498xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012499{
12500 xmlParserCtxtPtr ctxt;
12501
12502 if (cur == NULL)
12503 return (NULL);
12504
12505 ctxt = xmlCreateDocParserCtxt(cur);
12506 if (ctxt == NULL)
12507 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012508 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012509}
12510
12511/**
12512 * xmlReadFile:
12513 * @filename: a file or URL
12514 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012515 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012516 *
12517 * parse an XML file from the filesystem or the network.
12518 *
12519 * Returns the resulting document tree
12520 */
12521xmlDocPtr
12522xmlReadFile(const char *filename, const char *encoding, int options)
12523{
12524 xmlParserCtxtPtr ctxt;
12525
Daniel Veillard61b93382003-11-03 14:28:31 +000012526 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012527 if (ctxt == NULL)
12528 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012529 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012530}
12531
12532/**
12533 * xmlReadMemory:
12534 * @buffer: a pointer to a char array
12535 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012536 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012537 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012538 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012539 *
12540 * parse an XML in-memory document and build a tree.
12541 *
12542 * Returns the resulting document tree
12543 */
12544xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012545xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012546{
12547 xmlParserCtxtPtr ctxt;
12548
12549 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12550 if (ctxt == NULL)
12551 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012552 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012553}
12554
12555/**
12556 * xmlReadFd:
12557 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012558 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012559 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012560 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012561 *
12562 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012563 * NOTE that the file descriptor will not be closed when the
12564 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012565 *
12566 * Returns the resulting document tree
12567 */
12568xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012569xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012570{
12571 xmlParserCtxtPtr ctxt;
12572 xmlParserInputBufferPtr input;
12573 xmlParserInputPtr stream;
12574
12575 if (fd < 0)
12576 return (NULL);
12577
12578 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12579 if (input == NULL)
12580 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012581 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012582 ctxt = xmlNewParserCtxt();
12583 if (ctxt == NULL) {
12584 xmlFreeParserInputBuffer(input);
12585 return (NULL);
12586 }
12587 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12588 if (stream == NULL) {
12589 xmlFreeParserInputBuffer(input);
12590 xmlFreeParserCtxt(ctxt);
12591 return (NULL);
12592 }
12593 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012594 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012595}
12596
12597/**
12598 * xmlReadIO:
12599 * @ioread: an I/O read function
12600 * @ioclose: an I/O close function
12601 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012602 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012603 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012604 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012605 *
12606 * parse an XML document from I/O functions and source and build a tree.
12607 *
12608 * Returns the resulting document tree
12609 */
12610xmlDocPtr
12611xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012612 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012613{
12614 xmlParserCtxtPtr ctxt;
12615 xmlParserInputBufferPtr input;
12616 xmlParserInputPtr stream;
12617
12618 if (ioread == NULL)
12619 return (NULL);
12620
12621 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12622 XML_CHAR_ENCODING_NONE);
12623 if (input == NULL)
12624 return (NULL);
12625 ctxt = xmlNewParserCtxt();
12626 if (ctxt == NULL) {
12627 xmlFreeParserInputBuffer(input);
12628 return (NULL);
12629 }
12630 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12631 if (stream == NULL) {
12632 xmlFreeParserInputBuffer(input);
12633 xmlFreeParserCtxt(ctxt);
12634 return (NULL);
12635 }
12636 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012637 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012638}
12639
12640/**
12641 * xmlCtxtReadDoc:
12642 * @ctxt: an XML parser context
12643 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012644 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012645 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012646 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012647 *
12648 * parse an XML in-memory document and build a tree.
12649 * This reuses the existing @ctxt parser context
12650 *
12651 * Returns the resulting document tree
12652 */
12653xmlDocPtr
12654xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012655 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012656{
12657 xmlParserInputPtr stream;
12658
12659 if (cur == NULL)
12660 return (NULL);
12661 if (ctxt == NULL)
12662 return (NULL);
12663
12664 xmlCtxtReset(ctxt);
12665
12666 stream = xmlNewStringInputStream(ctxt, cur);
12667 if (stream == NULL) {
12668 return (NULL);
12669 }
12670 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012671 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012672}
12673
12674/**
12675 * xmlCtxtReadFile:
12676 * @ctxt: an XML parser context
12677 * @filename: a file or URL
12678 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012679 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012680 *
12681 * parse an XML file from the filesystem or the network.
12682 * This reuses the existing @ctxt parser context
12683 *
12684 * Returns the resulting document tree
12685 */
12686xmlDocPtr
12687xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12688 const char *encoding, int options)
12689{
12690 xmlParserInputPtr stream;
12691
12692 if (filename == NULL)
12693 return (NULL);
12694 if (ctxt == NULL)
12695 return (NULL);
12696
12697 xmlCtxtReset(ctxt);
12698
12699 stream = xmlNewInputFromFile(ctxt, filename);
12700 if (stream == NULL) {
12701 return (NULL);
12702 }
12703 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012704 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012705}
12706
12707/**
12708 * xmlCtxtReadMemory:
12709 * @ctxt: an XML parser context
12710 * @buffer: a pointer to a char array
12711 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012712 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012713 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012714 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012715 *
12716 * parse an XML in-memory document and build a tree.
12717 * This reuses the existing @ctxt parser context
12718 *
12719 * Returns the resulting document tree
12720 */
12721xmlDocPtr
12722xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012723 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012724{
12725 xmlParserInputBufferPtr input;
12726 xmlParserInputPtr stream;
12727
12728 if (ctxt == NULL)
12729 return (NULL);
12730 if (buffer == NULL)
12731 return (NULL);
12732
12733 xmlCtxtReset(ctxt);
12734
12735 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12736 if (input == NULL) {
12737 return(NULL);
12738 }
12739
12740 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12741 if (stream == NULL) {
12742 xmlFreeParserInputBuffer(input);
12743 return(NULL);
12744 }
12745
12746 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012747 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012748}
12749
12750/**
12751 * xmlCtxtReadFd:
12752 * @ctxt: an XML parser context
12753 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012754 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012755 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012756 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012757 *
12758 * parse an XML from a file descriptor and build a tree.
12759 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012760 * NOTE that the file descriptor will not be closed when the
12761 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012762 *
12763 * Returns the resulting document tree
12764 */
12765xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012766xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12767 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012768{
12769 xmlParserInputBufferPtr input;
12770 xmlParserInputPtr stream;
12771
12772 if (fd < 0)
12773 return (NULL);
12774 if (ctxt == NULL)
12775 return (NULL);
12776
12777 xmlCtxtReset(ctxt);
12778
12779
12780 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12781 if (input == NULL)
12782 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012783 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012784 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12785 if (stream == NULL) {
12786 xmlFreeParserInputBuffer(input);
12787 return (NULL);
12788 }
12789 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012790 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012791}
12792
12793/**
12794 * xmlCtxtReadIO:
12795 * @ctxt: an XML parser context
12796 * @ioread: an I/O read function
12797 * @ioclose: an I/O close function
12798 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012799 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012800 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012801 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012802 *
12803 * parse an XML document from I/O functions and source and build a tree.
12804 * This reuses the existing @ctxt parser context
12805 *
12806 * Returns the resulting document tree
12807 */
12808xmlDocPtr
12809xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12810 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012811 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012812 const char *encoding, int options)
12813{
12814 xmlParserInputBufferPtr input;
12815 xmlParserInputPtr stream;
12816
12817 if (ioread == NULL)
12818 return (NULL);
12819 if (ctxt == NULL)
12820 return (NULL);
12821
12822 xmlCtxtReset(ctxt);
12823
12824 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12825 XML_CHAR_ENCODING_NONE);
12826 if (input == NULL)
12827 return (NULL);
12828 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12829 if (stream == NULL) {
12830 xmlFreeParserInputBuffer(input);
12831 return (NULL);
12832 }
12833 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012834 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012835}