blob: ad398e386c76ff0f0685f79f09663dbf3043ba67 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Upper bound on the nesting depth of the XML documents the parser
 * agrees to process. The value is arbitrary: it acts as a safety
 * boundary and does not reflect a limitation of the parser itself.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * Processing instruction targets carrying the "xml" prefix that the
 * W3C specifications allow. The array is NULL terminated.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
429 * Handle a warning.
430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
857 xmlRealloc(ctxt->nsTab,
858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
1197/*
1198 * Macros for accessing the content. Those should be used only by the parser,
1199 * and not exported.
1200 *
1201 * Dirty macros, i.e. one often need to make assumption on the context to
1202 * use them
1203 *
1204 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1205 * To be used with extreme caution since operations consuming
1206 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1211 * RAW same as CUR but in the input buffer, bypass any token
1212 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
1215 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001216 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001219 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1220 *
1221 * NEXT Skip to the next character, this does the proper decoding
1222 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001223 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001224 * CUR_CHAR(l) returns the current unicode character (int), set l
1225 * to the number of xmlChars used for the encoding [0-5].
1226 * CUR_SCHAR same but operate on a string instead of the context
1227 * COPY_BUF copy the current unicode char to the target buffer, increment
1228 * the index
1229 * GROW, SHRINK handling of input buffers
1230 */
1231
/*
 * Raw accessors on the current input. They all expect a parser context
 * variable named ctxt to be in scope at the point of use.
 * RAW and CUR expand to the same expression: the xmlChar at the current
 * input position. NXT(val) is the xmlChar val positions further.
 * CUR_PTR is the current input pointer itself.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
1236
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn. Bytes are compared left to right and the comparison stops at
 * the first mismatch, so no byte past a mismatch is read.
 *
 * Every parameter is parenthesized in the expansion so that an argument
 * which is itself an expression (pointer arithmetic, a conditional, ...)
 * is compared as a whole, and the cast keeps the const qualifier of the
 * buffers the parser works on.
 * Note that s is still evaluated once per compared byte: do not pass an
 * expression with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1254
/*
 * SKIP(val): move val xmlChars forward, keeping the character count and
 * the column in step, then handle a parameter entity reference starting
 * at the new position and pop the input if it is exhausted.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1262
Daniel Veillarda880b122003-04-21 21:36:41 +00001263#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001264 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1265 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001266 xmlSHRINK (ctxt);
1267
1268static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1269 xmlParserInputShrink(ctxt->input);
1270 if ((*ctxt->input->cur == 0) &&
1271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1272 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001273 }
Owen Taylor3473f882001-02-23 17:55:21 +00001274
Daniel Veillarda880b122003-04-21 21:36:41 +00001275#define GROW if ((ctxt->progressive == 0) && \
1276 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001277 xmlGROW (ctxt);
1278
1279static void xmlGROW (xmlParserCtxtPtr ctxt) {
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((*ctxt->input->cur == 0) &&
1282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1283 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001284}
Owen Taylor3473f882001-02-23 17:55:21 +00001285
/* SKIP_BLANKS: skip blanks on the current context via xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character via xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, counting one column and one
 * character, and ask for more input when the new position holds a 0.
 * There is no newline handling here.
 * The expansion is a plain brace block, not a do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1297
/*
 * NEXTL(l): skip the current character, which is l xmlChars long.
 * A newline starts a new line at column 1, anything else counts for one
 * column. A parameter entity reference at the new position is handled.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): current character of the input, its length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v, which is l xmlChars long in
 * the input, to the buffer b at index i and advance i accordingly.
 * Wrapped in do { } while (0) so that it behaves as a single statement;
 * as before, the caller supplies the terminating semicolon.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001312
1313/**
1314 * xmlSkipBlankChars:
1315 * @ctxt: the XML parser context
1316 *
1317 * skip all blanks character found at that point in the input streams.
1318 * It pops up finished entities in the process if allowable at that point.
1319 *
1320 * Returns the number of space chars skipped
1321 */
1322
1323int
1324xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001325 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001326
1327 /*
1328 * It's Okay to use CUR/NEXT here since all the blanks are on
1329 * the ASCII range.
1330 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001331 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1332 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001333 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001334 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001336 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001337 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001338 if (*cur == '\n') {
1339 ctxt->input->line++; ctxt->input->col = 1;
1340 }
1341 cur++;
1342 res++;
1343 if (*cur == 0) {
1344 ctxt->input->cur = cur;
1345 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1346 cur = ctxt->input->cur;
1347 }
1348 }
1349 ctxt->input->cur = cur;
1350 } else {
1351 int cur;
1352 do {
1353 cur = CUR;
1354 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1355 NEXT;
1356 cur = CUR;
1357 res++;
1358 }
1359 while ((cur == 0) && (ctxt->inputNr > 1) &&
1360 (ctxt->instate != XML_PARSER_COMMENT)) {
1361 xmlPopInput(ctxt);
1362 cur = CUR;
1363 }
1364 /*
1365 * Need to handle support of entities branching here
1366 */
1367 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1368 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1369 }
Owen Taylor3473f882001-02-23 17:55:21 +00001370 return(res);
1371}
1372
1373/************************************************************************
1374 * *
1375 * Commodity functions to handle entities *
1376 * *
1377 ************************************************************************/
1378
1379/**
1380 * xmlPopInput:
1381 * @ctxt: an XML parser context
1382 *
1383 * xmlPopInput: the current input pointed by ctxt->input came to an end
1384 * pop it and return the next char.
1385 *
1386 * Returns the current xmlChar in the parser context
1387 */
1388xmlChar
1389xmlPopInput(xmlParserCtxtPtr ctxt) {
1390 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1391 if (xmlParserDebugEntities)
1392 xmlGenericError(xmlGenericErrorContext,
1393 "Popping input %d\n", ctxt->inputNr);
1394 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001395 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1397 return(xmlPopInput(ctxt));
1398 return(CUR);
1399}
1400
1401/**
1402 * xmlPushInput:
1403 * @ctxt: an XML parser context
1404 * @input: an XML parser input fragment (entity, XML fragment ...).
1405 *
1406 * xmlPushInput: switch to a new input stream which is stacked on top
1407 * of the previous one(s).
1408 */
1409void
1410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1411 if (input == NULL) return;
1412
1413 if (xmlParserDebugEntities) {
1414 if ((ctxt->input != NULL) && (ctxt->input->filename))
1415 xmlGenericError(xmlGenericErrorContext,
1416 "%s(%d): ", ctxt->input->filename,
1417 ctxt->input->line);
1418 xmlGenericError(xmlGenericErrorContext,
1419 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1420 }
1421 inputPush(ctxt, input);
1422 GROW;
1423}
1424
1425/**
1426 * xmlParseCharRef:
1427 * @ctxt: an XML parser context
1428 *
1429 * parse Reference declarations
1430 *
1431 * [66] CharRef ::= '&#' [0-9]+ ';' |
1432 * '&#x' [0-9a-fA-F]+ ';'
1433 *
1434 * [ WFC: Legal Character ]
1435 * Characters referred to using character references must match the
1436 * production for Char.
1437 *
1438 * Returns the value parsed (as an int), 0 in case of error
1439 */
1440int
1441xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001442 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 int count = 0;
1444
Owen Taylor3473f882001-02-23 17:55:21 +00001445 /*
1446 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1447 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001448 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001449 (NXT(2) == 'x')) {
1450 SKIP(3);
1451 GROW;
1452 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001453 if (count++ > 20) {
1454 count = 0;
1455 GROW;
1456 }
1457 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = val * 16 + (CUR - '0');
1459 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1460 val = val * 16 + (CUR - 'a') + 10;
1461 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1462 val = val * 16 + (CUR - 'A') + 10;
1463 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001465 val = 0;
1466 break;
1467 }
1468 NEXT;
1469 count++;
1470 }
1471 if (RAW == ';') {
1472 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001473 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001474 ctxt->nbChars ++;
1475 ctxt->input->cur++;
1476 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001477 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001478 SKIP(2);
1479 GROW;
1480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001481 if (count++ > 20) {
1482 count = 0;
1483 GROW;
1484 }
1485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001486 val = val * 10 + (CUR - '0');
1487 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001489 val = 0;
1490 break;
1491 }
1492 NEXT;
1493 count++;
1494 }
1495 if (RAW == ';') {
1496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001497 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001498 ctxt->nbChars ++;
1499 ctxt->input->cur++;
1500 }
1501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 }
1504
1505 /*
1506 * [ WFC: Legal Character ]
1507 * Characters referred to using character references must match the
1508 * production for Char.
1509 */
William M. Brack871611b2003-10-18 04:53:14 +00001510 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 return(val);
1512 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1514 "xmlParseCharRef: invalid xmlChar value %d\n",
1515 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 }
1517 return(0);
1518}
1519
1520/**
1521 * xmlParseStringCharRef:
1522 * @ctxt: an XML parser context
1523 * @str: a pointer to an index in the string
1524 *
1525 * parse Reference declarations, variant parsing from a string rather
1526 * than an an input flow.
1527 *
1528 * [66] CharRef ::= '&#' [0-9]+ ';' |
1529 * '&#x' [0-9a-fA-F]+ ';'
1530 *
1531 * [ WFC: Legal Character ]
1532 * Characters referred to using character references must match the
1533 * production for Char.
1534 *
1535 * Returns the value parsed (as an int), 0 in case of error, str will be
1536 * updated to the current value of the index
1537 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538static int
Owen Taylor3473f882001-02-23 17:55:21 +00001539xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1540 const xmlChar *ptr;
1541 xmlChar cur;
1542 int val = 0;
1543
1544 if ((str == NULL) || (*str == NULL)) return(0);
1545 ptr = *str;
1546 cur = *ptr;
1547 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1548 ptr += 3;
1549 cur = *ptr;
1550 while (cur != ';') { /* Non input consuming loop */
1551 if ((cur >= '0') && (cur <= '9'))
1552 val = val * 16 + (cur - '0');
1553 else if ((cur >= 'a') && (cur <= 'f'))
1554 val = val * 16 + (cur - 'a') + 10;
1555 else if ((cur >= 'A') && (cur <= 'F'))
1556 val = val * 16 + (cur - 'A') + 10;
1557 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001558 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001559 val = 0;
1560 break;
1561 }
1562 ptr++;
1563 cur = *ptr;
1564 }
1565 if (cur == ';')
1566 ptr++;
1567 } else if ((cur == '&') && (ptr[1] == '#')){
1568 ptr += 2;
1569 cur = *ptr;
1570 while (cur != ';') { /* Non input consuming loops */
1571 if ((cur >= '0') && (cur <= '9'))
1572 val = val * 10 + (cur - '0');
1573 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001574 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001575 val = 0;
1576 break;
1577 }
1578 ptr++;
1579 cur = *ptr;
1580 }
1581 if (cur == ';')
1582 ptr++;
1583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001584 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001585 return(0);
1586 }
1587 *str = ptr;
1588
1589 /*
1590 * [ WFC: Legal Character ]
1591 * Characters referred to using character references must match the
1592 * production for Char.
1593 */
William M. Brack871611b2003-10-18 04:53:14 +00001594 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 return(val);
1596 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001597 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1598 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1599 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 return(0);
1602}
1603
1604/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 * xmlNewBlanksWrapperInputStream:
1606 * @ctxt: an XML parser context
1607 * @entity: an Entity pointer
1608 *
1609 * Create a new input stream for wrapping
1610 * blanks around a PEReference
1611 *
1612 * Returns the new input stream or NULL
1613 */
1614
1615static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1616
Daniel Veillardf4862f02002-09-10 11:13:43 +00001617static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001618xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1619 xmlParserInputPtr input;
1620 xmlChar *buffer;
1621 size_t length;
1622 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1624 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001625 return(NULL);
1626 }
1627 if (xmlParserDebugEntities)
1628 xmlGenericError(xmlGenericErrorContext,
1629 "new blanks wrapper for entity: %s\n", entity->name);
1630 input = xmlNewInputStream(ctxt);
1631 if (input == NULL) {
1632 return(NULL);
1633 }
1634 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001635 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001636 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001637 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001638 return(NULL);
1639 }
1640 buffer [0] = ' ';
1641 buffer [1] = '%';
1642 buffer [length-3] = ';';
1643 buffer [length-2] = ' ';
1644 buffer [length-1] = 0;
1645 memcpy(buffer + 2, entity->name, length - 5);
1646 input->free = deallocblankswrapper;
1647 input->base = buffer;
1648 input->cur = buffer;
1649 input->length = length;
1650 input->end = &buffer[length];
1651 return(input);
1652}
1653
1654/**
Owen Taylor3473f882001-02-23 17:55:21 +00001655 * xmlParserHandlePEReference:
1656 * @ctxt: the parser context
1657 *
1658 * [69] PEReference ::= '%' Name ';'
1659 *
1660 * [ WFC: No Recursion ]
1661 * A parsed entity must not contain a recursive
1662 * reference to itself, either directly or indirectly.
1663 *
1664 * [ WFC: Entity Declared ]
1665 * In a document without any DTD, a document with only an internal DTD
1666 * subset which contains no parameter entity references, or a document
1667 * with "standalone='yes'", ... ... The declaration of a parameter
1668 * entity must precede any reference to it...
1669 *
1670 * [ VC: Entity Declared ]
1671 * In a document with an external subset or external parameter entities
1672 * with "standalone='no'", ... ... The declaration of a parameter entity
1673 * must precede any reference to it...
1674 *
1675 * [ WFC: In DTD ]
1676 * Parameter-entity references may only appear in the DTD.
1677 * NOTE: misleading but this is handled.
1678 *
1679 * A PEReference may have been detected in the current input stream
1680 * the handling is done accordingly to
1681 * http://www.w3.org/TR/REC-xml#entproc
1682 * i.e.
1683 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001684 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001685 */
1686void
1687xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 xmlEntityPtr entity = NULL;
1690 xmlParserInputPtr input;
1691
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (RAW != '%') return;
1693 switch(ctxt->instate) {
1694 case XML_PARSER_CDATA_SECTION:
1695 return;
1696 case XML_PARSER_COMMENT:
1697 return;
1698 case XML_PARSER_START_TAG:
1699 return;
1700 case XML_PARSER_END_TAG:
1701 return;
1702 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 return;
1705 case XML_PARSER_PROLOG:
1706 case XML_PARSER_START:
1707 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 return;
1710 case XML_PARSER_ENTITY_DECL:
1711 case XML_PARSER_CONTENT:
1712 case XML_PARSER_ATTRIBUTE_VALUE:
1713 case XML_PARSER_PI:
1714 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001715 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001716 /* we just ignore it there */
1717 return;
1718 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001719 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001720 return;
1721 case XML_PARSER_ENTITY_VALUE:
1722 /*
1723 * NOTE: in the case of entity values, we don't do the
1724 * substitution here since we need the literal
1725 * entity value to be able to save the internal
1726 * subset of the document.
1727 * This will be handled by xmlStringDecodeEntities
1728 */
1729 return;
1730 case XML_PARSER_DTD:
1731 /*
1732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1733 * In the internal DTD subset, parameter-entity references
1734 * can occur only where markup declarations can occur, not
1735 * within markup declarations.
1736 * In that case this is handled in xmlParseMarkupDecl
1737 */
1738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1739 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001740 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001741 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001742 break;
1743 case XML_PARSER_IGNORE:
1744 return;
1745 }
1746
1747 NEXT;
1748 name = xmlParseName(ctxt);
1749 if (xmlParserDebugEntities)
1750 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001751 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001753 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001754 } else {
1755 if (RAW == ';') {
1756 NEXT;
1757 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1758 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1759 if (entity == NULL) {
1760
1761 /*
1762 * [ WFC: Entity Declared ]
1763 * In a document without any DTD, a document with only an
1764 * internal DTD subset which contains no parameter entity
1765 * references, or a document with "standalone='yes'", ...
1766 * ... The declaration of a parameter entity must precede
1767 * any reference to it...
1768 */
1769 if ((ctxt->standalone == 1) ||
1770 ((ctxt->hasExternalSubset == 0) &&
1771 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001773 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001774 } else {
1775 /*
1776 * [ VC: Entity Declared ]
1777 * In a document with an external subset or external
1778 * parameter entities with "standalone='no'", ...
1779 * ... The declaration of a parameter entity must precede
1780 * any reference to it...
1781 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001782 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1783 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1784 "PEReference: %%%s; not found\n",
1785 name);
1786 } else
1787 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1788 "PEReference: %%%s; not found\n",
1789 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 ctxt->valid = 0;
1791 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 } else if (ctxt->input->free != deallocblankswrapper) {
1793 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1794 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 } else {
1796 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1797 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001798 xmlChar start[4];
1799 xmlCharEncoding enc;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 /*
1802 * handle the extra spaces added before and after
1803 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 */
1806 input = xmlNewEntityInputStream(ctxt, entity);
1807 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001808
1809 /*
1810 * Get the 4 first bytes and decode the charset
1811 * if enc != XML_CHAR_ENCODING_NONE
1812 * plug some encoding conversion routines.
1813 */
1814 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001815 if (entity->length >= 4) {
1816 start[0] = RAW;
1817 start[1] = NXT(1);
1818 start[2] = NXT(2);
1819 start[3] = NXT(3);
1820 enc = xmlDetectCharEncoding(start, 4);
1821 if (enc != XML_CHAR_ENCODING_NONE) {
1822 xmlSwitchEncoding(ctxt, enc);
1823 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001824 }
1825
Owen Taylor3473f882001-02-23 17:55:21 +00001826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001827 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1828 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlParseTextDecl(ctxt);
1830 }
Owen Taylor3473f882001-02-23 17:55:21 +00001831 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001832 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1833 "PEReference: %s is not a parameter entity\n",
1834 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001835 }
1836 }
1837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001838 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 }
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841}
1842
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the capacity of @buffer, whose size is tracked in the
 * companion variable buffer##_size. The enclosing function must provide
 * a mem_error label, which is jumped to when the reallocation fails.
 * On failure @buffer and buffer##_size are left untouched, so the
 * original block is still reachable and can be released by the caller.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp = (xmlChar *)                               \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (growBuffer_tmp == NULL) goto mem_error;                         \
    buffer = growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
}
1852
1853/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001854 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001855 * @ctxt: the parser context
1856 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001857 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001858 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1859 * @end: an end marker xmlChar, 0 if none
1860 * @end2: an end marker xmlChar, 0 if none
1861 * @end3: an end marker xmlChar, 0 if none
1862 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001863 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001864 *
1865 * [67] Reference ::= EntityRef | CharRef
1866 *
1867 * [69] PEReference ::= '%' Name ';'
1868 *
1869 * Returns A newly allocated string with the substitution done. The caller
1870 * must deallocate it !
1871 */
1872xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1874 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001875 xmlChar *buffer = NULL;
1876 int buffer_size = 0;
1877
1878 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 xmlEntityPtr ent;
1881 int c,l;
1882 int nbchars = 0;
1883
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001885 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001886 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001887
1888 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001889 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 return(NULL);
1891 }
1892
1893 /*
1894 * allocate a translation buffer.
1895 */
1896 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001897 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001898 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001899
1900 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001901 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001902 * we are operating on already parsed values.
1903 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001904 if (str < last)
1905 c = CUR_SCHAR(str, l);
1906 else
1907 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001908 while ((c != 0) && (c != end) && /* non input consuming loop */
1909 (c != end2) && (c != end3)) {
1910
1911 if (c == 0) break;
1912 if ((c == '&') && (str[1] == '#')) {
1913 int val = xmlParseStringCharRef(ctxt, &str);
1914 if (val != 0) {
1915 COPY_BUF(0,buffer,nbchars,val);
1916 }
1917 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1918 if (xmlParserDebugEntities)
1919 xmlGenericError(xmlGenericErrorContext,
1920 "String decoding Entity Reference: %.30s\n",
1921 str);
1922 ent = xmlParseStringEntityRef(ctxt, &str);
1923 if ((ent != NULL) &&
1924 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1925 if (ent->content != NULL) {
1926 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1927 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001928 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1929 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001930 }
1931 } else if ((ent != NULL) && (ent->content != NULL)) {
1932 xmlChar *rep;
1933
1934 ctxt->depth++;
1935 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1936 0, 0, 0);
1937 ctxt->depth--;
1938 if (rep != NULL) {
1939 current = rep;
1940 while (*current != 0) { /* non input consuming loop */
1941 buffer[nbchars++] = *current++;
1942 if (nbchars >
1943 buffer_size - XML_PARSER_BUFFER_SIZE) {
1944 growBuffer(buffer);
1945 }
1946 }
1947 xmlFree(rep);
1948 }
1949 } else if (ent != NULL) {
1950 int i = xmlStrlen(ent->name);
1951 const xmlChar *cur = ent->name;
1952
1953 buffer[nbchars++] = '&';
1954 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1955 growBuffer(buffer);
1956 }
1957 for (;i > 0;i--)
1958 buffer[nbchars++] = *cur++;
1959 buffer[nbchars++] = ';';
1960 }
1961 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "String decoding PE Reference: %.30s\n", str);
1965 ent = xmlParseStringPEReference(ctxt, &str);
1966 if (ent != NULL) {
1967 xmlChar *rep;
1968
1969 ctxt->depth++;
1970 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1971 0, 0, 0);
1972 ctxt->depth--;
1973 if (rep != NULL) {
1974 current = rep;
1975 while (*current != 0) { /* non input consuming loop */
1976 buffer[nbchars++] = *current++;
1977 if (nbchars >
1978 buffer_size - XML_PARSER_BUFFER_SIZE) {
1979 growBuffer(buffer);
1980 }
1981 }
1982 xmlFree(rep);
1983 }
1984 }
1985 } else {
1986 COPY_BUF(l,buffer,nbchars,c);
1987 str += l;
1988 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1989 growBuffer(buffer);
1990 }
1991 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001992 if (str < last)
1993 c = CUR_SCHAR(str, l);
1994 else
1995 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001996 }
1997 buffer[nbchars++] = 0;
1998 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001999
2000mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002001 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002002 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003}
2004
Daniel Veillarde57ec792003-09-10 10:50:59 +00002005/**
2006 * xmlStringDecodeEntities:
2007 * @ctxt: the parser context
2008 * @str: the input string
2009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2010 * @end: an end marker xmlChar, 0 if none
2011 * @end2: an end marker xmlChar, 0 if none
2012 * @end3: an end marker xmlChar, 0 if none
2013 *
2014 * Takes a entity string content and process to do the adequate substitutions.
2015 *
2016 * [67] Reference ::= EntityRef | CharRef
2017 *
2018 * [69] PEReference ::= '%' Name ';'
2019 *
2020 * Returns A newly allocated string with the substitution done. The caller
2021 * must deallocate it !
2022 */
2023xmlChar *
2024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2025 xmlChar end, xmlChar end2, xmlChar end3) {
2026 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2027 end, end2, end3));
2028}
Owen Taylor3473f882001-02-23 17:55:21 +00002029
2030/************************************************************************
2031 * *
2032 * Commodity functions to handle xmlChars *
2033 * *
2034 ************************************************************************/
2035
2036/**
2037 * xmlStrndup:
2038 * @cur: the input xmlChar *
2039 * @len: the len of @cur
2040 *
2041 * a strndup for array of xmlChar's
2042 *
2043 * Returns a new xmlChar * or NULL
2044 */
2045xmlChar *
2046xmlStrndup(const xmlChar *cur, int len) {
2047 xmlChar *ret;
2048
2049 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002050 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002051 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002052 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002053 return(NULL);
2054 }
2055 memcpy(ret, cur, len * sizeof(xmlChar));
2056 ret[len] = 0;
2057 return(ret);
2058}
2059
2060/**
2061 * xmlStrdup:
2062 * @cur: the input xmlChar *
2063 *
2064 * a strdup for array of xmlChar's. Since they are supposed to be
2065 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2066 * a termination mark of '0'.
2067 *
2068 * Returns a new xmlChar * or NULL
2069 */
2070xmlChar *
2071xmlStrdup(const xmlChar *cur) {
2072 const xmlChar *p = cur;
2073
2074 if (cur == NULL) return(NULL);
2075 while (*p != 0) p++; /* non input consuming */
2076 return(xmlStrndup(cur, p - cur));
2077}
2078
2079/**
2080 * xmlCharStrndup:
2081 * @cur: the input char *
2082 * @len: the len of @cur
2083 *
2084 * a strndup for char's to xmlChar's
2085 *
2086 * Returns a new xmlChar * or NULL
2087 */
2088
2089xmlChar *
2090xmlCharStrndup(const char *cur, int len) {
2091 int i;
2092 xmlChar *ret;
2093
2094 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002095 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002096 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002097 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002098 return(NULL);
2099 }
2100 for (i = 0;i < len;i++)
2101 ret[i] = (xmlChar) cur[i];
2102 ret[len] = 0;
2103 return(ret);
2104}
2105
2106/**
2107 * xmlCharStrdup:
2108 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002109 *
2110 * a strdup for char's to xmlChar's
2111 *
2112 * Returns a new xmlChar * or NULL
2113 */
2114
2115xmlChar *
2116xmlCharStrdup(const char *cur) {
2117 const char *p = cur;
2118
2119 if (cur == NULL) return(NULL);
2120 while (*p != '\0') p++; /* non input consuming */
2121 return(xmlCharStrndup(cur, p - cur));
2122}
2123
2124/**
2125 * xmlStrcmp:
2126 * @str1: the first xmlChar *
2127 * @str2: the second xmlChar *
2128 *
2129 * a strcmp for xmlChar's
2130 *
2131 * Returns the integer result of the comparison
2132 */
2133
2134int
2135xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2136 register int tmp;
2137
2138 if (str1 == str2) return(0);
2139 if (str1 == NULL) return(-1);
2140 if (str2 == NULL) return(1);
2141 do {
2142 tmp = *str1++ - *str2;
2143 if (tmp != 0) return(tmp);
2144 } while (*str2++ != 0);
2145 return 0;
2146}
2147
2148/**
2149 * xmlStrEqual:
2150 * @str1: the first xmlChar *
2151 * @str2: the second xmlChar *
2152 *
2153 * Check if both string are equal of have same content
2154 * Should be a bit more readable and faster than xmlStrEqual()
2155 *
2156 * Returns 1 if they are equal, 0 if they are different
2157 */
2158
2159int
2160xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2161 if (str1 == str2) return(1);
2162 if (str1 == NULL) return(0);
2163 if (str2 == NULL) return(0);
2164 do {
2165 if (*str1++ != *str2) return(0);
2166 } while (*str2++);
2167 return(1);
2168}
2169
2170/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002171 * xmlStrQEqual:
2172 * @pref: the prefix of the QName
2173 * @name: the localname of the QName
2174 * @str: the second xmlChar *
2175 *
2176 * Check if a QName is Equal to a given string
2177 *
2178 * Returns 1 if they are equal, 0 if they are different
2179 */
2180
2181int
2182xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2183 if (pref == NULL) return(xmlStrEqual(name, str));
2184 if (name == NULL) return(0);
2185 if (str == NULL) return(0);
2186
2187 do {
2188 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002189 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002190 if (*str++ != ':') return(0);
2191 do {
2192 if (*name++ != *str) return(0);
2193 } while (*str++);
2194 return(1);
2195}
2196
2197/**
Owen Taylor3473f882001-02-23 17:55:21 +00002198 * xmlStrncmp:
2199 * @str1: the first xmlChar *
2200 * @str2: the second xmlChar *
2201 * @len: the max comparison length
2202 *
2203 * a strncmp for xmlChar's
2204 *
2205 * Returns the integer result of the comparison
2206 */
2207
2208int
2209xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2210 register int tmp;
2211
2212 if (len <= 0) return(0);
2213 if (str1 == str2) return(0);
2214 if (str1 == NULL) return(-1);
2215 if (str2 == NULL) return(1);
2216 do {
2217 tmp = *str1++ - *str2;
2218 if (tmp != 0 || --len == 0) return(tmp);
2219 } while (*str2++ != 0);
2220 return 0;
2221}
2222
Daniel Veillardb44025c2001-10-11 22:55:55 +00002223static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002224 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2225 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2226 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2227 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2228 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2229 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2230 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2231 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2232 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2233 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2234 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2235 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2236 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2237 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2238 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2239 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2240 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2241 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2242 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2243 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2244 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2245 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2246 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2247 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2248 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2249 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2250 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2251 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2252 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2253 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2254 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2255 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2256};
2257
2258/**
2259 * xmlStrcasecmp:
2260 * @str1: the first xmlChar *
2261 * @str2: the second xmlChar *
2262 *
2263 * a strcasecmp for xmlChar's
2264 *
2265 * Returns the integer result of the comparison
2266 */
2267
2268int
2269xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2270 register int tmp;
2271
2272 if (str1 == str2) return(0);
2273 if (str1 == NULL) return(-1);
2274 if (str2 == NULL) return(1);
2275 do {
2276 tmp = casemap[*str1++] - casemap[*str2];
2277 if (tmp != 0) return(tmp);
2278 } while (*str2++ != 0);
2279 return 0;
2280}
2281
2282/**
2283 * xmlStrncasecmp:
2284 * @str1: the first xmlChar *
2285 * @str2: the second xmlChar *
2286 * @len: the max comparison length
2287 *
2288 * a strncasecmp for xmlChar's
2289 *
2290 * Returns the integer result of the comparison
2291 */
2292
2293int
2294xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2295 register int tmp;
2296
2297 if (len <= 0) return(0);
2298 if (str1 == str2) return(0);
2299 if (str1 == NULL) return(-1);
2300 if (str2 == NULL) return(1);
2301 do {
2302 tmp = casemap[*str1++] - casemap[*str2];
2303 if (tmp != 0 || --len == 0) return(tmp);
2304 } while (*str2++ != 0);
2305 return 0;
2306}
2307
2308/**
2309 * xmlStrchr:
2310 * @str: the xmlChar * array
2311 * @val: the xmlChar to search
2312 *
2313 * a strchr for xmlChar's
2314 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002315 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002316 */
2317
2318const xmlChar *
2319xmlStrchr(const xmlChar *str, xmlChar val) {
2320 if (str == NULL) return(NULL);
2321 while (*str != 0) { /* non input consuming */
2322 if (*str == val) return((xmlChar *) str);
2323 str++;
2324 }
2325 return(NULL);
2326}
2327
2328/**
2329 * xmlStrstr:
2330 * @str: the xmlChar * array (haystack)
2331 * @val: the xmlChar to search (needle)
2332 *
2333 * a strstr for xmlChar's
2334 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002335 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002336 */
2337
2338const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002339xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002340 int n;
2341
2342 if (str == NULL) return(NULL);
2343 if (val == NULL) return(NULL);
2344 n = xmlStrlen(val);
2345
2346 if (n == 0) return(str);
2347 while (*str != 0) { /* non input consuming */
2348 if (*str == *val) {
2349 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2350 }
2351 str++;
2352 }
2353 return(NULL);
2354}
2355
2356/**
2357 * xmlStrcasestr:
2358 * @str: the xmlChar * array (haystack)
2359 * @val: the xmlChar to search (needle)
2360 *
2361 * a case-ignoring strstr for xmlChar's
2362 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002363 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002364 */
2365
2366const xmlChar *
2367xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2368 int n;
2369
2370 if (str == NULL) return(NULL);
2371 if (val == NULL) return(NULL);
2372 n = xmlStrlen(val);
2373
2374 if (n == 0) return(str);
2375 while (*str != 0) { /* non input consuming */
2376 if (casemap[*str] == casemap[*val])
2377 if (!xmlStrncasecmp(str, val, n)) return(str);
2378 str++;
2379 }
2380 return(NULL);
2381}
2382
2383/**
2384 * xmlStrsub:
2385 * @str: the xmlChar * array (haystack)
2386 * @start: the index of the first char (zero based)
2387 * @len: the length of the substring
2388 *
2389 * Extract a substring of a given string
2390 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002391 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002392 */
2393
2394xmlChar *
2395xmlStrsub(const xmlChar *str, int start, int len) {
2396 int i;
2397
2398 if (str == NULL) return(NULL);
2399 if (start < 0) return(NULL);
2400 if (len < 0) return(NULL);
2401
2402 for (i = 0;i < start;i++) {
2403 if (*str == 0) return(NULL);
2404 str++;
2405 }
2406 if (*str == 0) return(NULL);
2407 return(xmlStrndup(str, len));
2408}
2409
2410/**
2411 * xmlStrlen:
2412 * @str: the xmlChar * array
2413 *
2414 * length of a xmlChar's string
2415 *
2416 * Returns the number of xmlChar contained in the ARRAY.
2417 */
2418
2419int
2420xmlStrlen(const xmlChar *str) {
2421 int len = 0;
2422
2423 if (str == NULL) return(0);
2424 while (*str != 0) { /* non input consuming */
2425 str++;
2426 len++;
2427 }
2428 return(len);
2429}
2430
2431/**
2432 * xmlStrncat:
2433 * @cur: the original xmlChar * array
2434 * @add: the xmlChar * array added
2435 * @len: the length of @add
2436 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002437 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002438 * first bytes of @add.
2439 *
2440 * Returns a new xmlChar *, the original @cur is reallocated if needed
2441 * and should not be freed
2442 */
2443
2444xmlChar *
2445xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2446 int size;
2447 xmlChar *ret;
2448
2449 if ((add == NULL) || (len == 0))
2450 return(cur);
2451 if (cur == NULL)
2452 return(xmlStrndup(add, len));
2453
2454 size = xmlStrlen(cur);
2455 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2456 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002457 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002458 return(cur);
2459 }
2460 memcpy(&ret[size], add, len * sizeof(xmlChar));
2461 ret[size + len] = 0;
2462 return(ret);
2463}
2464
2465/**
2466 * xmlStrcat:
2467 * @cur: the original xmlChar * array
2468 * @add: the xmlChar * array added
2469 *
2470 * a strcat for array of xmlChar's. Since they are supposed to be
2471 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2472 * a termination mark of '0'.
2473 *
2474 * Returns a new xmlChar * containing the concatenated string.
2475 */
2476xmlChar *
2477xmlStrcat(xmlChar *cur, const xmlChar *add) {
2478 const xmlChar *p = add;
2479
2480 if (add == NULL) return(cur);
2481 if (cur == NULL)
2482 return(xmlStrdup(add));
2483
2484 while (*p != 0) p++; /* non input consuming */
2485 return(xmlStrncat(cur, add, p - add));
2486}
2487
Aleksey Sanine7acf432003-10-02 20:05:27 +00002488/**
2489 * xmlStrPrintf:
2490 * @buf: the result buffer.
2491 * @len: the result buffer length.
2492 * @msg: the message with printf formatting.
2493 * @...: extra parameters for the message.
2494 *
2495 * Formats @msg and places result into @buf.
2496 *
2497 * Returns the number of characters written to @buf or -1 if an error occurs.
2498 */
2499int
2500xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2501 va_list args;
2502 int ret;
2503
2504 if((buf == NULL) || (msg == NULL)) {
2505 return(-1);
2506 }
2507
2508 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002509 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002510 va_end(args);
Daniel Veillardd96f6d32003-10-07 21:25:12 +00002511 buf[len - 1] = 0; /* be safe ! */
Aleksey Sanine7acf432003-10-02 20:05:27 +00002512
2513 return(ret);
2514}
2515
Aleksey Saninb5a46da2003-10-29 15:51:17 +00002516/**
2517 * xmlStrVPrintf:
2518 * @buf: the result buffer.
2519 * @len: the result buffer length.
2520 * @msg: the message with printf formatting.
2521 * @ap: extra parameters for the message.
2522 *
2523 * Formats @msg and places result into @buf.
2524 *
2525 * Returns the number of characters written to @buf or -1 if an error occurs.
2526 */
2527int
2528xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) {
2529 int ret;
2530
2531 if((buf == NULL) || (msg == NULL)) {
2532 return(-1);
2533 }
2534
2535 ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
2536 buf[len - 1] = 0; /* be safe ! */
2537
2538 return(ret);
2539}
Owen Taylor3473f882001-02-23 17:55:21 +00002540/************************************************************************
2541 * *
2542 * Commodity functions, cleanup needed ? *
2543 * *
2544 ************************************************************************/
2545
2546/**
2547 * areBlanks:
2548 * @ctxt: an XML parser context
2549 * @str: a xmlChar *
2550 * @len: the size of @str
2551 *
2552 * Is this a sequence of blank chars that one can ignore ?
2553 *
2554 * Returns 1 if ignorable 0 otherwise.
2555 */
2556
2557static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2558 int i, ret;
2559 xmlNodePtr lastChild;
2560
Daniel Veillard05c13a22001-09-09 08:38:09 +00002561 /*
2562 * Don't spend time trying to differentiate them, the same callback is
2563 * used !
2564 */
2565 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002566 return(0);
2567
Owen Taylor3473f882001-02-23 17:55:21 +00002568 /*
2569 * Check for xml:space value.
2570 */
2571 if (*(ctxt->space) == 1)
2572 return(0);
2573
2574 /*
2575 * Check that the string is made of blanks
2576 */
2577 for (i = 0;i < len;i++)
William M. Brack76e95df2003-10-18 16:20:14 +00002578 if (!(IS_BLANK_CH(str[i]))) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002579
2580 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002581 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002582 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002583 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002584 if (ctxt->myDoc != NULL) {
2585 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2586 if (ret == 0) return(1);
2587 if (ret == 1) return(0);
2588 }
2589
2590 /*
2591 * Otherwise, heuristic :-\
2592 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002593 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002594 if ((ctxt->node->children == NULL) &&
2595 (RAW == '<') && (NXT(1) == '/')) return(0);
2596
2597 lastChild = xmlGetLastChild(ctxt->node);
2598 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002599 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2600 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 } else if (xmlNodeIsText(lastChild))
2602 return(0);
2603 else if ((ctxt->node->children != NULL) &&
2604 (xmlNodeIsText(ctxt->node->children)))
2605 return(0);
2606 return(1);
2607}
2608
Owen Taylor3473f882001-02-23 17:55:21 +00002609/************************************************************************
2610 * *
2611 * Extra stuff for namespace support *
2612 * Relates to http://www.w3.org/TR/WD-xml-names *
2613 * *
2614 ************************************************************************/
2615
2616/**
2617 * xmlSplitQName:
2618 * @ctxt: an XML parser context
2619 * @name: an XML parser context
2620 * @prefix: a xmlChar **
2621 *
2622 * parse an UTF8 encoded XML qualified name string
2623 *
2624 * [NS 5] QName ::= (Prefix ':')? LocalPart
2625 *
2626 * [NS 6] Prefix ::= NCName
2627 *
2628 * [NS 7] LocalPart ::= NCName
2629 *
2630 * Returns the local part, and prefix is updated
2631 * to get the Prefix if any.
2632 */
2633
2634xmlChar *
2635xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2636 xmlChar buf[XML_MAX_NAMELEN + 5];
2637 xmlChar *buffer = NULL;
2638 int len = 0;
2639 int max = XML_MAX_NAMELEN;
2640 xmlChar *ret = NULL;
2641 const xmlChar *cur = name;
2642 int c;
2643
2644 *prefix = NULL;
2645
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002646 if (cur == NULL) return(NULL);
2647
Owen Taylor3473f882001-02-23 17:55:21 +00002648#ifndef XML_XML_NAMESPACE
2649 /* xml: prefix is not really a namespace */
2650 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2651 (cur[2] == 'l') && (cur[3] == ':'))
2652 return(xmlStrdup(name));
2653#endif
2654
Daniel Veillard597bc482003-07-24 16:08:28 +00002655 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002656 if (cur[0] == ':')
2657 return(xmlStrdup(name));
2658
2659 c = *cur++;
2660 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2661 buf[len++] = c;
2662 c = *cur++;
2663 }
2664 if (len >= max) {
2665 /*
2666 * Okay someone managed to make a huge name, so he's ready to pay
2667 * for the processing speed.
2668 */
2669 max = len * 2;
2670
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002671 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 return(NULL);
2675 }
2676 memcpy(buffer, buf, len);
2677 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2678 if (len + 10 > max) {
2679 max *= 2;
2680 buffer = (xmlChar *) xmlRealloc(buffer,
2681 max * sizeof(xmlChar));
2682 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002683 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002684 return(NULL);
2685 }
2686 }
2687 buffer[len++] = c;
2688 c = *cur++;
2689 }
2690 buffer[len] = 0;
2691 }
2692
Daniel Veillard597bc482003-07-24 16:08:28 +00002693 /* nasty but well=formed
2694 if ((c == ':') && (*cur == 0)) {
2695 return(xmlStrdup(name));
2696 } */
2697
Owen Taylor3473f882001-02-23 17:55:21 +00002698 if (buffer == NULL)
2699 ret = xmlStrndup(buf, len);
2700 else {
2701 ret = buffer;
2702 buffer = NULL;
2703 max = XML_MAX_NAMELEN;
2704 }
2705
2706
2707 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002708 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002709 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002710 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002711 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002712 }
Owen Taylor3473f882001-02-23 17:55:21 +00002713 len = 0;
2714
Daniel Veillardbb284f42002-10-16 18:02:47 +00002715 /*
2716 * Check that the first character is proper to start
2717 * a new name
2718 */
2719 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2720 ((c >= 0x41) && (c <= 0x5A)) ||
2721 (c == '_') || (c == ':'))) {
2722 int l;
2723 int first = CUR_SCHAR(cur, l);
2724
2725 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002726 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002727 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002728 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002729 }
2730 }
2731 cur++;
2732
Owen Taylor3473f882001-02-23 17:55:21 +00002733 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2734 buf[len++] = c;
2735 c = *cur++;
2736 }
2737 if (len >= max) {
2738 /*
2739 * Okay someone managed to make a huge name, so he's ready to pay
2740 * for the processing speed.
2741 */
2742 max = len * 2;
2743
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002744 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002745 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 return(NULL);
2748 }
2749 memcpy(buffer, buf, len);
2750 while (c != 0) { /* tested bigname2.xml */
2751 if (len + 10 > max) {
2752 max *= 2;
2753 buffer = (xmlChar *) xmlRealloc(buffer,
2754 max * sizeof(xmlChar));
2755 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002756 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002757 return(NULL);
2758 }
2759 }
2760 buffer[len++] = c;
2761 c = *cur++;
2762 }
2763 buffer[len] = 0;
2764 }
2765
2766 if (buffer == NULL)
2767 ret = xmlStrndup(buf, len);
2768 else {
2769 ret = buffer;
2770 }
2771 }
2772
2773 return(ret);
2774}
2775
2776/************************************************************************
2777 * *
2778 * The parser itself *
2779 * Relates to http://www.w3.org/TR/REC-xml *
2780 * *
2781 ************************************************************************/
2782
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002783static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002784static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002786
Owen Taylor3473f882001-02-23 17:55:21 +00002787/**
2788 * xmlParseName:
2789 * @ctxt: an XML parser context
2790 *
2791 * parse an XML name.
2792 *
2793 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2794 * CombiningChar | Extender
2795 *
2796 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2797 *
2798 * [6] Names ::= Name (S Name)*
2799 *
2800 * Returns the Name parsed or NULL
2801 */
2802
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002803const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002804xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002805 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002806 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002807 int count = 0;
2808
2809 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002810
2811 /*
2812 * Accelerator for simple ASCII names
2813 */
2814 in = ctxt->input->cur;
2815 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2816 ((*in >= 0x41) && (*in <= 0x5A)) ||
2817 (*in == '_') || (*in == ':')) {
2818 in++;
2819 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2820 ((*in >= 0x41) && (*in <= 0x5A)) ||
2821 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002822 (*in == '_') || (*in == '-') ||
2823 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002825 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002826 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002827 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002828 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002829 ctxt->nbChars += count;
2830 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002831 if (ret == NULL)
2832 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 return(ret);
2834 }
2835 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002836 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002837}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002838
Daniel Veillard46de64e2002-05-29 08:21:33 +00002839/**
2840 * xmlParseNameAndCompare:
2841 * @ctxt: an XML parser context
2842 *
2843 * parse an XML name and compares for match
2844 * (specialized for endtag parsing)
2845 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002846 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2847 * and the name for mismatch
2848 */
2849
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002850static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002851xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2852 const xmlChar *cmp = other;
2853 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002854 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002855
2856 GROW;
2857
2858 in = ctxt->input->cur;
2859 while (*in != 0 && *in == *cmp) {
2860 ++in;
2861 ++cmp;
2862 }
William M. Brack76e95df2003-10-18 16:20:14 +00002863 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002864 /* success */
2865 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002866 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002867 }
2868 /* failure (or end of input buffer), check with full function */
2869 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002870 /* strings coming from the dictionnary direct compare possible */
2871 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002872 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002873 }
2874 return ret;
2875}
2876
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002877static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002878xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002879 int len = 0, l;
2880 int c;
2881 int count = 0;
2882
2883 /*
2884 * Handler for more complex cases
2885 */
2886 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002887 c = CUR_CHAR(l);
2888 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2889 (!IS_LETTER(c) && (c != '_') &&
2890 (c != ':'))) {
2891 return(NULL);
2892 }
2893
2894 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002895 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002896 (c == '.') || (c == '-') ||
2897 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002898 (IS_COMBINING(c)) ||
2899 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (count++ > 100) {
2901 count = 0;
2902 GROW;
2903 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002904 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002905 NEXTL(l);
2906 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002907 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002908 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002909}
2910
2911/**
2912 * xmlParseStringName:
2913 * @ctxt: an XML parser context
2914 * @str: a pointer to the string pointer (IN/OUT)
2915 *
2916 * parse an XML name.
2917 *
2918 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2919 * CombiningChar | Extender
2920 *
2921 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2922 *
2923 * [6] Names ::= Name (S Name)*
2924 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002925 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002926 * is updated to the current location in the string.
2927 */
2928
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002929static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002930xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2931 xmlChar buf[XML_MAX_NAMELEN + 5];
2932 const xmlChar *cur = *str;
2933 int len = 0, l;
2934 int c;
2935
2936 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002937 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002938 (c != ':')) {
2939 return(NULL);
2940 }
2941
William M. Brack871611b2003-10-18 04:53:14 +00002942 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002943 (c == '.') || (c == '-') ||
2944 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002945 (IS_COMBINING(c)) ||
2946 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002947 COPY_BUF(l,buf,len,c);
2948 cur += l;
2949 c = CUR_SCHAR(cur, l);
2950 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2951 /*
2952 * Okay someone managed to make a huge name, so he's ready to pay
2953 * for the processing speed.
2954 */
2955 xmlChar *buffer;
2956 int max = len * 2;
2957
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002958 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002959 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 return(NULL);
2962 }
2963 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002964 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002965 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002966 (c == '.') || (c == '-') ||
2967 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002968 (IS_COMBINING(c)) ||
2969 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002970 if (len + 10 > max) {
2971 max *= 2;
2972 buffer = (xmlChar *) xmlRealloc(buffer,
2973 max * sizeof(xmlChar));
2974 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002975 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002976 return(NULL);
2977 }
2978 }
2979 COPY_BUF(l,buffer,len,c);
2980 cur += l;
2981 c = CUR_SCHAR(cur, l);
2982 }
2983 buffer[len] = 0;
2984 *str = cur;
2985 return(buffer);
2986 }
2987 }
2988 *str = cur;
2989 return(xmlStrndup(buf, len));
2990}
2991
2992/**
2993 * xmlParseNmtoken:
2994 * @ctxt: an XML parser context
2995 *
2996 * parse an XML Nmtoken.
2997 *
2998 * [7] Nmtoken ::= (NameChar)+
2999 *
3000 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
3001 *
3002 * Returns the Nmtoken parsed or NULL
3003 */
3004
3005xmlChar *
3006xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3007 xmlChar buf[XML_MAX_NAMELEN + 5];
3008 int len = 0, l;
3009 int c;
3010 int count = 0;
3011
3012 GROW;
3013 c = CUR_CHAR(l);
3014
William M. Brack871611b2003-10-18 04:53:14 +00003015 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003016 (c == '.') || (c == '-') ||
3017 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003018 (IS_COMBINING(c)) ||
3019 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003020 if (count++ > 100) {
3021 count = 0;
3022 GROW;
3023 }
3024 COPY_BUF(l,buf,len,c);
3025 NEXTL(l);
3026 c = CUR_CHAR(l);
3027 if (len >= XML_MAX_NAMELEN) {
3028 /*
3029 * Okay someone managed to make a huge token, so he's ready to pay
3030 * for the processing speed.
3031 */
3032 xmlChar *buffer;
3033 int max = len * 2;
3034
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003035 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003036 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 return(NULL);
3039 }
3040 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003041 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003042 (c == '.') || (c == '-') ||
3043 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003044 (IS_COMBINING(c)) ||
3045 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (count++ > 100) {
3047 count = 0;
3048 GROW;
3049 }
3050 if (len + 10 > max) {
3051 max *= 2;
3052 buffer = (xmlChar *) xmlRealloc(buffer,
3053 max * sizeof(xmlChar));
3054 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003055 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003056 return(NULL);
3057 }
3058 }
3059 COPY_BUF(l,buffer,len,c);
3060 NEXTL(l);
3061 c = CUR_CHAR(l);
3062 }
3063 buffer[len] = 0;
3064 return(buffer);
3065 }
3066 }
3067 if (len == 0)
3068 return(NULL);
3069 return(xmlStrndup(buf, len));
3070}
3071
3072/**
3073 * xmlParseEntityValue:
3074 * @ctxt: an XML parser context
3075 * @orig: if non-NULL store a copy of the original entity value
3076 *
3077 * parse a value for ENTITY declarations
3078 *
3079 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3080 * "'" ([^%&'] | PEReference | Reference)* "'"
3081 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003082 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003083 */
3084
3085xmlChar *
3086xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3087 xmlChar *buf = NULL;
3088 int len = 0;
3089 int size = XML_PARSER_BUFFER_SIZE;
3090 int c, l;
3091 xmlChar stop;
3092 xmlChar *ret = NULL;
3093 const xmlChar *cur = NULL;
3094 xmlParserInputPtr input;
3095
3096 if (RAW == '"') stop = '"';
3097 else if (RAW == '\'') stop = '\'';
3098 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003099 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 return(NULL);
3101 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003102 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003103 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003104 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003105 return(NULL);
3106 }
3107
3108 /*
3109 * The content of the entity definition is copied in a buffer.
3110 */
3111
3112 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3113 input = ctxt->input;
3114 GROW;
3115 NEXT;
3116 c = CUR_CHAR(l);
3117 /*
3118 * NOTE: 4.4.5 Included in Literal
3119 * When a parameter entity reference appears in a literal entity
3120 * value, ... a single or double quote character in the replacement
3121 * text is always treated as a normal data character and will not
3122 * terminate the literal.
3123 * In practice it means we stop the loop only when back at parsing
3124 * the initial entity and the quote is found
3125 */
William M. Brack871611b2003-10-18 04:53:14 +00003126 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003127 (ctxt->input != input))) {
3128 if (len + 5 >= size) {
3129 size *= 2;
3130 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3131 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003132 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 return(NULL);
3134 }
3135 }
3136 COPY_BUF(l,buf,len,c);
3137 NEXTL(l);
3138 /*
3139 * Pop-up of finished entities.
3140 */
3141 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3142 xmlPopInput(ctxt);
3143
3144 GROW;
3145 c = CUR_CHAR(l);
3146 if (c == 0) {
3147 GROW;
3148 c = CUR_CHAR(l);
3149 }
3150 }
3151 buf[len] = 0;
3152
3153 /*
3154 * Raise problem w.r.t. '&' and '%' being used in non-entities
3155 * reference constructs. Note Charref will be handled in
3156 * xmlStringDecodeEntities()
3157 */
3158 cur = buf;
3159 while (*cur != 0) { /* non input consuming */
3160 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3161 xmlChar *name;
3162 xmlChar tmp = *cur;
3163
3164 cur++;
3165 name = xmlParseStringName(ctxt, &cur);
3166 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003167 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003168 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003169 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003170 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003171 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3172 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003173 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003174 }
3175 if (name != NULL)
3176 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003177 if (*cur == 0)
3178 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003179 }
3180 cur++;
3181 }
3182
3183 /*
3184 * Then PEReference entities are substituted.
3185 */
3186 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003187 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003188 xmlFree(buf);
3189 } else {
3190 NEXT;
3191 /*
3192 * NOTE: 4.4.7 Bypassed
3193 * When a general entity reference appears in the EntityValue in
3194 * an entity declaration, it is bypassed and left as is.
3195 * so XML_SUBSTITUTE_REF is not set here.
3196 */
3197 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3198 0, 0, 0);
3199 if (orig != NULL)
3200 *orig = buf;
3201 else
3202 xmlFree(buf);
3203 }
3204
3205 return(ret);
3206}
3207
3208/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003209 * xmlParseAttValueComplex:
3210 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003211 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003212 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003213 *
3214 * parse a value for an attribute, this is the fallback function
3215 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003216 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003220static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003221xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003222 xmlChar limit = 0;
3223 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003224 int len = 0;
3225 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003226 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003227 xmlChar *current = NULL;
3228 xmlEntityPtr ent;
3229
Owen Taylor3473f882001-02-23 17:55:21 +00003230 if (NXT(0) == '"') {
3231 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3232 limit = '"';
3233 NEXT;
3234 } else if (NXT(0) == '\'') {
3235 limit = '\'';
3236 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3237 NEXT;
3238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003239 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003240 return(NULL);
3241 }
3242
3243 /*
3244 * allocate a translation buffer.
3245 */
3246 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003247 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003248 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003249
3250 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003251 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003252 */
3253 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003254 while ((NXT(0) != limit) && /* checked */
3255 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003256 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003257 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003258 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003259 if (NXT(1) == '#') {
3260 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003261
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003263 if (ctxt->replaceEntities) {
3264 if (len > buf_size - 10) {
3265 growBuffer(buf);
3266 }
3267 buf[len++] = '&';
3268 } else {
3269 /*
3270 * The reparsing will be done in xmlStringGetNodeList()
3271 * called by the attribute() function in SAX.c
3272 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003273 if (len > buf_size - 10) {
3274 growBuffer(buf);
3275 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003276 buf[len++] = '&';
3277 buf[len++] = '#';
3278 buf[len++] = '3';
3279 buf[len++] = '8';
3280 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003283 if (len > buf_size - 10) {
3284 growBuffer(buf);
3285 }
Owen Taylor3473f882001-02-23 17:55:21 +00003286 len += xmlCopyChar(0, &buf[len], val);
3287 }
3288 } else {
3289 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003290 if ((ent != NULL) &&
3291 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3292 if (len > buf_size - 10) {
3293 growBuffer(buf);
3294 }
3295 if ((ctxt->replaceEntities == 0) &&
3296 (ent->content[0] == '&')) {
3297 buf[len++] = '&';
3298 buf[len++] = '#';
3299 buf[len++] = '3';
3300 buf[len++] = '8';
3301 buf[len++] = ';';
3302 } else {
3303 buf[len++] = ent->content[0];
3304 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003305 } else if ((ent != NULL) &&
3306 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003307 xmlChar *rep;
3308
3309 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3310 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003311 XML_SUBSTITUTE_REF,
3312 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003313 if (rep != NULL) {
3314 current = rep;
3315 while (*current != 0) { /* non input consuming */
3316 buf[len++] = *current++;
3317 if (len > buf_size - 10) {
3318 growBuffer(buf);
3319 }
3320 }
3321 xmlFree(rep);
3322 }
3323 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003324 if (len > buf_size - 10) {
3325 growBuffer(buf);
3326 }
Owen Taylor3473f882001-02-23 17:55:21 +00003327 if (ent->content != NULL)
3328 buf[len++] = ent->content[0];
3329 }
3330 } else if (ent != NULL) {
3331 int i = xmlStrlen(ent->name);
3332 const xmlChar *cur = ent->name;
3333
3334 /*
3335 * This may look absurd but is needed to detect
3336 * entities problems
3337 */
3338 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3339 (ent->content != NULL)) {
3340 xmlChar *rep;
3341 rep = xmlStringDecodeEntities(ctxt, ent->content,
3342 XML_SUBSTITUTE_REF, 0, 0, 0);
3343 if (rep != NULL)
3344 xmlFree(rep);
3345 }
3346
3347 /*
3348 * Just output the reference
3349 */
3350 buf[len++] = '&';
3351 if (len > buf_size - i - 10) {
3352 growBuffer(buf);
3353 }
3354 for (;i > 0;i--)
3355 buf[len++] = *cur++;
3356 buf[len++] = ';';
3357 }
3358 }
3359 } else {
3360 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003361 if ((len != 0) || (!normalize)) {
3362 if ((!normalize) || (!in_space)) {
3363 COPY_BUF(l,buf,len,0x20);
3364 if (len > buf_size - 10) {
3365 growBuffer(buf);
3366 }
3367 }
3368 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003369 }
3370 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003371 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003372 COPY_BUF(l,buf,len,c);
3373 if (len > buf_size - 10) {
3374 growBuffer(buf);
3375 }
3376 }
3377 NEXTL(l);
3378 }
3379 GROW;
3380 c = CUR_CHAR(l);
3381 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003382 if ((in_space) && (normalize)) {
3383 while (buf[len - 1] == 0x20) len--;
3384 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003385 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003386 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003387 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003388 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003389 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3390 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003391 } else
3392 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003393 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003394 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003395
3396mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003397 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003398 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003399}
3400
3401/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003402 * xmlParseAttValue:
3403 * @ctxt: an XML parser context
3404 *
3405 * parse a value for an attribute
3406 * Note: the parser won't do substitution of entities here, this
3407 * will be handled later in xmlStringGetNodeList
3408 *
3409 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3410 * "'" ([^<&'] | Reference)* "'"
3411 *
3412 * 3.3.3 Attribute-Value Normalization:
3413 * Before the value of an attribute is passed to the application or
3414 * checked for validity, the XML processor must normalize it as follows:
3415 * - a character reference is processed by appending the referenced
3416 * character to the attribute value
3417 * - an entity reference is processed by recursively processing the
3418 * replacement text of the entity
3419 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3420 * appending #x20 to the normalized value, except that only a single
3421 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3422 * parsed entity or the literal entity value of an internal parsed entity
3423 * - other characters are processed by appending them to the normalized value
3424 * If the declared value is not CDATA, then the XML processor must further
3425 * process the normalized attribute value by discarding any leading and
3426 * trailing space (#x20) characters, and by replacing sequences of space
3427 * (#x20) characters by a single space (#x20) character.
3428 * All attributes for which no declaration has been read should be treated
3429 * by a non-validating parser as if declared CDATA.
3430 *
3431 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3432 */
3433
3434
3435xmlChar *
3436xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003437 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003438}
3439
3440/**
Owen Taylor3473f882001-02-23 17:55:21 +00003441 * xmlParseSystemLiteral:
3442 * @ctxt: an XML parser context
3443 *
3444 * parse an XML Literal
3445 *
3446 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3447 *
3448 * Returns the SystemLiteral parsed or NULL
3449 */
3450
3451xmlChar *
3452xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3453 xmlChar *buf = NULL;
3454 int len = 0;
3455 int size = XML_PARSER_BUFFER_SIZE;
3456 int cur, l;
3457 xmlChar stop;
3458 int state = ctxt->instate;
3459 int count = 0;
3460
3461 SHRINK;
3462 if (RAW == '"') {
3463 NEXT;
3464 stop = '"';
3465 } else if (RAW == '\'') {
3466 NEXT;
3467 stop = '\'';
3468 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003469 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003470 return(NULL);
3471 }
3472
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003473 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003474 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003475 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003476 return(NULL);
3477 }
3478 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3479 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003480 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003481 if (len + 5 >= size) {
3482 size *= 2;
3483 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3484 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 ctxt->instate = (xmlParserInputState) state;
3487 return(NULL);
3488 }
3489 }
3490 count++;
3491 if (count > 50) {
3492 GROW;
3493 count = 0;
3494 }
3495 COPY_BUF(l,buf,len,cur);
3496 NEXTL(l);
3497 cur = CUR_CHAR(l);
3498 if (cur == 0) {
3499 GROW;
3500 SHRINK;
3501 cur = CUR_CHAR(l);
3502 }
3503 }
3504 buf[len] = 0;
3505 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003506 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003507 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003508 } else {
3509 NEXT;
3510 }
3511 return(buf);
3512}
3513
3514/**
3515 * xmlParsePubidLiteral:
3516 * @ctxt: an XML parser context
3517 *
3518 * parse an XML public literal
3519 *
3520 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3521 *
3522 * Returns the PubidLiteral parsed or NULL.
3523 */
3524
3525xmlChar *
3526xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3527 xmlChar *buf = NULL;
3528 int len = 0;
3529 int size = XML_PARSER_BUFFER_SIZE;
3530 xmlChar cur;
3531 xmlChar stop;
3532 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003533 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003534
3535 SHRINK;
3536 if (RAW == '"') {
3537 NEXT;
3538 stop = '"';
3539 } else if (RAW == '\'') {
3540 NEXT;
3541 stop = '\'';
3542 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003543 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003544 return(NULL);
3545 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003546 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003547 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003548 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003549 return(NULL);
3550 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003551 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003552 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003553 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003554 if (len + 1 >= size) {
3555 size *= 2;
3556 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3557 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003558 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003559 return(NULL);
3560 }
3561 }
3562 buf[len++] = cur;
3563 count++;
3564 if (count > 50) {
3565 GROW;
3566 count = 0;
3567 }
3568 NEXT;
3569 cur = CUR;
3570 if (cur == 0) {
3571 GROW;
3572 SHRINK;
3573 cur = CUR;
3574 }
3575 }
3576 buf[len] = 0;
3577 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003578 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003579 } else {
3580 NEXT;
3581 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003582 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003583 return(buf);
3584}
3585
Daniel Veillard48b2f892001-02-25 16:11:03 +00003586void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003587/**
3588 * xmlParseCharData:
3589 * @ctxt: an XML parser context
3590 * @cdata: int indicating whether we are within a CDATA section
3591 *
3592 * parse a CharData section.
3593 * if we are within a CDATA section ']]>' marks an end of section.
3594 *
3595 * The right angle bracket (>) may be represented using the string "&gt;",
3596 * and must, for compatibility, be escaped using "&gt;" or a character
3597 * reference when it appears in the string "]]>" in content, when that
3598 * string is not marking the end of a CDATA section.
3599 *
3600 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3601 */
3602
3603void
3604xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003605 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003606 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003607 int line = ctxt->input->line;
3608 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003609
3610 SHRINK;
3611 GROW;
3612 /*
3613 * Accelerated common case where input don't need to be
3614 * modified before passing it to the handler.
3615 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003616 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003617 in = ctxt->input->cur;
3618 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003619get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003620 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3621 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003622 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003623 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003624 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003625 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003626 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003627 ctxt->input->line++;
3628 in++;
3629 }
3630 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003631 }
3632 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003633 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003634 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003635 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003636 return;
3637 }
3638 in++;
3639 goto get_more;
3640 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003641 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003642 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003643 if ((ctxt->sax->ignorableWhitespace !=
3644 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003645 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003646 const xmlChar *tmp = ctxt->input->cur;
3647 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003648
Daniel Veillarda7374592001-05-10 14:17:55 +00003649 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003650 ctxt->sax->ignorableWhitespace(ctxt->userData,
3651 tmp, nbchar);
3652 } else if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData,
3654 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003655 line = ctxt->input->line;
3656 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003657 } else {
3658 if (ctxt->sax->characters != NULL)
3659 ctxt->sax->characters(ctxt->userData,
3660 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003661 line = ctxt->input->line;
3662 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003663 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003664 }
3665 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003666 if (*in == 0xD) {
3667 in++;
3668 if (*in == 0xA) {
3669 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003670 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003671 ctxt->input->line++;
3672 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003673 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003674 in--;
3675 }
3676 if (*in == '<') {
3677 return;
3678 }
3679 if (*in == '&') {
3680 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003681 }
3682 SHRINK;
3683 GROW;
3684 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003685 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003686 nbchar = 0;
3687 }
Daniel Veillard50582112001-03-26 22:52:16 +00003688 ctxt->input->line = line;
3689 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003690 xmlParseCharDataComplex(ctxt, cdata);
3691}
3692
Daniel Veillard01c13b52002-12-10 15:19:08 +00003693/**
3694 * xmlParseCharDataComplex:
3695 * @ctxt: an XML parser context
3696 * @cdata: int indicating whether we are within a CDATA section
3697 *
3698 * parse a CharData section.this is the fallback function
3699 * of xmlParseCharData() when the parsing requires handling
3700 * of non-ASCII characters.
3701 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003702void
3703xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003704 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3705 int nbchar = 0;
3706 int cur, l;
3707 int count = 0;
3708
3709 SHRINK;
3710 GROW;
3711 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003712 while ((cur != '<') && /* checked */
3713 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003714 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003715 if ((cur == ']') && (NXT(1) == ']') &&
3716 (NXT(2) == '>')) {
3717 if (cdata) break;
3718 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003719 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003720 }
3721 }
3722 COPY_BUF(l,buf,nbchar,cur);
3723 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003724 buf[nbchar] = 0;
3725
Owen Taylor3473f882001-02-23 17:55:21 +00003726 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003727 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003728 */
3729 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3730 if (areBlanks(ctxt, buf, nbchar)) {
3731 if (ctxt->sax->ignorableWhitespace != NULL)
3732 ctxt->sax->ignorableWhitespace(ctxt->userData,
3733 buf, nbchar);
3734 } else {
3735 if (ctxt->sax->characters != NULL)
3736 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3737 }
3738 }
3739 nbchar = 0;
3740 }
3741 count++;
3742 if (count > 50) {
3743 GROW;
3744 count = 0;
3745 }
3746 NEXTL(l);
3747 cur = CUR_CHAR(l);
3748 }
3749 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003750 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003751 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003752 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003753 */
3754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3755 if (areBlanks(ctxt, buf, nbchar)) {
3756 if (ctxt->sax->ignorableWhitespace != NULL)
3757 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3758 } else {
3759 if (ctxt->sax->characters != NULL)
3760 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3761 }
3762 }
3763 }
3764}
3765
3766/**
3767 * xmlParseExternalID:
3768 * @ctxt: an XML parser context
3769 * @publicID: a xmlChar** receiving PubidLiteral
3770 * @strict: indicate whether we should restrict parsing to only
3771 * production [75], see NOTE below
3772 *
3773 * Parse an External ID or a Public ID
3774 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003775 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003776 * 'PUBLIC' S PubidLiteral S SystemLiteral
3777 *
3778 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3779 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3780 *
3781 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3782 *
3783 * Returns the function returns SystemLiteral and in the second
3784 * case publicID receives PubidLiteral, is strict is off
3785 * it is possible to return NULL and have publicID set.
3786 */
3787
3788xmlChar *
3789xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3790 xmlChar *URI = NULL;
3791
3792 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003793
3794 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003795 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003796 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003797 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3799 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003800 }
3801 SKIP_BLANKS;
3802 URI = xmlParseSystemLiteral(ctxt);
3803 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003804 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003806 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003807 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003808 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003809 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003810 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003811 }
3812 SKIP_BLANKS;
3813 *publicID = xmlParsePubidLiteral(ctxt);
3814 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003815 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003816 }
3817 if (strict) {
3818 /*
3819 * We don't handle [83] so "S SystemLiteral" is required.
3820 */
William M. Brack76e95df2003-10-18 16:20:14 +00003821 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003822 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003823 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003824 }
3825 } else {
3826 /*
3827 * We handle [83] so we return immediately, if
3828 * "S SystemLiteral" is not detected. From a purely parsing
3829 * point of view that's a nice mess.
3830 */
3831 const xmlChar *ptr;
3832 GROW;
3833
3834 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003835 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003836
William M. Brack76e95df2003-10-18 16:20:14 +00003837 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003838 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3839 }
3840 SKIP_BLANKS;
3841 URI = xmlParseSystemLiteral(ctxt);
3842 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003843 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003844 }
3845 }
3846 return(URI);
3847}
3848
3849/**
3850 * xmlParseComment:
3851 * @ctxt: an XML parser context
3852 *
3853 * Skip an XML (SGML) comment <!-- .... -->
3854 * The spec says that "For compatibility, the string "--" (double-hyphen)
3855 * must not occur within comments. "
3856 *
3857 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3858 */
3859void
3860xmlParseComment(xmlParserCtxtPtr ctxt) {
3861 xmlChar *buf = NULL;
3862 int len;
3863 int size = XML_PARSER_BUFFER_SIZE;
3864 int q, ql;
3865 int r, rl;
3866 int cur, l;
3867 xmlParserInputState state;
3868 xmlParserInputPtr input = ctxt->input;
3869 int count = 0;
3870
3871 /*
3872 * Check that there is a comment right here.
3873 */
3874 if ((RAW != '<') || (NXT(1) != '!') ||
3875 (NXT(2) != '-') || (NXT(3) != '-')) return;
3876
3877 state = ctxt->instate;
3878 ctxt->instate = XML_PARSER_COMMENT;
3879 SHRINK;
3880 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003881 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003882 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003883 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003884 ctxt->instate = state;
3885 return;
3886 }
3887 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003888 if (q == 0)
3889 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003890 NEXTL(ql);
3891 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003892 if (r == 0)
3893 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003894 NEXTL(rl);
3895 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003896 if (cur == 0)
3897 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003898 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003899 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003900 ((cur != '>') ||
3901 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003902 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003903 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003904 }
3905 if (len + 5 >= size) {
3906 size *= 2;
3907 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3908 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003909 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003910 ctxt->instate = state;
3911 return;
3912 }
3913 }
3914 COPY_BUF(ql,buf,len,q);
3915 q = r;
3916 ql = rl;
3917 r = cur;
3918 rl = l;
3919
3920 count++;
3921 if (count > 50) {
3922 GROW;
3923 count = 0;
3924 }
3925 NEXTL(l);
3926 cur = CUR_CHAR(l);
3927 if (cur == 0) {
3928 SHRINK;
3929 GROW;
3930 cur = CUR_CHAR(l);
3931 }
3932 }
3933 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003934 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003935 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003936 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003937 xmlFree(buf);
3938 } else {
3939 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003940 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3941 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003942 }
3943 NEXT;
3944 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3945 (!ctxt->disableSAX))
3946 ctxt->sax->comment(ctxt->userData, buf);
3947 xmlFree(buf);
3948 }
3949 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003950 return;
3951not_terminated:
3952 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3953 "Comment not terminated\n", NULL);
3954 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003955}
3956
3957/**
3958 * xmlParsePITarget:
3959 * @ctxt: an XML parser context
3960 *
3961 * parse the name of a PI
3962 *
3963 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3964 *
3965 * Returns the PITarget name or NULL
3966 */
3967
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003968const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003969xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003970 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003971
3972 name = xmlParseName(ctxt);
3973 if ((name != NULL) &&
3974 ((name[0] == 'x') || (name[0] == 'X')) &&
3975 ((name[1] == 'm') || (name[1] == 'M')) &&
3976 ((name[2] == 'l') || (name[2] == 'L'))) {
3977 int i;
3978 if ((name[0] == 'x') && (name[1] == 'm') &&
3979 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003980 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003981 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003982 return(name);
3983 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003984 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003985 return(name);
3986 }
3987 for (i = 0;;i++) {
3988 if (xmlW3CPIs[i] == NULL) break;
3989 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3990 return(name);
3991 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003992 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3993 "xmlParsePITarget: invalid name prefix 'xml'\n",
3994 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 }
3996 return(name);
3997}
3998
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must consist of the pseudo-attribute catalog followed by
 * '=' and a quoted URL, with nothing but blanks around them. A syntax
 * error produces an XML_WAR_CATALOG_PI warning, except for a missing
 * '=' in which case the PI is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++) ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign, a missing one is not reported */
    for (; IS_BLANK_CH(*p); p++) ;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    for (; IS_BLANK_CH(*p); p++) ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    valueStart = p;
    for (; (*p != 0) && (*p != quote); p++) ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++) ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4060
Owen Taylor3473f882001-02-23 17:55:21 +00004061/**
4062 * xmlParsePI:
4063 * @ctxt: an XML parser context
4064 *
4065 * parse an XML Processing Instruction.
4066 *
4067 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4068 *
4069 * The processing is transfered to SAX once parsed.
4070 */
4071
4072void
4073xmlParsePI(xmlParserCtxtPtr ctxt) {
4074 xmlChar *buf = NULL;
4075 int len = 0;
4076 int size = XML_PARSER_BUFFER_SIZE;
4077 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004078 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004079 xmlParserInputState state;
4080 int count = 0;
4081
4082 if ((RAW == '<') && (NXT(1) == '?')) {
4083 xmlParserInputPtr input = ctxt->input;
4084 state = ctxt->instate;
4085 ctxt->instate = XML_PARSER_PI;
4086 /*
4087 * this is a Processing Instruction.
4088 */
4089 SKIP(2);
4090 SHRINK;
4091
4092 /*
4093 * Parse the target name and check for special support like
4094 * namespace.
4095 */
4096 target = xmlParsePITarget(ctxt);
4097 if (target != NULL) {
4098 if ((RAW == '?') && (NXT(1) == '>')) {
4099 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004100 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4101 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004102 }
4103 SKIP(2);
4104
4105 /*
4106 * SAX: PI detected.
4107 */
4108 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4109 (ctxt->sax->processingInstruction != NULL))
4110 ctxt->sax->processingInstruction(ctxt->userData,
4111 target, NULL);
4112 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004113 return;
4114 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004115 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004116 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004117 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004118 ctxt->instate = state;
4119 return;
4120 }
4121 cur = CUR;
4122 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004123 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4124 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004125 }
4126 SKIP_BLANKS;
4127 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004128 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004129 ((cur != '?') || (NXT(1) != '>'))) {
4130 if (len + 5 >= size) {
4131 size *= 2;
4132 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4133 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004134 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004135 ctxt->instate = state;
4136 return;
4137 }
4138 }
4139 count++;
4140 if (count > 50) {
4141 GROW;
4142 count = 0;
4143 }
4144 COPY_BUF(l,buf,len,cur);
4145 NEXTL(l);
4146 cur = CUR_CHAR(l);
4147 if (cur == 0) {
4148 SHRINK;
4149 GROW;
4150 cur = CUR_CHAR(l);
4151 }
4152 }
4153 buf[len] = 0;
4154 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004155 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4156 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004157 } else {
4158 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4160 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004161 }
4162 SKIP(2);
4163
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004164#ifdef LIBXML_CATALOG_ENABLED
4165 if (((state == XML_PARSER_MISC) ||
4166 (state == XML_PARSER_START)) &&
4167 (xmlStrEqual(target, XML_CATALOG_PI))) {
4168 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4169 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4170 (allow == XML_CATA_ALLOW_ALL))
4171 xmlParseCatalogPI(ctxt, buf);
4172 }
4173#endif
4174
4175
Owen Taylor3473f882001-02-23 17:55:21 +00004176 /*
4177 * SAX: PI detected.
4178 */
4179 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4180 (ctxt->sax->processingInstruction != NULL))
4181 ctxt->sax->processingInstruction(ctxt->userData,
4182 target, buf);
4183 }
4184 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004185 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004186 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004187 }
4188 ctxt->instate = state;
4189 }
4190}
4191
4192/**
4193 * xmlParseNotationDecl:
4194 * @ctxt: an XML parser context
4195 *
4196 * parse a notation declaration
4197 *
4198 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4199 *
4200 * Hence there is actually 3 choices:
4201 * 'PUBLIC' S PubidLiteral
4202 * 'PUBLIC' S PubidLiteral S SystemLiteral
4203 * and 'SYSTEM' S SystemLiteral
4204 *
4205 * See the NOTE on xmlParseExternalID().
4206 */
4207
4208void
4209xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004210 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004211 xmlChar *Pubid;
4212 xmlChar *Systemid;
4213
Daniel Veillarda07050d2003-10-19 14:46:32 +00004214 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004215 xmlParserInputPtr input = ctxt->input;
4216 SHRINK;
4217 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004218 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004219 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4220 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004221 return;
4222 }
4223 SKIP_BLANKS;
4224
Daniel Veillard76d66f42001-05-16 21:05:17 +00004225 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004227 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004228 return;
4229 }
William M. Brack76e95df2003-10-18 16:20:14 +00004230 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004231 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004232 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return;
4234 }
4235 SKIP_BLANKS;
4236
4237 /*
4238 * Parse the IDs.
4239 */
4240 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4241 SKIP_BLANKS;
4242
4243 if (RAW == '>') {
4244 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004245 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4246 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004247 }
4248 NEXT;
4249 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4250 (ctxt->sax->notationDecl != NULL))
4251 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4252 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004253 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004254 }
Owen Taylor3473f882001-02-23 17:55:21 +00004255 if (Systemid != NULL) xmlFree(Systemid);
4256 if (Pubid != NULL) xmlFree(Pubid);
4257 }
4258}
4259
4260/**
4261 * xmlParseEntityDecl:
4262 * @ctxt: an XML parser context
4263 *
4264 * parse <!ENTITY declarations
4265 *
4266 * [70] EntityDecl ::= GEDecl | PEDecl
4267 *
4268 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4269 *
4270 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4271 *
4272 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4273 *
4274 * [74] PEDef ::= EntityValue | ExternalID
4275 *
4276 * [76] NDataDecl ::= S 'NDATA' S Name
4277 *
4278 * [ VC: Notation Declared ]
4279 * The Name must match the declared name of a notation.
4280 */
4281
4282void
4283xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004284 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004285 xmlChar *value = NULL;
4286 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004287 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 int isParameter = 0;
4289 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004290 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004291
4292 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004293 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004294 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004295 SHRINK;
4296 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004297 skipped = SKIP_BLANKS;
4298 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4300 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004301 }
Owen Taylor3473f882001-02-23 17:55:21 +00004302
4303 if (RAW == '%') {
4304 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004305 skipped = SKIP_BLANKS;
4306 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4308 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004309 }
Owen Taylor3473f882001-02-23 17:55:21 +00004310 isParameter = 1;
4311 }
4312
Daniel Veillard76d66f42001-05-16 21:05:17 +00004313 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004314 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004315 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4316 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004317 return;
4318 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004319 skipped = SKIP_BLANKS;
4320 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004321 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4322 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
Owen Taylor3473f882001-02-23 17:55:21 +00004324
Daniel Veillardf5582f12002-06-11 10:08:16 +00004325 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004326 /*
4327 * handle the various case of definitions...
4328 */
4329 if (isParameter) {
4330 if ((RAW == '"') || (RAW == '\'')) {
4331 value = xmlParseEntityValue(ctxt, &orig);
4332 if (value) {
4333 if ((ctxt->sax != NULL) &&
4334 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4335 ctxt->sax->entityDecl(ctxt->userData, name,
4336 XML_INTERNAL_PARAMETER_ENTITY,
4337 NULL, NULL, value);
4338 }
4339 } else {
4340 URI = xmlParseExternalID(ctxt, &literal, 1);
4341 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004342 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
4344 if (URI) {
4345 xmlURIPtr uri;
4346
4347 uri = xmlParseURI((const char *) URI);
4348 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004349 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4350 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004351 /*
4352 * This really ought to be a well formedness error
4353 * but the XML Core WG decided otherwise c.f. issue
4354 * E26 of the XML erratas.
4355 */
Owen Taylor3473f882001-02-23 17:55:21 +00004356 } else {
4357 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004358 /*
4359 * Okay this is foolish to block those but not
4360 * invalid URIs.
4361 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004362 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004363 } else {
4364 if ((ctxt->sax != NULL) &&
4365 (!ctxt->disableSAX) &&
4366 (ctxt->sax->entityDecl != NULL))
4367 ctxt->sax->entityDecl(ctxt->userData, name,
4368 XML_EXTERNAL_PARAMETER_ENTITY,
4369 literal, URI, NULL);
4370 }
4371 xmlFreeURI(uri);
4372 }
4373 }
4374 }
4375 } else {
4376 if ((RAW == '"') || (RAW == '\'')) {
4377 value = xmlParseEntityValue(ctxt, &orig);
4378 if ((ctxt->sax != NULL) &&
4379 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4380 ctxt->sax->entityDecl(ctxt->userData, name,
4381 XML_INTERNAL_GENERAL_ENTITY,
4382 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004383 /*
4384 * For expat compatibility in SAX mode.
4385 */
4386 if ((ctxt->myDoc == NULL) ||
4387 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4388 if (ctxt->myDoc == NULL) {
4389 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4390 }
4391 if (ctxt->myDoc->intSubset == NULL)
4392 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4393 BAD_CAST "fake", NULL, NULL);
4394
Daniel Veillard1af9a412003-08-20 22:54:39 +00004395 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4396 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004397 }
Owen Taylor3473f882001-02-23 17:55:21 +00004398 } else {
4399 URI = xmlParseExternalID(ctxt, &literal, 1);
4400 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004401 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004402 }
4403 if (URI) {
4404 xmlURIPtr uri;
4405
4406 uri = xmlParseURI((const char *)URI);
4407 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004408 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4409 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004410 /*
4411 * This really ought to be a well formedness error
4412 * but the XML Core WG decided otherwise c.f. issue
4413 * E26 of the XML erratas.
4414 */
Owen Taylor3473f882001-02-23 17:55:21 +00004415 } else {
4416 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004417 /*
4418 * Okay this is foolish to block those but not
4419 * invalid URIs.
4420 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004421 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 }
4423 xmlFreeURI(uri);
4424 }
4425 }
William M. Brack76e95df2003-10-18 16:20:14 +00004426 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4428 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004429 }
4430 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004431 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004432 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004433 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004434 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4435 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004436 }
4437 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004438 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4440 (ctxt->sax->unparsedEntityDecl != NULL))
4441 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4442 literal, URI, ndata);
4443 } else {
4444 if ((ctxt->sax != NULL) &&
4445 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4446 ctxt->sax->entityDecl(ctxt->userData, name,
4447 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4448 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004449 /*
4450 * For expat compatibility in SAX mode.
4451 * assuming the entity repalcement was asked for
4452 */
4453 if ((ctxt->replaceEntities != 0) &&
4454 ((ctxt->myDoc == NULL) ||
4455 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4456 if (ctxt->myDoc == NULL) {
4457 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4458 }
4459
4460 if (ctxt->myDoc->intSubset == NULL)
4461 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4462 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004463 xmlSAX2EntityDecl(ctxt, name,
4464 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4465 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004466 }
Owen Taylor3473f882001-02-23 17:55:21 +00004467 }
4468 }
4469 }
4470 SKIP_BLANKS;
4471 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004472 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004473 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004474 } else {
4475 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004476 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4477 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004478 }
4479 NEXT;
4480 }
4481 if (orig != NULL) {
4482 /*
4483 * Ugly mechanism to save the raw entity value.
4484 */
4485 xmlEntityPtr cur = NULL;
4486
4487 if (isParameter) {
4488 if ((ctxt->sax != NULL) &&
4489 (ctxt->sax->getParameterEntity != NULL))
4490 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4491 } else {
4492 if ((ctxt->sax != NULL) &&
4493 (ctxt->sax->getEntity != NULL))
4494 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004495 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004496 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004497 }
Owen Taylor3473f882001-02-23 17:55:21 +00004498 }
4499 if (cur != NULL) {
4500 if (cur->orig != NULL)
4501 xmlFree(orig);
4502 else
4503 cur->orig = orig;
4504 } else
4505 xmlFree(orig);
4506 }
Owen Taylor3473f882001-02-23 17:55:21 +00004507 if (value != NULL) xmlFree(value);
4508 if (URI != NULL) xmlFree(URI);
4509 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004510 }
4511}
4512
4513/**
4514 * xmlParseDefaultDecl:
4515 * @ctxt: an XML parser context
4516 * @value: Receive a possible fixed default value for the attribute
4517 *
4518 * Parse an attribute default declaration
4519 *
4520 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4521 *
4522 * [ VC: Required Attribute ]
4523 * if the default declaration is the keyword #REQUIRED, then the
4524 * attribute must be specified for all elements of the type in the
4525 * attribute-list declaration.
4526 *
4527 * [ VC: Attribute Default Legal ]
4528 * The declared default value must meet the lexical constraints of
4529 * the declared attribute type c.f. xmlValidateAttributeDecl()
4530 *
4531 * [ VC: Fixed Attribute Default ]
4532 * if an attribute has a default value declared with the #FIXED
4533 * keyword, instances of that attribute must match the default value.
4534 *
4535 * [ WFC: No < in Attribute Values ]
4536 * handled in xmlParseAttValue()
4537 *
4538 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4539 * or XML_ATTRIBUTE_FIXED.
4540 */
4541
4542int
4543xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4544 int val;
4545 xmlChar *ret;
4546
4547 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004548 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004549 SKIP(9);
4550 return(XML_ATTRIBUTE_REQUIRED);
4551 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004552 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004553 SKIP(8);
4554 return(XML_ATTRIBUTE_IMPLIED);
4555 }
4556 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004557 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004558 SKIP(6);
4559 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004560 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004561 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4562 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004563 }
4564 SKIP_BLANKS;
4565 }
4566 ret = xmlParseAttValue(ctxt);
4567 ctxt->instate = XML_PARSER_DTD;
4568 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004569 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004570 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004571 } else
4572 *value = ret;
4573 return(val);
4574}
4575
4576/**
4577 * xmlParseNotationType:
4578 * @ctxt: an XML parser context
4579 *
4580 * parse an Notation attribute type.
4581 *
4582 * Note: the leading 'NOTATION' S part has already being parsed...
4583 *
4584 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4585 *
4586 * [ VC: Notation Attributes ]
4587 * Values of this type must match one of the notation names included
4588 * in the declaration; all notation names in the declaration must be declared.
4589 *
4590 * Returns: the notation attribute tree built while parsing
4591 */
4592
4593xmlEnumerationPtr
4594xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004595 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004596 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4597
4598 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004599 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004600 return(NULL);
4601 }
4602 SHRINK;
4603 do {
4604 NEXT;
4605 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004606 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004607 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004608 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4609 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004610 return(ret);
4611 }
4612 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004613 if (cur == NULL) return(ret);
4614 if (last == NULL) ret = last = cur;
4615 else {
4616 last->next = cur;
4617 last = cur;
4618 }
4619 SKIP_BLANKS;
4620 } while (RAW == '|');
4621 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004622 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004623 if ((last != NULL) && (last != ret))
4624 xmlFreeEnumeration(last);
4625 return(ret);
4626 }
4627 NEXT;
4628 return(ret);
4629}
4630
4631/**
4632 * xmlParseEnumerationType:
4633 * @ctxt: an XML parser context
4634 *
4635 * parse an Enumeration attribute type.
4636 *
4637 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4638 *
4639 * [ VC: Enumeration ]
4640 * Values of this type must match one of the Nmtoken tokens in
4641 * the declaration
4642 *
4643 * Returns: the enumeration attribute tree built while parsing
4644 */
4645
4646xmlEnumerationPtr
4647xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4648 xmlChar *name;
4649 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4650
4651 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004652 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004653 return(NULL);
4654 }
4655 SHRINK;
4656 do {
4657 NEXT;
4658 SKIP_BLANKS;
4659 name = xmlParseNmtoken(ctxt);
4660 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004661 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004662 return(ret);
4663 }
4664 cur = xmlCreateEnumeration(name);
4665 xmlFree(name);
4666 if (cur == NULL) return(ret);
4667 if (last == NULL) ret = last = cur;
4668 else {
4669 last->next = cur;
4670 last = cur;
4671 }
4672 SKIP_BLANKS;
4673 } while (RAW == '|');
4674 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004675 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004676 return(ret);
4677 }
4678 NEXT;
4679 return(ret);
4680}
4681
4682/**
4683 * xmlParseEnumeratedType:
4684 * @ctxt: an XML parser context
4685 * @tree: the enumeration tree built while parsing
4686 *
4687 * parse an Enumerated attribute type.
4688 *
4689 * [57] EnumeratedType ::= NotationType | Enumeration
4690 *
4691 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4692 *
4693 *
4694 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4695 */
4696
4697int
4698xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004699 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004700 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004701 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004704 return(0);
4705 }
4706 SKIP_BLANKS;
4707 *tree = xmlParseNotationType(ctxt);
4708 if (*tree == NULL) return(0);
4709 return(XML_ATTRIBUTE_NOTATION);
4710 }
4711 *tree = xmlParseEnumerationType(ctxt);
4712 if (*tree == NULL) return(0);
4713 return(XML_ATTRIBUTE_ENUMERATION);
4714}
4715
4716/**
4717 * xmlParseAttributeType:
4718 * @ctxt: an XML parser context
4719 * @tree: the enumeration tree built while parsing
4720 *
4721 * parse the Attribute list def for an element
4722 *
4723 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4724 *
4725 * [55] StringType ::= 'CDATA'
4726 *
4727 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4728 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4729 *
4730 * Validity constraints for attribute values syntax are checked in
4731 * xmlValidateAttributeValue()
4732 *
4733 * [ VC: ID ]
4734 * Values of type ID must match the Name production. A name must not
4735 * appear more than once in an XML document as a value of this type;
4736 * i.e., ID values must uniquely identify the elements which bear them.
4737 *
4738 * [ VC: One ID per Element Type ]
4739 * No element type may have more than one ID attribute specified.
4740 *
4741 * [ VC: ID Attribute Default ]
4742 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4743 *
4744 * [ VC: IDREF ]
4745 * Values of type IDREF must match the Name production, and values
4746 * of type IDREFS must match Names; each IDREF Name must match the value
4747 * of an ID attribute on some element in the XML document; i.e. IDREF
4748 * values must match the value of some ID attribute.
4749 *
4750 * [ VC: Entity Name ]
4751 * Values of type ENTITY must match the Name production, values
4752 * of type ENTITIES must match Names; each Entity Name must match the
4753 * name of an unparsed entity declared in the DTD.
4754 *
4755 * [ VC: Name Token ]
4756 * Values of type NMTOKEN must match the Nmtoken production; values
4757 * of type NMTOKENS must match Nmtokens.
4758 *
4759 * Returns the attribute type
4760 */
4761int
4762xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4763 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004764 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004765 SKIP(5);
4766 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004767 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004768 SKIP(6);
4769 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004770 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004771 SKIP(5);
4772 return(XML_ATTRIBUTE_IDREF);
4773 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4774 SKIP(2);
4775 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004776 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004777 SKIP(6);
4778 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004779 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004780 SKIP(8);
4781 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004782 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004783 SKIP(8);
4784 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004785 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004786 SKIP(7);
4787 return(XML_ATTRIBUTE_NMTOKEN);
4788 }
4789 return(xmlParseEnumeratedType(ctxt, tree));
4790}
4791
4792/**
4793 * xmlParseAttributeListDecl:
4794 * @ctxt: an XML parser context
4795 *
4796 * : parse the Attribute list def for an element
4797 *
4798 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4799 *
4800 * [53] AttDef ::= S Name S AttType S DefaultDecl
4801 *
4802 */
4803void
4804xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004805 const xmlChar *elemName;
4806 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004807 xmlEnumerationPtr tree;
4808
Daniel Veillarda07050d2003-10-19 14:46:32 +00004809 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004810 xmlParserInputPtr input = ctxt->input;
4811
4812 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004813 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004814 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004815 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004816 }
4817 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004818 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4821 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004822 return;
4823 }
4824 SKIP_BLANKS;
4825 GROW;
4826 while (RAW != '>') {
4827 const xmlChar *check = CUR_PTR;
4828 int type;
4829 int def;
4830 xmlChar *defaultValue = NULL;
4831
4832 GROW;
4833 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004834 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004835 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004836 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4837 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004838 break;
4839 }
4840 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004841 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004842 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004843 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004844 if (defaultValue != NULL)
4845 xmlFree(defaultValue);
4846 break;
4847 }
4848 SKIP_BLANKS;
4849
4850 type = xmlParseAttributeType(ctxt, &tree);
4851 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004852 if (defaultValue != NULL)
4853 xmlFree(defaultValue);
4854 break;
4855 }
4856
4857 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004858 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004859 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4860 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004861 if (defaultValue != NULL)
4862 xmlFree(defaultValue);
4863 if (tree != NULL)
4864 xmlFreeEnumeration(tree);
4865 break;
4866 }
4867 SKIP_BLANKS;
4868
4869 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4870 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004871 if (defaultValue != NULL)
4872 xmlFree(defaultValue);
4873 if (tree != NULL)
4874 xmlFreeEnumeration(tree);
4875 break;
4876 }
4877
4878 GROW;
4879 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004880 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004881 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004882 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004883 if (defaultValue != NULL)
4884 xmlFree(defaultValue);
4885 if (tree != NULL)
4886 xmlFreeEnumeration(tree);
4887 break;
4888 }
4889 SKIP_BLANKS;
4890 }
4891 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004892 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4893 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004894 if (defaultValue != NULL)
4895 xmlFree(defaultValue);
4896 if (tree != NULL)
4897 xmlFreeEnumeration(tree);
4898 break;
4899 }
4900 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4901 (ctxt->sax->attributeDecl != NULL))
4902 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4903 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004904 else if (tree != NULL)
4905 xmlFreeEnumeration(tree);
4906
4907 if ((ctxt->sax2) && (defaultValue != NULL) &&
4908 (def != XML_ATTRIBUTE_IMPLIED) &&
4909 (def != XML_ATTRIBUTE_REQUIRED)) {
4910 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4911 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004912 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4913 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4914 }
Owen Taylor3473f882001-02-23 17:55:21 +00004915 if (defaultValue != NULL)
4916 xmlFree(defaultValue);
4917 GROW;
4918 }
4919 if (RAW == '>') {
4920 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004921 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4922 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004923 }
4924 NEXT;
4925 }
Owen Taylor3473f882001-02-23 17:55:21 +00004926 }
4927}
4928
4929/**
4930 * xmlParseElementMixedContentDecl:
4931 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004932 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004933 *
4934 * parse the declaration for a Mixed Element content
4935 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4936 *
4937 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4938 * '(' S? '#PCDATA' S? ')'
4939 *
4940 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4941 *
4942 * [ VC: No Duplicate Types ]
4943 * The same name must not appear more than once in a single
4944 * mixed-content declaration.
4945 *
4946 * returns: the list of the xmlElementContentPtr describing the element choices
4947 */
4948xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004949xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004950 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004951 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004952
4953 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004954 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004955 SKIP(7);
4956 SKIP_BLANKS;
4957 SHRINK;
4958 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004960 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4961"Element content declaration doesn't start and stop in the same entity\n",
4962 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004963 }
Owen Taylor3473f882001-02-23 17:55:21 +00004964 NEXT;
4965 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4966 if (RAW == '*') {
4967 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4968 NEXT;
4969 }
4970 return(ret);
4971 }
4972 if ((RAW == '(') || (RAW == '|')) {
4973 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4974 if (ret == NULL) return(NULL);
4975 }
4976 while (RAW == '|') {
4977 NEXT;
4978 if (elem == NULL) {
4979 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4980 if (ret == NULL) return(NULL);
4981 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004982 if (cur != NULL)
4983 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004984 cur = ret;
4985 } else {
4986 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4987 if (n == NULL) return(NULL);
4988 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004989 if (n->c1 != NULL)
4990 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004991 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004992 if (n != NULL)
4993 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004994 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004995 }
4996 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004997 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004998 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004999 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005000 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005001 xmlFreeElementContent(cur);
5002 return(NULL);
5003 }
5004 SKIP_BLANKS;
5005 GROW;
5006 }
5007 if ((RAW == ')') && (NXT(1) == '*')) {
5008 if (elem != NULL) {
5009 cur->c2 = xmlNewElementContent(elem,
5010 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005011 if (cur->c2 != NULL)
5012 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005013 }
5014 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005015 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005016 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5017"Element content declaration doesn't start and stop in the same entity\n",
5018 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005019 }
Owen Taylor3473f882001-02-23 17:55:21 +00005020 SKIP(2);
5021 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00005022 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005023 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 return(NULL);
5025 }
5026
5027 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005028 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 }
5030 return(ret);
5031}
5032
5033/**
5034 * xmlParseElementChildrenContentDecl:
5035 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005036 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005037 *
5038 * parse the declaration for a Mixed Element content
5039 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5040 *
5041 *
5042 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5043 *
5044 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5045 *
5046 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5047 *
5048 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5049 *
5050 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5051 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005052 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005053 * opening or closing parentheses in a choice, seq, or Mixed
5054 * construct is contained in the replacement text for a parameter
5055 * entity, both must be contained in the same replacement text. For
5056 * interoperability, if a parameter-entity reference appears in a
5057 * choice, seq, or Mixed construct, its replacement text should not
5058 * be empty, and neither the first nor last non-blank character of
5059 * the replacement text should be a connector (| or ,).
5060 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005061 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005062 * hierarchy.
5063 */
5064xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005065xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005066 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005067 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005068 xmlChar type = 0;
5069
5070 SKIP_BLANKS;
5071 GROW;
5072 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005073 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005074
Owen Taylor3473f882001-02-23 17:55:21 +00005075 /* Recurse on first child */
5076 NEXT;
5077 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005078 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005079 SKIP_BLANKS;
5080 GROW;
5081 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005082 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005083 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005084 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005085 return(NULL);
5086 }
5087 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005088 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005089 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005090 return(NULL);
5091 }
Owen Taylor3473f882001-02-23 17:55:21 +00005092 GROW;
5093 if (RAW == '?') {
5094 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5095 NEXT;
5096 } else if (RAW == '*') {
5097 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5098 NEXT;
5099 } else if (RAW == '+') {
5100 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5101 NEXT;
5102 } else {
5103 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5104 }
Owen Taylor3473f882001-02-23 17:55:21 +00005105 GROW;
5106 }
5107 SKIP_BLANKS;
5108 SHRINK;
5109 while (RAW != ')') {
5110 /*
5111 * Each loop we parse one separator and one element.
5112 */
5113 if (RAW == ',') {
5114 if (type == 0) type = CUR;
5115
5116 /*
5117 * Detect "Name | Name , Name" error
5118 */
5119 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005120 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005121 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005122 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005123 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005124 xmlFreeElementContent(last);
5125 if (ret != NULL)
5126 xmlFreeElementContent(ret);
5127 return(NULL);
5128 }
5129 NEXT;
5130
5131 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5132 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005133 if ((last != NULL) && (last != ret))
5134 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005135 xmlFreeElementContent(ret);
5136 return(NULL);
5137 }
5138 if (last == NULL) {
5139 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005140 if (ret != NULL)
5141 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005142 ret = cur = op;
5143 } else {
5144 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005145 if (op != NULL)
5146 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005147 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005148 if (last != NULL)
5149 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005150 cur =op;
5151 last = NULL;
5152 }
5153 } else if (RAW == '|') {
5154 if (type == 0) type = CUR;
5155
5156 /*
5157 * Detect "Name , Name | Name" error
5158 */
5159 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005160 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005161 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005162 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005163 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005164 xmlFreeElementContent(last);
5165 if (ret != NULL)
5166 xmlFreeElementContent(ret);
5167 return(NULL);
5168 }
5169 NEXT;
5170
5171 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5172 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005173 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005174 xmlFreeElementContent(last);
5175 if (ret != NULL)
5176 xmlFreeElementContent(ret);
5177 return(NULL);
5178 }
5179 if (last == NULL) {
5180 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005181 if (ret != NULL)
5182 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005183 ret = cur = op;
5184 } else {
5185 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005186 if (op != NULL)
5187 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005188 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005189 if (last != NULL)
5190 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005191 cur =op;
5192 last = NULL;
5193 }
5194 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005195 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 if (ret != NULL)
5197 xmlFreeElementContent(ret);
5198 return(NULL);
5199 }
5200 GROW;
5201 SKIP_BLANKS;
5202 GROW;
5203 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005204 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005205 /* Recurse on second child */
5206 NEXT;
5207 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005208 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 SKIP_BLANKS;
5210 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005211 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005212 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005213 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005214 if (ret != NULL)
5215 xmlFreeElementContent(ret);
5216 return(NULL);
5217 }
5218 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 if (RAW == '?') {
5220 last->ocur = XML_ELEMENT_CONTENT_OPT;
5221 NEXT;
5222 } else if (RAW == '*') {
5223 last->ocur = XML_ELEMENT_CONTENT_MULT;
5224 NEXT;
5225 } else if (RAW == '+') {
5226 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5227 NEXT;
5228 } else {
5229 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5230 }
5231 }
5232 SKIP_BLANKS;
5233 GROW;
5234 }
5235 if ((cur != NULL) && (last != NULL)) {
5236 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005237 if (last != NULL)
5238 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005239 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005240 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005241 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5242"Element content declaration doesn't start and stop in the same entity\n",
5243 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005244 }
Owen Taylor3473f882001-02-23 17:55:21 +00005245 NEXT;
5246 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005247 if (ret != NULL)
5248 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005249 NEXT;
5250 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005251 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005252 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005253 cur = ret;
5254 /*
5255 * Some normalization:
5256 * (a | b* | c?)* == (a | b | c)*
5257 */
5258 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5259 if ((cur->c1 != NULL) &&
5260 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5261 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5262 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5263 if ((cur->c2 != NULL) &&
5264 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5265 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5266 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5267 cur = cur->c2;
5268 }
5269 }
Owen Taylor3473f882001-02-23 17:55:21 +00005270 NEXT;
5271 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005272 if (ret != NULL) {
5273 int found = 0;
5274
Daniel Veillarde470df72001-04-18 21:41:07 +00005275 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005276 /*
5277 * Some normalization:
5278 * (a | b*)+ == (a | b)*
5279 * (a | b?)+ == (a | b)*
5280 */
5281 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5282 if ((cur->c1 != NULL) &&
5283 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5284 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5285 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5286 found = 1;
5287 }
5288 if ((cur->c2 != NULL) &&
5289 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5290 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5291 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5292 found = 1;
5293 }
5294 cur = cur->c2;
5295 }
5296 if (found)
5297 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5298 }
Owen Taylor3473f882001-02-23 17:55:21 +00005299 NEXT;
5300 }
5301 return(ret);
5302}
5303
5304/**
5305 * xmlParseElementContentDecl:
5306 * @ctxt: an XML parser context
5307 * @name: the name of the element being defined.
5308 * @result: the Element Content pointer will be stored here if any
5309 *
5310 * parse the declaration for an Element content either Mixed or Children,
5311 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5312 *
5313 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5314 *
5315 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5316 */
5317
5318int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005319xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005320 xmlElementContentPtr *result) {
5321
5322 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005323 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005324 int res;
5325
5326 *result = NULL;
5327
5328 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005329 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005330 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005331 return(-1);
5332 }
5333 NEXT;
5334 GROW;
5335 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005336 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005337 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005338 res = XML_ELEMENT_TYPE_MIXED;
5339 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005340 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005341 res = XML_ELEMENT_TYPE_ELEMENT;
5342 }
Owen Taylor3473f882001-02-23 17:55:21 +00005343 SKIP_BLANKS;
5344 *result = tree;
5345 return(res);
5346}
5347
5348/**
5349 * xmlParseElementDecl:
5350 * @ctxt: an XML parser context
5351 *
5352 * parse an Element declaration.
5353 *
5354 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5355 *
5356 * [ VC: Unique Element Type Declaration ]
5357 * No element type may be declared more than once
5358 *
5359 * Returns the type of the element, or -1 in case of error
5360 */
5361int
5362xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005363 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005364 int ret = -1;
5365 xmlElementContentPtr content = NULL;
5366
5367 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005368 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005369 xmlParserInputPtr input = ctxt->input;
5370
5371 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005372 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005373 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5374 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005375 }
5376 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005377 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005378 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005379 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5380 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005381 return(-1);
5382 }
5383 while ((RAW == 0) && (ctxt->inputNr > 1))
5384 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005385 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5387 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005388 }
5389 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005390 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005391 SKIP(5);
5392 /*
5393 * Element must always be empty.
5394 */
5395 ret = XML_ELEMENT_TYPE_EMPTY;
5396 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5397 (NXT(2) == 'Y')) {
5398 SKIP(3);
5399 /*
5400 * Element is a generic container.
5401 */
5402 ret = XML_ELEMENT_TYPE_ANY;
5403 } else if (RAW == '(') {
5404 ret = xmlParseElementContentDecl(ctxt, name, &content);
5405 } else {
5406 /*
5407 * [ WFC: PEs in Internal Subset ] error handling.
5408 */
5409 if ((RAW == '%') && (ctxt->external == 0) &&
5410 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005411 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005412 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005413 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005414 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005415 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5416 }
Owen Taylor3473f882001-02-23 17:55:21 +00005417 return(-1);
5418 }
5419
5420 SKIP_BLANKS;
5421 /*
5422 * Pop-up of finished entities.
5423 */
5424 while ((RAW == 0) && (ctxt->inputNr > 1))
5425 xmlPopInput(ctxt);
5426 SKIP_BLANKS;
5427
5428 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005429 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005430 } else {
5431 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005432 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5433 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005434 }
5435
5436 NEXT;
5437 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5438 (ctxt->sax->elementDecl != NULL))
5439 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5440 content);
5441 }
5442 if (content != NULL) {
5443 xmlFreeElementContent(content);
5444 }
Owen Taylor3473f882001-02-23 17:55:21 +00005445 }
5446 return(ret);
5447}
5448
5449/**
Owen Taylor3473f882001-02-23 17:55:21 +00005450 * xmlParseConditionalSections
5451 * @ctxt: an XML parser context
5452 *
5453 * [61] conditionalSect ::= includeSect | ignoreSect
5454 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5455 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5456 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5457 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5458 */
5459
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005460static void
Owen Taylor3473f882001-02-23 17:55:21 +00005461xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5462 SKIP(3);
5463 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005464 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005465 SKIP(7);
5466 SKIP_BLANKS;
5467 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005468 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005469 } else {
5470 NEXT;
5471 }
5472 if (xmlParserDebugEntities) {
5473 if ((ctxt->input != NULL) && (ctxt->input->filename))
5474 xmlGenericError(xmlGenericErrorContext,
5475 "%s(%d): ", ctxt->input->filename,
5476 ctxt->input->line);
5477 xmlGenericError(xmlGenericErrorContext,
5478 "Entering INCLUDE Conditional Section\n");
5479 }
5480
5481 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5482 (NXT(2) != '>'))) {
5483 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005484 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005485
5486 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5487 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005488 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005489 NEXT;
5490 } else if (RAW == '%') {
5491 xmlParsePEReference(ctxt);
5492 } else
5493 xmlParseMarkupDecl(ctxt);
5494
5495 /*
5496 * Pop-up of finished entities.
5497 */
5498 while ((RAW == 0) && (ctxt->inputNr > 1))
5499 xmlPopInput(ctxt);
5500
Daniel Veillardfdc91562002-07-01 21:52:03 +00005501 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005502 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005503 break;
5504 }
5505 }
5506 if (xmlParserDebugEntities) {
5507 if ((ctxt->input != NULL) && (ctxt->input->filename))
5508 xmlGenericError(xmlGenericErrorContext,
5509 "%s(%d): ", ctxt->input->filename,
5510 ctxt->input->line);
5511 xmlGenericError(xmlGenericErrorContext,
5512 "Leaving INCLUDE Conditional Section\n");
5513 }
5514
Daniel Veillarda07050d2003-10-19 14:46:32 +00005515 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005516 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005517 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005518 int depth = 0;
5519
5520 SKIP(6);
5521 SKIP_BLANKS;
5522 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005523 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005524 } else {
5525 NEXT;
5526 }
5527 if (xmlParserDebugEntities) {
5528 if ((ctxt->input != NULL) && (ctxt->input->filename))
5529 xmlGenericError(xmlGenericErrorContext,
5530 "%s(%d): ", ctxt->input->filename,
5531 ctxt->input->line);
5532 xmlGenericError(xmlGenericErrorContext,
5533 "Entering IGNORE Conditional Section\n");
5534 }
5535
5536 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005537 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005538 * But disable SAX event generating DTD building in the meantime
5539 */
5540 state = ctxt->disableSAX;
5541 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005543 ctxt->instate = XML_PARSER_IGNORE;
5544
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005545 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005546 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5547 depth++;
5548 SKIP(3);
5549 continue;
5550 }
5551 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5552 if (--depth >= 0) SKIP(3);
5553 continue;
5554 }
5555 NEXT;
5556 continue;
5557 }
5558
5559 ctxt->disableSAX = state;
5560 ctxt->instate = instate;
5561
5562 if (xmlParserDebugEntities) {
5563 if ((ctxt->input != NULL) && (ctxt->input->filename))
5564 xmlGenericError(xmlGenericErrorContext,
5565 "%s(%d): ", ctxt->input->filename,
5566 ctxt->input->line);
5567 xmlGenericError(xmlGenericErrorContext,
5568 "Leaving IGNORE Conditional Section\n");
5569 }
5570
5571 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005572 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005573 }
5574
5575 if (RAW == 0)
5576 SHRINK;
5577
5578 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005579 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005580 } else {
5581 SKIP(3);
5582 }
5583}
5584
5585/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005586 * xmlParseMarkupDecl:
5587 * @ctxt: an XML parser context
5588 *
5589 * parse Markup declarations
5590 *
5591 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5592 * NotationDecl | PI | Comment
5593 *
5594 * [ VC: Proper Declaration/PE Nesting ]
5595 * Parameter-entity replacement text must be properly nested with
5596 * markup declarations. That is to say, if either the first character
5597 * or the last character of a markup declaration (markupdecl above) is
5598 * contained in the replacement text for a parameter-entity reference,
5599 * both must be contained in the same replacement text.
5600 *
5601 * [ WFC: PEs in Internal Subset ]
5602 * In the internal DTD subset, parameter-entity references can occur
5603 * only where markup declarations can occur, not within markup declarations.
5604 * (This does not apply to references that occur in external parameter
5605 * entities or to the external subset.)
5606 */
5607void
5608xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5609 GROW;
5610 xmlParseElementDecl(ctxt);
5611 xmlParseAttributeListDecl(ctxt);
5612 xmlParseEntityDecl(ctxt);
5613 xmlParseNotationDecl(ctxt);
5614 xmlParsePI(ctxt);
5615 xmlParseComment(ctxt);
5616 /*
5617 * This is only for internal subset. On external entities,
5618 * the replacement is done before parsing stage
5619 */
5620 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5621 xmlParsePEReference(ctxt);
5622
5623 /*
5624 * Conditional sections are allowed from entities included
5625 * by PE References in the internal subset.
5626 */
5627 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5628 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5629 xmlParseConditionalSections(ctxt);
5630 }
5631 }
5632
5633 ctxt->instate = XML_PARSER_DTD;
5634}
5635
5636/**
5637 * xmlParseTextDecl:
5638 * @ctxt: an XML parser context
5639 *
5640 * parse an XML declaration header for external entities
5641 *
5642 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5643 *
5644 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5645 */
5646
5647void
5648xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5649 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005650 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005651
5652 /*
5653 * We know that '<?xml' is here.
5654 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005655 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005656 SKIP(5);
5657 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005658 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005659 return;
5660 }
5661
William M. Brack76e95df2003-10-18 16:20:14 +00005662 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005663 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5664 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005665 }
5666 SKIP_BLANKS;
5667
5668 /*
5669 * We may have the VersionInfo here.
5670 */
5671 version = xmlParseVersionInfo(ctxt);
5672 if (version == NULL)
5673 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005674 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005675 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005676 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5677 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005678 }
5679 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005680 ctxt->input->version = version;
5681
5682 /*
5683 * We must have the encoding declaration
5684 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005685 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005686 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5687 /*
5688 * The XML REC instructs us to stop parsing right here
5689 */
5690 return;
5691 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005692 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5693 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5694 "Missing encoding in text declaration\n");
5695 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005696
5697 SKIP_BLANKS;
5698 if ((RAW == '?') && (NXT(1) == '>')) {
5699 SKIP(2);
5700 } else if (RAW == '>') {
5701 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005702 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005703 NEXT;
5704 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005705 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005706 MOVETO_ENDTAG(CUR_PTR);
5707 NEXT;
5708 }
5709}
5710
5711/**
Owen Taylor3473f882001-02-23 17:55:21 +00005712 * xmlParseExternalSubset:
5713 * @ctxt: an XML parser context
5714 * @ExternalID: the external identifier
5715 * @SystemID: the system identifier (or URL)
5716 *
5717 * parse Markup declarations from an external subset
5718 *
5719 * [30] extSubset ::= textDecl? extSubsetDecl
5720 *
5721 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5722 */
5723void
5724xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5725 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005726 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005727 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005728 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005729 xmlParseTextDecl(ctxt);
5730 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5731 /*
5732 * The XML REC instructs us to stop parsing right here
5733 */
5734 ctxt->instate = XML_PARSER_EOF;
5735 return;
5736 }
5737 }
5738 if (ctxt->myDoc == NULL) {
5739 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5740 }
5741 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5742 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5743
5744 ctxt->instate = XML_PARSER_DTD;
5745 ctxt->external = 1;
5746 while (((RAW == '<') && (NXT(1) == '?')) ||
5747 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005748 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005749 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005750 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005751
5752 GROW;
5753 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5754 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005755 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005756 NEXT;
5757 } else if (RAW == '%') {
5758 xmlParsePEReference(ctxt);
5759 } else
5760 xmlParseMarkupDecl(ctxt);
5761
5762 /*
5763 * Pop-up of finished entities.
5764 */
5765 while ((RAW == 0) && (ctxt->inputNr > 1))
5766 xmlPopInput(ctxt);
5767
Daniel Veillardfdc91562002-07-01 21:52:03 +00005768 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005769 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 break;
5771 }
5772 }
5773
5774 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005775 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005776 }
5777
5778}
5779
5780/**
5781 * xmlParseReference:
5782 * @ctxt: an XML parser context
5783 *
5784 * parse and handle entity references in content, depending on the SAX
5785 * interface, this may end-up in a call to character() if this is a
5786 * CharRef, a predefined entity, if there is no reference() callback.
5787 * or if the parser was asked to switch to that mode.
5788 *
5789 * [67] Reference ::= EntityRef | CharRef
5790 */
5791void
5792xmlParseReference(xmlParserCtxtPtr ctxt) {
5793 xmlEntityPtr ent;
5794 xmlChar *val;
5795 if (RAW != '&') return;
5796
5797 if (NXT(1) == '#') {
5798 int i = 0;
5799 xmlChar out[10];
5800 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005801 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005802
5803 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5804 /*
5805 * So we are using non-UTF-8 buffers
5806 * Check that the char fit on 8bits, if not
5807 * generate a CharRef.
5808 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005809 if (value <= 0xFF) {
5810 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005811 out[1] = 0;
5812 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5813 (!ctxt->disableSAX))
5814 ctxt->sax->characters(ctxt->userData, out, 1);
5815 } else {
5816 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005817 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005818 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005819 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005820 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5821 (!ctxt->disableSAX))
5822 ctxt->sax->reference(ctxt->userData, out);
5823 }
5824 } else {
5825 /*
5826 * Just encode the value in UTF-8
5827 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005828 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005829 out[i] = 0;
5830 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5831 (!ctxt->disableSAX))
5832 ctxt->sax->characters(ctxt->userData, out, i);
5833 }
5834 } else {
5835 ent = xmlParseEntityRef(ctxt);
5836 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005837 if (!ctxt->wellFormed)
5838 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005839 if ((ent->name != NULL) &&
5840 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5841 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005842 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005843
5844
5845 /*
5846 * The first reference to the entity trigger a parsing phase
5847 * where the ent->children is filled with the result from
5848 * the parsing.
5849 */
5850 if (ent->children == NULL) {
5851 xmlChar *value;
5852 value = ent->content;
5853
5854 /*
5855 * Check that this entity is well formed
5856 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005857 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005858 (value[1] == 0) && (value[0] == '<') &&
5859 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5860 /*
5861 * DONE: get definite answer on this !!!
5862 * Lots of entity decls are used to declare a single
5863 * char
5864 * <!ENTITY lt "<">
5865 * Which seems to be valid since
5866 * 2.4: The ampersand character (&) and the left angle
5867 * bracket (<) may appear in their literal form only
5868 * when used ... They are also legal within the literal
5869 * entity value of an internal entity declaration;i
5870 * see "4.3.2 Well-Formed Parsed Entities".
5871 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5872 * Looking at the OASIS test suite and James Clark
5873 * tests, this is broken. However the XML REC uses
5874 * it. Is the XML REC not well-formed ????
5875 * This is a hack to avoid this problem
5876 *
5877 * ANSWER: since lt gt amp .. are already defined,
5878 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005879 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005880 * is lousy but acceptable.
5881 */
5882 list = xmlNewDocText(ctxt->myDoc, value);
5883 if (list != NULL) {
5884 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5885 (ent->children == NULL)) {
5886 ent->children = list;
5887 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005888 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005889 list->parent = (xmlNodePtr) ent;
5890 } else {
5891 xmlFreeNodeList(list);
5892 }
5893 } else if (list != NULL) {
5894 xmlFreeNodeList(list);
5895 }
5896 } else {
5897 /*
5898 * 4.3.2: An internal general parsed entity is well-formed
5899 * if its replacement text matches the production labeled
5900 * content.
5901 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005902
5903 void *user_data;
5904 /*
5905 * This is a bit hackish but this seems the best
5906 * way to make sure both SAX and DOM entity support
5907 * behaves okay.
5908 */
5909 if (ctxt->userData == ctxt)
5910 user_data = NULL;
5911 else
5912 user_data = ctxt->userData;
5913
Owen Taylor3473f882001-02-23 17:55:21 +00005914 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5915 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005916 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5917 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 ctxt->depth--;
5919 } else if (ent->etype ==
5920 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5921 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005922 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005923 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005924 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 ctxt->depth--;
5926 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005927 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005928 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5929 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005930 }
5931 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005932 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005933 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005934 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005935 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5936 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005937 (ent->children == NULL)) {
5938 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005939 if (ctxt->replaceEntities) {
5940 /*
5941 * Prune it directly in the generated document
5942 * except for single text nodes.
5943 */
5944 if ((list->type == XML_TEXT_NODE) &&
5945 (list->next == NULL)) {
5946 list->parent = (xmlNodePtr) ent;
5947 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005948 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005949 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005950 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005951 while (list != NULL) {
5952 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005953 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005954 if (list->next == NULL)
5955 ent->last = list;
5956 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005957 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005958 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005959#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005960 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5961 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005962#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005963 }
5964 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005965 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005966 while (list != NULL) {
5967 list->parent = (xmlNodePtr) ent;
5968 if (list->next == NULL)
5969 ent->last = list;
5970 list = list->next;
5971 }
Owen Taylor3473f882001-02-23 17:55:21 +00005972 }
5973 } else {
5974 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005975 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005976 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005977 } else if ((ret != XML_ERR_OK) &&
5978 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005979 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005980 } else if (list != NULL) {
5981 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005982 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005983 }
5984 }
5985 }
5986 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5987 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5988 /*
5989 * Create a node.
5990 */
5991 ctxt->sax->reference(ctxt->userData, ent->name);
5992 return;
5993 } else if (ctxt->replaceEntities) {
5994 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5995 /*
5996 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005997 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005998 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005999 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006000 if ((list == NULL) && (ent->owner == 0)) {
6001 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006002 cur = ent->children;
6003 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006004 nw = xmlCopyNode(cur, 1);
6005 if (nw != NULL) {
6006 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006007 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006008 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006009 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006010 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006011 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006012 if (cur == ent->last)
6013 break;
6014 cur = cur->next;
6015 }
Daniel Veillard81273902003-09-30 00:43:48 +00006016#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006017 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006018 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006019#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006020 } else if (list == NULL) {
6021 xmlNodePtr nw = NULL, cur, next, last,
6022 firstChild = NULL;
6023 /*
6024 * Copy the entity child list and make it the new
6025 * entity child list. The goal is to make sure any
6026 * ID or REF referenced will be the one from the
6027 * document content and not the entity copy.
6028 */
6029 cur = ent->children;
6030 ent->children = NULL;
6031 last = ent->last;
6032 ent->last = NULL;
6033 while (cur != NULL) {
6034 next = cur->next;
6035 cur->next = NULL;
6036 cur->parent = NULL;
6037 nw = xmlCopyNode(cur, 1);
6038 if (nw != NULL) {
6039 nw->_private = cur->_private;
6040 if (firstChild == NULL){
6041 firstChild = cur;
6042 }
6043 xmlAddChild((xmlNodePtr) ent, nw);
6044 xmlAddChild(ctxt->node, cur);
6045 }
6046 if (cur == last)
6047 break;
6048 cur = next;
6049 }
6050 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006051#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006052 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6053 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006054#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006055 } else {
6056 /*
6057 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006058 * node with a possible previous text one which
6059 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006060 */
6061 if (ent->children->type == XML_TEXT_NODE)
6062 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
6063 if ((ent->last != ent->children) &&
6064 (ent->last->type == XML_TEXT_NODE))
6065 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
6066 xmlAddChildList(ctxt->node, ent->children);
6067 }
6068
Owen Taylor3473f882001-02-23 17:55:21 +00006069 /*
6070 * This is to avoid a nasty side effect, see
6071 * characters() in SAX.c
6072 */
6073 ctxt->nodemem = 0;
6074 ctxt->nodelen = 0;
6075 return;
6076 } else {
6077 /*
6078 * Probably running in SAX mode
6079 */
6080 xmlParserInputPtr input;
6081
6082 input = xmlNewEntityInputStream(ctxt, ent);
6083 xmlPushInput(ctxt, input);
6084 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006085 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6086 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006087 xmlParseTextDecl(ctxt);
6088 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6089 /*
6090 * The XML REC instructs us to stop parsing right here
6091 */
6092 ctxt->instate = XML_PARSER_EOF;
6093 return;
6094 }
6095 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006096 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6097 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 }
6099 }
6100 return;
6101 }
6102 }
6103 } else {
6104 val = ent->content;
6105 if (val == NULL) return;
6106 /*
6107 * inline the entity.
6108 */
6109 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6110 (!ctxt->disableSAX))
6111 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6112 }
6113 }
6114}
6115
6116/**
6117 * xmlParseEntityRef:
6118 * @ctxt: an XML parser context
6119 *
6120 * parse ENTITY references declarations
6121 *
6122 * [68] EntityRef ::= '&' Name ';'
6123 *
6124 * [ WFC: Entity Declared ]
6125 * In a document without any DTD, a document with only an internal DTD
6126 * subset which contains no parameter entity references, or a document
6127 * with "standalone='yes'", the Name given in the entity reference
6128 * must match that in an entity declaration, except that well-formed
6129 * documents need not declare any of the following entities: amp, lt,
6130 * gt, apos, quot. The declaration of a parameter entity must precede
6131 * any reference to it. Similarly, the declaration of a general entity
6132 * must precede any reference to it which appears in a default value in an
6133 * attribute-list declaration. Note that if entities are declared in the
6134 * external subset or in external parameter entities, a non-validating
6135 * processor is not obligated to read and process their declarations;
6136 * for such documents, the rule that an entity must be declared is a
6137 * well-formedness constraint only if standalone='yes'.
6138 *
6139 * [ WFC: Parsed Entity ]
6140 * An entity reference must not contain the name of an unparsed entity
6141 *
6142 * Returns the xmlEntityPtr if found, or NULL otherwise.
6143 */
6144xmlEntityPtr
6145xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006146 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006147 xmlEntityPtr ent = NULL;
6148
6149 GROW;
6150
6151 if (RAW == '&') {
6152 NEXT;
6153 name = xmlParseName(ctxt);
6154 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006155 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6156 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006157 } else {
6158 if (RAW == ';') {
6159 NEXT;
6160 /*
6161 * Ask first SAX for entity resolution, otherwise try the
6162 * predefined set.
6163 */
6164 if (ctxt->sax != NULL) {
6165 if (ctxt->sax->getEntity != NULL)
6166 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006167 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006168 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006169 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6170 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006171 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006172 }
Owen Taylor3473f882001-02-23 17:55:21 +00006173 }
6174 /*
6175 * [ WFC: Entity Declared ]
6176 * In a document without any DTD, a document with only an
6177 * internal DTD subset which contains no parameter entity
6178 * references, or a document with "standalone='yes'", the
6179 * Name given in the entity reference must match that in an
6180 * entity declaration, except that well-formed documents
6181 * need not declare any of the following entities: amp, lt,
6182 * gt, apos, quot.
6183 * The declaration of a parameter entity must precede any
6184 * reference to it.
6185 * Similarly, the declaration of a general entity must
6186 * precede any reference to it which appears in a default
6187 * value in an attribute-list declaration. Note that if
6188 * entities are declared in the external subset or in
6189 * external parameter entities, a non-validating processor
6190 * is not obligated to read and process their declarations;
6191 * for such documents, the rule that an entity must be
6192 * declared is a well-formedness constraint only if
6193 * standalone='yes'.
6194 */
6195 if (ent == NULL) {
6196 if ((ctxt->standalone == 1) ||
6197 ((ctxt->hasExternalSubset == 0) &&
6198 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006199 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006200 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006201 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006202 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006203 "Entity '%s' not defined\n", name);
6204 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006205 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006206 }
6207
6208 /*
6209 * [ WFC: Parsed Entity ]
6210 * An entity reference must not contain the name of an
6211 * unparsed entity
6212 */
6213 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006214 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006215 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006216 }
6217
6218 /*
6219 * [ WFC: No External Entity References ]
6220 * Attribute values cannot contain direct or indirect
6221 * entity references to external entities.
6222 */
6223 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6224 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006225 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6226 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006227 }
6228 /*
6229 * [ WFC: No < in Attribute Values ]
6230 * The replacement text of any entity referred to directly or
6231 * indirectly in an attribute value (other than "&lt;") must
6232 * not contain a <.
6233 */
6234 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6235 (ent != NULL) &&
6236 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6237 (ent->content != NULL) &&
6238 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006239 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006240 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006241 }
6242
6243 /*
6244 * Internal check, no parameter entities here ...
6245 */
6246 else {
6247 switch (ent->etype) {
6248 case XML_INTERNAL_PARAMETER_ENTITY:
6249 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006250 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6251 "Attempt to reference the parameter entity '%s'\n",
6252 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006253 break;
6254 default:
6255 break;
6256 }
6257 }
6258
6259 /*
6260 * [ WFC: No Recursion ]
6261 * A parsed entity must not contain a recursive reference
6262 * to itself, either directly or indirectly.
6263 * Done somewhere else
6264 */
6265
6266 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006267 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 }
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
6270 }
6271 return(ent);
6272}
6273
6274/**
6275 * xmlParseStringEntityRef:
6276 * @ctxt: an XML parser context
6277 * @str: a pointer to an index in the string
6278 *
6279 * parse ENTITY references declarations, but this version parses it from
6280 * a string value.
6281 *
6282 * [68] EntityRef ::= '&' Name ';'
6283 *
6284 * [ WFC: Entity Declared ]
6285 * In a document without any DTD, a document with only an internal DTD
6286 * subset which contains no parameter entity references, or a document
6287 * with "standalone='yes'", the Name given in the entity reference
6288 * must match that in an entity declaration, except that well-formed
6289 * documents need not declare any of the following entities: amp, lt,
6290 * gt, apos, quot. The declaration of a parameter entity must precede
6291 * any reference to it. Similarly, the declaration of a general entity
6292 * must precede any reference to it which appears in a default value in an
6293 * attribute-list declaration. Note that if entities are declared in the
6294 * external subset or in external parameter entities, a non-validating
6295 * processor is not obligated to read and process their declarations;
6296 * for such documents, the rule that an entity must be declared is a
6297 * well-formedness constraint only if standalone='yes'.
6298 *
6299 * [ WFC: Parsed Entity ]
6300 * An entity reference must not contain the name of an unparsed entity
6301 *
6302 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6303 * is updated to the current location in the string.
6304 */
6305xmlEntityPtr
6306xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6307 xmlChar *name;
6308 const xmlChar *ptr;
6309 xmlChar cur;
6310 xmlEntityPtr ent = NULL;
6311
6312 if ((str == NULL) || (*str == NULL))
6313 return(NULL);
6314 ptr = *str;
6315 cur = *ptr;
6316 if (cur == '&') {
6317 ptr++;
6318 cur = *ptr;
6319 name = xmlParseStringName(ctxt, &ptr);
6320 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006321 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6322 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006323 } else {
6324 if (*ptr == ';') {
6325 ptr++;
6326 /*
6327 * Ask first SAX for entity resolution, otherwise try the
6328 * predefined set.
6329 */
6330 if (ctxt->sax != NULL) {
6331 if (ctxt->sax->getEntity != NULL)
6332 ent = ctxt->sax->getEntity(ctxt->userData, name);
6333 if (ent == NULL)
6334 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006335 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006336 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006337 }
Owen Taylor3473f882001-02-23 17:55:21 +00006338 }
6339 /*
6340 * [ WFC: Entity Declared ]
6341 * In a document without any DTD, a document with only an
6342 * internal DTD subset which contains no parameter entity
6343 * references, or a document with "standalone='yes'", the
6344 * Name given in the entity reference must match that in an
6345 * entity declaration, except that well-formed documents
6346 * need not declare any of the following entities: amp, lt,
6347 * gt, apos, quot.
6348 * The declaration of a parameter entity must precede any
6349 * reference to it.
6350 * Similarly, the declaration of a general entity must
6351 * precede any reference to it which appears in a default
6352 * value in an attribute-list declaration. Note that if
6353 * entities are declared in the external subset or in
6354 * external parameter entities, a non-validating processor
6355 * is not obligated to read and process their declarations;
6356 * for such documents, the rule that an entity must be
6357 * declared is a well-formedness constraint only if
6358 * standalone='yes'.
6359 */
6360 if (ent == NULL) {
6361 if ((ctxt->standalone == 1) ||
6362 ((ctxt->hasExternalSubset == 0) &&
6363 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006364 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006365 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006367 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006368 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006369 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006371 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006372 }
6373
6374 /*
6375 * [ WFC: Parsed Entity ]
6376 * An entity reference must not contain the name of an
6377 * unparsed entity
6378 */
6379 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006380 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006381 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006382 }
6383
6384 /*
6385 * [ WFC: No External Entity References ]
6386 * Attribute values cannot contain direct or indirect
6387 * entity references to external entities.
6388 */
6389 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6390 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006391 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006392 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006393 }
6394 /*
6395 * [ WFC: No < in Attribute Values ]
6396 * The replacement text of any entity referred to directly or
6397 * indirectly in an attribute value (other than "&lt;") must
6398 * not contain a <.
6399 */
6400 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6401 (ent != NULL) &&
6402 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6403 (ent->content != NULL) &&
6404 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006405 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6406 "'<' in entity '%s' is not allowed in attributes values\n",
6407 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006408 }
6409
6410 /*
6411 * Internal check, no parameter entities here ...
6412 */
6413 else {
6414 switch (ent->etype) {
6415 case XML_INTERNAL_PARAMETER_ENTITY:
6416 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006417 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6418 "Attempt to reference the parameter entity '%s'\n",
6419 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 break;
6421 default:
6422 break;
6423 }
6424 }
6425
6426 /*
6427 * [ WFC: No Recursion ]
6428 * A parsed entity must not contain a recursive reference
6429 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006430 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006431 */
6432
6433 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006434 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006435 }
6436 xmlFree(name);
6437 }
6438 }
6439 *str = ptr;
6440 return(ent);
6441}
6442
6443/**
6444 * xmlParsePEReference:
6445 * @ctxt: an XML parser context
6446 *
6447 * parse PEReference declarations
6448 * The entity content is handled directly by pushing it's content as
6449 * a new input stream.
6450 *
6451 * [69] PEReference ::= '%' Name ';'
6452 *
6453 * [ WFC: No Recursion ]
6454 * A parsed entity must not contain a recursive
6455 * reference to itself, either directly or indirectly.
6456 *
6457 * [ WFC: Entity Declared ]
6458 * In a document without any DTD, a document with only an internal DTD
6459 * subset which contains no parameter entity references, or a document
6460 * with "standalone='yes'", ... ... The declaration of a parameter
6461 * entity must precede any reference to it...
6462 *
6463 * [ VC: Entity Declared ]
6464 * In a document with an external subset or external parameter entities
6465 * with "standalone='no'", ... ... The declaration of a parameter entity
6466 * must precede any reference to it...
6467 *
6468 * [ WFC: In DTD ]
6469 * Parameter-entity references may only appear in the DTD.
6470 * NOTE: misleading but this is handled.
6471 */
6472void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006473xmlParsePEReference(xmlParserCtxtPtr ctxt)
6474{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006475 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006476 xmlEntityPtr entity = NULL;
6477 xmlParserInputPtr input;
6478
6479 if (RAW == '%') {
6480 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006481 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006482 if (name == NULL) {
6483 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6484 "xmlParsePEReference: no name\n");
6485 } else {
6486 if (RAW == ';') {
6487 NEXT;
6488 if ((ctxt->sax != NULL) &&
6489 (ctxt->sax->getParameterEntity != NULL))
6490 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6491 name);
6492 if (entity == NULL) {
6493 /*
6494 * [ WFC: Entity Declared ]
6495 * In a document without any DTD, a document with only an
6496 * internal DTD subset which contains no parameter entity
6497 * references, or a document with "standalone='yes'", ...
6498 * ... The declaration of a parameter entity must precede
6499 * any reference to it...
6500 */
6501 if ((ctxt->standalone == 1) ||
6502 ((ctxt->hasExternalSubset == 0) &&
6503 (ctxt->hasPErefs == 0))) {
6504 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6505 "PEReference: %%%s; not found\n",
6506 name);
6507 } else {
6508 /*
6509 * [ VC: Entity Declared ]
6510 * In a document with an external subset or external
6511 * parameter entities with "standalone='no'", ...
6512 * ... The declaration of a parameter entity must
6513 * precede any reference to it...
6514 */
6515 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6516 "PEReference: %%%s; not found\n",
6517 name, NULL);
6518 ctxt->valid = 0;
6519 }
6520 } else {
6521 /*
6522 * Internal checking in case the entity quest barfed
6523 */
6524 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6525 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6526 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6527 "Internal: %%%s; is not a parameter entity\n",
6528 name, NULL);
6529 } else if (ctxt->input->free != deallocblankswrapper) {
6530 input =
6531 xmlNewBlanksWrapperInputStream(ctxt, entity);
6532 xmlPushInput(ctxt, input);
6533 } else {
6534 /*
6535 * TODO !!!
6536 * handle the extra spaces added before and after
6537 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6538 */
6539 input = xmlNewEntityInputStream(ctxt, entity);
6540 xmlPushInput(ctxt, input);
6541 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006542 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006543 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006544 xmlParseTextDecl(ctxt);
6545 if (ctxt->errNo ==
6546 XML_ERR_UNSUPPORTED_ENCODING) {
6547 /*
6548 * The XML REC instructs us to stop parsing
6549 * right here
6550 */
6551 ctxt->instate = XML_PARSER_EOF;
6552 return;
6553 }
6554 }
6555 }
6556 }
6557 ctxt->hasPErefs = 1;
6558 } else {
6559 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6560 }
6561 }
Owen Taylor3473f882001-02-23 17:55:21 +00006562 }
6563}
6564
6565/**
6566 * xmlParseStringPEReference:
6567 * @ctxt: an XML parser context
6568 * @str: a pointer to an index in the string
6569 *
6570 * parse PEReference declarations
6571 *
6572 * [69] PEReference ::= '%' Name ';'
6573 *
6574 * [ WFC: No Recursion ]
6575 * A parsed entity must not contain a recursive
6576 * reference to itself, either directly or indirectly.
6577 *
6578 * [ WFC: Entity Declared ]
6579 * In a document without any DTD, a document with only an internal DTD
6580 * subset which contains no parameter entity references, or a document
6581 * with "standalone='yes'", ... ... The declaration of a parameter
6582 * entity must precede any reference to it...
6583 *
6584 * [ VC: Entity Declared ]
6585 * In a document with an external subset or external parameter entities
6586 * with "standalone='no'", ... ... The declaration of a parameter entity
6587 * must precede any reference to it...
6588 *
6589 * [ WFC: In DTD ]
6590 * Parameter-entity references may only appear in the DTD.
6591 * NOTE: misleading but this is handled.
6592 *
6593 * Returns the string of the entity content.
6594 * str is updated to the current value of the index
6595 */
6596xmlEntityPtr
6597xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6598 const xmlChar *ptr;
6599 xmlChar cur;
6600 xmlChar *name;
6601 xmlEntityPtr entity = NULL;
6602
6603 if ((str == NULL) || (*str == NULL)) return(NULL);
6604 ptr = *str;
6605 cur = *ptr;
6606 if (cur == '%') {
6607 ptr++;
6608 cur = *ptr;
6609 name = xmlParseStringName(ctxt, &ptr);
6610 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006613 } else {
6614 cur = *ptr;
6615 if (cur == ';') {
6616 ptr++;
6617 cur = *ptr;
6618 if ((ctxt->sax != NULL) &&
6619 (ctxt->sax->getParameterEntity != NULL))
6620 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6621 name);
6622 if (entity == NULL) {
6623 /*
6624 * [ WFC: Entity Declared ]
6625 * In a document without any DTD, a document with only an
6626 * internal DTD subset which contains no parameter entity
6627 * references, or a document with "standalone='yes'", ...
6628 * ... The declaration of a parameter entity must precede
6629 * any reference to it...
6630 */
6631 if ((ctxt->standalone == 1) ||
6632 ((ctxt->hasExternalSubset == 0) &&
6633 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006634 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006635 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006636 } else {
6637 /*
6638 * [ VC: Entity Declared ]
6639 * In a document with an external subset or external
6640 * parameter entities with "standalone='no'", ...
6641 * ... The declaration of a parameter entity must
6642 * precede any reference to it...
6643 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006644 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6645 "PEReference: %%%s; not found\n",
6646 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 ctxt->valid = 0;
6648 }
6649 } else {
6650 /*
6651 * Internal checking in case the entity quest barfed
6652 */
6653 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6654 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006655 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6656 "%%%s; is not a parameter entity\n",
6657 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
6659 }
6660 ctxt->hasPErefs = 1;
6661 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006662 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
6664 xmlFree(name);
6665 }
6666 }
6667 *str = ptr;
6668 return(entity);
6669}
6670
6671/**
6672 * xmlParseDocTypeDecl:
6673 * @ctxt: an XML parser context
6674 *
6675 * parse a DOCTYPE declaration
6676 *
6677 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6678 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6679 *
6680 * [ VC: Root Element Type ]
6681 * The Name in the document type declaration must match the element
6682 * type of the root element.
6683 */
6684
6685void
6686xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006687 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006688 xmlChar *ExternalID = NULL;
6689 xmlChar *URI = NULL;
6690
6691 /*
6692 * We know that '<!DOCTYPE' has been detected.
6693 */
6694 SKIP(9);
6695
6696 SKIP_BLANKS;
6697
6698 /*
6699 * Parse the DOCTYPE name.
6700 */
6701 name = xmlParseName(ctxt);
6702 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006703 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6704 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006705 }
6706 ctxt->intSubName = name;
6707
6708 SKIP_BLANKS;
6709
6710 /*
6711 * Check for SystemID and ExternalID
6712 */
6713 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6714
6715 if ((URI != NULL) || (ExternalID != NULL)) {
6716 ctxt->hasExternalSubset = 1;
6717 }
6718 ctxt->extSubURI = URI;
6719 ctxt->extSubSystem = ExternalID;
6720
6721 SKIP_BLANKS;
6722
6723 /*
6724 * Create and update the internal subset.
6725 */
6726 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6727 (!ctxt->disableSAX))
6728 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6729
6730 /*
6731 * Is there any internal subset declarations ?
6732 * they are handled separately in xmlParseInternalSubset()
6733 */
6734 if (RAW == '[')
6735 return;
6736
6737 /*
6738 * We should be at the end of the DOCTYPE declaration.
6739 */
6740 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006741 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006742 }
6743 NEXT;
6744}
6745
6746/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006747 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006748 * @ctxt: an XML parser context
6749 *
6750 * parse the internal subset declaration
6751 *
6752 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6753 */
6754
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006755static void
Owen Taylor3473f882001-02-23 17:55:21 +00006756xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6757 /*
6758 * Is there any DTD definition ?
6759 */
6760 if (RAW == '[') {
6761 ctxt->instate = XML_PARSER_DTD;
6762 NEXT;
6763 /*
6764 * Parse the succession of Markup declarations and
6765 * PEReferences.
6766 * Subsequence (markupdecl | PEReference | S)*
6767 */
6768 while (RAW != ']') {
6769 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006770 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006771
6772 SKIP_BLANKS;
6773 xmlParseMarkupDecl(ctxt);
6774 xmlParsePEReference(ctxt);
6775
6776 /*
6777 * Pop-up of finished entities.
6778 */
6779 while ((RAW == 0) && (ctxt->inputNr > 1))
6780 xmlPopInput(ctxt);
6781
6782 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006783 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006784 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006785 break;
6786 }
6787 }
6788 if (RAW == ']') {
6789 NEXT;
6790 SKIP_BLANKS;
6791 }
6792 }
6793
6794 /*
6795 * We should be at the end of the DOCTYPE declaration.
6796 */
6797 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006798 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006799 }
6800 NEXT;
6801}
6802
Daniel Veillard81273902003-09-30 00:43:48 +00006803#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006804/**
6805 * xmlParseAttribute:
6806 * @ctxt: an XML parser context
6807 * @value: a xmlChar ** used to store the value of the attribute
6808 *
6809 * parse an attribute
6810 *
6811 * [41] Attribute ::= Name Eq AttValue
6812 *
6813 * [ WFC: No External Entity References ]
6814 * Attribute values cannot contain direct or indirect entity references
6815 * to external entities.
6816 *
6817 * [ WFC: No < in Attribute Values ]
6818 * The replacement text of any entity referred to directly or indirectly in
6819 * an attribute value (other than "&lt;") must not contain a <.
6820 *
6821 * [ VC: Attribute Value Type ]
6822 * The attribute must have been declared; the value must be of the type
6823 * declared for it.
6824 *
6825 * [25] Eq ::= S? '=' S?
6826 *
6827 * With namespace:
6828 *
6829 * [NS 11] Attribute ::= QName Eq AttValue
6830 *
6831 * Also the case QName == xmlns:??? is handled independently as a namespace
6832 * definition.
6833 *
6834 * Returns the attribute name, and the value in *value.
6835 */
6836
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006837const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006838xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006839 const xmlChar *name;
6840 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006841
6842 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006843 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006844 name = xmlParseName(ctxt);
6845 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006846 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006847 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006848 return(NULL);
6849 }
6850
6851 /*
6852 * read the value
6853 */
6854 SKIP_BLANKS;
6855 if (RAW == '=') {
6856 NEXT;
6857 SKIP_BLANKS;
6858 val = xmlParseAttValue(ctxt);
6859 ctxt->instate = XML_PARSER_CONTENT;
6860 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006861 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006862 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006863 return(NULL);
6864 }
6865
6866 /*
6867 * Check that xml:lang conforms to the specification
6868 * No more registered as an error, just generate a warning now
6869 * since this was deprecated in XML second edition
6870 */
6871 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6872 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006873 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6874 "Malformed value for xml:lang : %s\n",
6875 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006876 }
6877 }
6878
6879 /*
6880 * Check that xml:space conforms to the specification
6881 */
6882 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6883 if (xmlStrEqual(val, BAD_CAST "default"))
6884 *(ctxt->space) = 0;
6885 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6886 *(ctxt->space) = 1;
6887 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006888 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006889"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006890 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006891 }
6892 }
6893
6894 *value = val;
6895 return(name);
6896}
6897
6898/**
6899 * xmlParseStartTag:
6900 * @ctxt: an XML parser context
6901 *
6902 * parse a start of tag either for rule element or
6903 * EmptyElement. In both case we don't parse the tag closing chars.
6904 *
6905 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6906 *
6907 * [ WFC: Unique Att Spec ]
6908 * No attribute name may appear more than once in the same start-tag or
6909 * empty-element tag.
6910 *
6911 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6912 *
6913 * [ WFC: Unique Att Spec ]
6914 * No attribute name may appear more than once in the same start-tag or
6915 * empty-element tag.
6916 *
6917 * With namespace:
6918 *
6919 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6920 *
6921 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6922 *
6923 * Returns the element name parsed
6924 */
6925
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006926const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006927xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006928 const xmlChar *name;
6929 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006930 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006931 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006932 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006933 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006934 int i;
6935
6936 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006937 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006938
6939 name = xmlParseName(ctxt);
6940 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006941 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006942 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006943 return(NULL);
6944 }
6945
6946 /*
6947 * Now parse the attributes, it ends up with the ending
6948 *
6949 * (S Attribute)* S?
6950 */
6951 SKIP_BLANKS;
6952 GROW;
6953
Daniel Veillard21a0f912001-02-25 19:54:14 +00006954 while ((RAW != '>') &&
6955 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006956 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006957 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006958 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006959
6960 attname = xmlParseAttribute(ctxt, &attvalue);
6961 if ((attname != NULL) && (attvalue != NULL)) {
6962 /*
6963 * [ WFC: Unique Att Spec ]
6964 * No attribute name may appear more than once in the same
6965 * start-tag or empty-element tag.
6966 */
6967 for (i = 0; i < nbatts;i += 2) {
6968 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006969 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006970 xmlFree(attvalue);
6971 goto failed;
6972 }
6973 }
Owen Taylor3473f882001-02-23 17:55:21 +00006974 /*
6975 * Add the pair to atts
6976 */
6977 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006978 maxatts = 22; /* allow for 10 attrs by default */
6979 atts = (const xmlChar **)
6980 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006981 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006982 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006983 if (attvalue != NULL)
6984 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006985 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006986 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006987 ctxt->atts = atts;
6988 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006989 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006990 const xmlChar **n;
6991
Owen Taylor3473f882001-02-23 17:55:21 +00006992 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006993 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006994 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006995 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006996 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006997 if (attvalue != NULL)
6998 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006999 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007000 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007001 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007002 ctxt->atts = atts;
7003 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007004 }
7005 atts[nbatts++] = attname;
7006 atts[nbatts++] = attvalue;
7007 atts[nbatts] = NULL;
7008 atts[nbatts + 1] = NULL;
7009 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007010 if (attvalue != NULL)
7011 xmlFree(attvalue);
7012 }
7013
7014failed:
7015
Daniel Veillard3772de32002-12-17 10:31:45 +00007016 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007017 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7018 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007019 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7021 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007022 }
7023 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007024 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7025 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007026 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7027 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007028 break;
7029 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007030 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007031 GROW;
7032 }
7033
7034 /*
7035 * SAX: Start of Element !
7036 */
7037 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007038 (!ctxt->disableSAX)) {
7039 if (nbatts > 0)
7040 ctxt->sax->startElement(ctxt->userData, name, atts);
7041 else
7042 ctxt->sax->startElement(ctxt->userData, name, NULL);
7043 }
Owen Taylor3473f882001-02-23 17:55:21 +00007044
7045 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007046 /* Free only the content strings */
7047 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007048 if (atts[i] != NULL)
7049 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007050 }
7051 return(name);
7052}
7053
7054/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007055 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007056 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007057 * @line: line of the start tag
7058 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007059 *
7060 * parse an end of tag
7061 *
7062 * [42] ETag ::= '</' Name S? '>'
7063 *
7064 * With namespace
7065 *
7066 * [NS 9] ETag ::= '</' QName S? '>'
7067 */
7068
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007069static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007070xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007071 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007072
7073 GROW;
7074 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007075 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007076 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007077 return;
7078 }
7079 SKIP(2);
7080
Daniel Veillard46de64e2002-05-29 08:21:33 +00007081 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007082
7083 /*
7084 * We should definitely be at the ending "S? '>'" part
7085 */
7086 GROW;
7087 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007088 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007089 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007090 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007091 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007092
7093 /*
7094 * [ WFC: Element Type Match ]
7095 * The Name in an element's end-tag must match the element type in the
7096 * start-tag.
7097 *
7098 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007099 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007100 if (name == NULL) name = BAD_CAST "unparseable";
7101 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007102 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007103 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007104 }
7105
7106 /*
7107 * SAX: End of Tag
7108 */
7109 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7110 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007111 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007112
Daniel Veillarde57ec792003-09-10 10:50:59 +00007113 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007114 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007115 return;
7116}
7117
7118/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007119 * xmlParseEndTag:
7120 * @ctxt: an XML parser context
7121 *
7122 * parse an end of tag
7123 *
7124 * [42] ETag ::= '</' Name S? '>'
7125 *
7126 * With namespace
7127 *
7128 * [NS 9] ETag ::= '</' QName S? '>'
7129 */
7130
7131void
7132xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007133 xmlParseEndTag1(ctxt, 0);
7134}
Daniel Veillard81273902003-09-30 00:43:48 +00007135#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007136
7137/************************************************************************
7138 * *
7139 * SAX 2 specific operations *
7140 * *
7141 ************************************************************************/
7142
7143static const xmlChar *
7144xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7145 int len = 0, l;
7146 int c;
7147 int count = 0;
7148
7149 /*
7150 * Handler for more complex cases
7151 */
7152 GROW;
7153 c = CUR_CHAR(l);
7154 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007155 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007156 return(NULL);
7157 }
7158
7159 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007160 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007161 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007162 (IS_COMBINING(c)) ||
7163 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007164 if (count++ > 100) {
7165 count = 0;
7166 GROW;
7167 }
7168 len += l;
7169 NEXTL(l);
7170 c = CUR_CHAR(l);
7171 }
7172 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7173}
7174
7175/*
7176 * xmlGetNamespace:
7177 * @ctxt: an XML parser context
7178 * @prefix: the prefix to lookup
7179 *
7180 * Lookup the namespace name for the @prefix (which ca be NULL)
7181 * The prefix must come from the @ctxt->dict dictionnary
7182 *
7183 * Returns the namespace name or NULL if not bound
7184 */
7185static const xmlChar *
7186xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7187 int i;
7188
Daniel Veillarde57ec792003-09-10 10:50:59 +00007189 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007190 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007191 if (ctxt->nsTab[i] == prefix) {
7192 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7193 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007194 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007195 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007196 return(NULL);
7197}
7198
7199/**
7200 * xmlParseNCName:
7201 * @ctxt: an XML parser context
7202 *
7203 * parse an XML name.
7204 *
7205 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7206 * CombiningChar | Extender
7207 *
7208 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7209 *
7210 * Returns the Name parsed or NULL
7211 */
7212
7213static const xmlChar *
7214xmlParseNCName(xmlParserCtxtPtr ctxt) {
7215 const xmlChar *in;
7216 const xmlChar *ret;
7217 int count = 0;
7218
7219 /*
7220 * Accelerator for simple ASCII names
7221 */
7222 in = ctxt->input->cur;
7223 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7224 ((*in >= 0x41) && (*in <= 0x5A)) ||
7225 (*in == '_')) {
7226 in++;
7227 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7228 ((*in >= 0x41) && (*in <= 0x5A)) ||
7229 ((*in >= 0x30) && (*in <= 0x39)) ||
7230 (*in == '_') || (*in == '-') ||
7231 (*in == '.'))
7232 in++;
7233 if ((*in > 0) && (*in < 0x80)) {
7234 count = in - ctxt->input->cur;
7235 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7236 ctxt->input->cur = in;
7237 ctxt->nbChars += count;
7238 ctxt->input->col += count;
7239 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007240 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007241 }
7242 return(ret);
7243 }
7244 }
7245 return(xmlParseNCNameComplex(ctxt));
7246}
7247
7248/**
7249 * xmlParseQName:
7250 * @ctxt: an XML parser context
7251 * @prefix: pointer to store the prefix part
7252 *
7253 * parse an XML Namespace QName
7254 *
7255 * [6] QName ::= (Prefix ':')? LocalPart
7256 * [7] Prefix ::= NCName
7257 * [8] LocalPart ::= NCName
7258 *
7259 * Returns the Name parsed or NULL
7260 */
7261
7262static const xmlChar *
7263xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7264 const xmlChar *l, *p;
7265
7266 GROW;
7267
7268 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007269 if (l == NULL) {
7270 if (CUR == ':') {
7271 l = xmlParseName(ctxt);
7272 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007273 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7274 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007275 *prefix = NULL;
7276 return(l);
7277 }
7278 }
7279 return(NULL);
7280 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007281 if (CUR == ':') {
7282 NEXT;
7283 p = l;
7284 l = xmlParseNCName(ctxt);
7285 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007286 xmlChar *tmp;
7287
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007288 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7289 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007290 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7291 p = xmlDictLookup(ctxt->dict, tmp, -1);
7292 if (tmp != NULL) xmlFree(tmp);
7293 *prefix = NULL;
7294 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007295 }
7296 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007297 xmlChar *tmp;
7298
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007299 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7300 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007301 NEXT;
7302 tmp = (xmlChar *) xmlParseName(ctxt);
7303 if (tmp != NULL) {
7304 tmp = xmlBuildQName(tmp, l, NULL, 0);
7305 l = xmlDictLookup(ctxt->dict, tmp, -1);
7306 if (tmp != NULL) xmlFree(tmp);
7307 *prefix = p;
7308 return(l);
7309 }
7310 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7311 l = xmlDictLookup(ctxt->dict, tmp, -1);
7312 if (tmp != NULL) xmlFree(tmp);
7313 *prefix = p;
7314 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007315 }
7316 *prefix = p;
7317 } else
7318 *prefix = NULL;
7319 return(l);
7320}
7321
7322/**
7323 * xmlParseQNameAndCompare:
7324 * @ctxt: an XML parser context
7325 * @name: the localname
7326 * @prefix: the prefix, if any.
7327 *
7328 * parse an XML name and compares for match
7329 * (specialized for endtag parsing)
7330 *
7331 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7332 * and the name for mismatch
7333 */
7334
7335static const xmlChar *
7336xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7337 xmlChar const *prefix) {
7338 const xmlChar *cmp = name;
7339 const xmlChar *in;
7340 const xmlChar *ret;
7341 const xmlChar *prefix2;
7342
7343 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7344
7345 GROW;
7346 in = ctxt->input->cur;
7347
7348 cmp = prefix;
7349 while (*in != 0 && *in == *cmp) {
7350 ++in;
7351 ++cmp;
7352 }
7353 if ((*cmp == 0) && (*in == ':')) {
7354 in++;
7355 cmp = name;
7356 while (*in != 0 && *in == *cmp) {
7357 ++in;
7358 ++cmp;
7359 }
William M. Brack76e95df2003-10-18 16:20:14 +00007360 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007361 /* success */
7362 ctxt->input->cur = in;
7363 return((const xmlChar*) 1);
7364 }
7365 }
7366 /*
7367 * all strings coms from the dictionary, equality can be done directly
7368 */
7369 ret = xmlParseQName (ctxt, &prefix2);
7370 if ((ret == name) && (prefix == prefix2))
7371 return((const xmlChar*) 1);
7372 return ret;
7373}
7374
7375/**
7376 * xmlParseAttValueInternal:
7377 * @ctxt: an XML parser context
7378 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007379 * @alloc: whether the attribute was reallocated as a new string
7380 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007381 *
7382 * parse a value for an attribute.
7383 * NOTE: if no normalization is needed, the routine will return pointers
7384 * directly from the data buffer.
7385 *
7386 * 3.3.3 Attribute-Value Normalization:
7387 * Before the value of an attribute is passed to the application or
7388 * checked for validity, the XML processor must normalize it as follows:
7389 * - a character reference is processed by appending the referenced
7390 * character to the attribute value
7391 * - an entity reference is processed by recursively processing the
7392 * replacement text of the entity
7393 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7394 * appending #x20 to the normalized value, except that only a single
7395 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7396 * parsed entity or the literal entity value of an internal parsed entity
7397 * - other characters are processed by appending them to the normalized value
7398 * If the declared value is not CDATA, then the XML processor must further
7399 * process the normalized attribute value by discarding any leading and
7400 * trailing space (#x20) characters, and by replacing sequences of space
7401 * (#x20) characters by a single space (#x20) character.
7402 * All attributes for which no declaration has been read should be treated
7403 * by a non-validating parser as if declared CDATA.
7404 *
7405 * Returns the AttValue parsed or NULL. The value has to be freed by the
7406 * caller if it was copied, this can be detected by val[*len] == 0.
7407 */
7408
7409static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007410xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7411 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007412{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007413 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007414 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007415 xmlChar *ret = NULL;
7416
7417 GROW;
7418 in = (xmlChar *) CUR_PTR;
7419 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007420 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007421 return (NULL);
7422 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007424
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007425 /*
7426 * try to handle in this routine the most common case where no
7427 * allocation of a new string is required and where content is
7428 * pure ASCII.
7429 */
7430 limit = *in++;
7431 end = ctxt->input->end;
7432 start = in;
7433 if (in >= end) {
7434 const xmlChar *oldbase = ctxt->input->base;
7435 GROW;
7436 if (oldbase != ctxt->input->base) {
7437 long delta = ctxt->input->base - oldbase;
7438 start = start + delta;
7439 in = in + delta;
7440 }
7441 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007442 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007443 if (normalize) {
7444 /*
7445 * Skip any leading spaces
7446 */
7447 while ((in < end) && (*in != limit) &&
7448 ((*in == 0x20) || (*in == 0x9) ||
7449 (*in == 0xA) || (*in == 0xD))) {
7450 in++;
7451 start = in;
7452 if (in >= end) {
7453 const xmlChar *oldbase = ctxt->input->base;
7454 GROW;
7455 if (oldbase != ctxt->input->base) {
7456 long delta = ctxt->input->base - oldbase;
7457 start = start + delta;
7458 in = in + delta;
7459 }
7460 end = ctxt->input->end;
7461 }
7462 }
7463 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7464 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7465 if ((*in++ == 0x20) && (*in == 0x20)) break;
7466 if (in >= end) {
7467 const xmlChar *oldbase = ctxt->input->base;
7468 GROW;
7469 if (oldbase != ctxt->input->base) {
7470 long delta = ctxt->input->base - oldbase;
7471 start = start + delta;
7472 in = in + delta;
7473 }
7474 end = ctxt->input->end;
7475 }
7476 }
7477 last = in;
7478 /*
7479 * skip the trailing blanks
7480 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007481 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007482 while ((in < end) && (*in != limit) &&
7483 ((*in == 0x20) || (*in == 0x9) ||
7484 (*in == 0xA) || (*in == 0xD))) {
7485 in++;
7486 if (in >= end) {
7487 const xmlChar *oldbase = ctxt->input->base;
7488 GROW;
7489 if (oldbase != ctxt->input->base) {
7490 long delta = ctxt->input->base - oldbase;
7491 start = start + delta;
7492 in = in + delta;
7493 last = last + delta;
7494 }
7495 end = ctxt->input->end;
7496 }
7497 }
7498 if (*in != limit) goto need_complex;
7499 } else {
7500 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7501 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7502 in++;
7503 if (in >= end) {
7504 const xmlChar *oldbase = ctxt->input->base;
7505 GROW;
7506 if (oldbase != ctxt->input->base) {
7507 long delta = ctxt->input->base - oldbase;
7508 start = start + delta;
7509 in = in + delta;
7510 }
7511 end = ctxt->input->end;
7512 }
7513 }
7514 last = in;
7515 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007517 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007519 *len = last - start;
7520 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007521 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007522 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007523 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524 }
7525 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007526 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007527 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007528need_complex:
7529 if (alloc) *alloc = 1;
7530 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007531}
7532
7533/**
7534 * xmlParseAttribute2:
7535 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007536 * @pref: the element prefix
7537 * @elem: the element name
7538 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007539 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007540 * @len: an int * to save the length of the attribute
7541 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007542 *
7543 * parse an attribute in the new SAX2 framework.
7544 *
7545 * Returns the attribute name, and the value in *value, .
7546 */
7547
7548static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007549xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7550 const xmlChar *pref, const xmlChar *elem,
7551 const xmlChar **prefix, xmlChar **value,
7552 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007553 const xmlChar *name;
7554 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007555 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007556
7557 *value = NULL;
7558 GROW;
7559 name = xmlParseQName(ctxt, prefix);
7560 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007561 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7562 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007563 return(NULL);
7564 }
7565
7566 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007567 * get the type if needed
7568 */
7569 if (ctxt->attsSpecial != NULL) {
7570 int type;
7571
7572 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7573 pref, elem, *prefix, name);
7574 if (type != 0) normalize = 1;
7575 }
7576
7577 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007578 * read the value
7579 */
7580 SKIP_BLANKS;
7581 if (RAW == '=') {
7582 NEXT;
7583 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007584 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007585 ctxt->instate = XML_PARSER_CONTENT;
7586 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007587 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007588 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007589 return(NULL);
7590 }
7591
7592 /*
7593 * Check that xml:lang conforms to the specification
7594 * No more registered as an error, just generate a warning now
7595 * since this was deprecated in XML second edition
7596 */
7597 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7598 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007599 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7600 "Malformed value for xml:lang : %s\n",
7601 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007602 }
7603 }
7604
7605 /*
7606 * Check that xml:space conforms to the specification
7607 */
7608 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7609 if (xmlStrEqual(val, BAD_CAST "default"))
7610 *(ctxt->space) = 0;
7611 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7612 *(ctxt->space) = 1;
7613 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007614 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007615"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7616 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007617 }
7618 }
7619
7620 *value = val;
7621 return(name);
7622}
7623
7624/**
7625 * xmlParseStartTag2:
7626 * @ctxt: an XML parser context
7627 *
7628 * parse a start of tag either for rule element or
7629 * EmptyElement. In both case we don't parse the tag closing chars.
7630 * This routine is called when running SAX2 parsing
7631 *
7632 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7633 *
7634 * [ WFC: Unique Att Spec ]
7635 * No attribute name may appear more than once in the same start-tag or
7636 * empty-element tag.
7637 *
7638 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7639 *
7640 * [ WFC: Unique Att Spec ]
7641 * No attribute name may appear more than once in the same start-tag or
7642 * empty-element tag.
7643 *
7644 * With namespace:
7645 *
7646 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7647 *
7648 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7649 *
7650 * Returns the element name parsed
7651 */
7652
7653static const xmlChar *
7654xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7655 const xmlChar **URI) {
7656 const xmlChar *localname;
7657 const xmlChar *prefix;
7658 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007659 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007660 const xmlChar *nsname;
7661 xmlChar *attvalue;
7662 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007664 int nratts, nbatts, nbdef;
7665 int i, j, nbNs, attval;
7666 const xmlChar *base;
7667 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007668
7669 if (RAW != '<') return(NULL);
7670 NEXT1;
7671
7672 /*
7673 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7674 * point since the attribute values may be stored as pointers to
7675 * the buffer and calling SHRINK would destroy them !
7676 * The Shrinking is only possible once the full set of attribute
7677 * callbacks have been done.
7678 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007679reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007680 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007681 base = ctxt->input->base;
7682 cur = ctxt->input->cur - ctxt->input->base;
7683 nbatts = 0;
7684 nratts = 0;
7685 nbdef = 0;
7686 nbNs = 0;
7687 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007688
7689 localname = xmlParseQName(ctxt, &prefix);
7690 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007691 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7692 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 return(NULL);
7694 }
7695
7696 /*
7697 * Now parse the attributes, it ends up with the ending
7698 *
7699 * (S Attribute)* S?
7700 */
7701 SKIP_BLANKS;
7702 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007703 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704
7705 while ((RAW != '>') &&
7706 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007707 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 const xmlChar *q = CUR_PTR;
7709 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007710 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007711
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007712 attname = xmlParseAttribute2(ctxt, prefix, localname,
7713 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 if ((attname != NULL) && (attvalue != NULL)) {
7715 if (len < 0) len = xmlStrlen(attvalue);
7716 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007717 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7718 xmlURIPtr uri;
7719
7720 if (*URL != 0) {
7721 uri = xmlParseURI((const char *) URL);
7722 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007723 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7724 "xmlns: %s not a valid URI\n",
7725 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007726 } else {
7727 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007728 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7729 "xmlns: URI %s is not absolute\n",
7730 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007731 }
7732 xmlFreeURI(uri);
7733 }
7734 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007736 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007738 for (j = 1;j <= nbNs;j++)
7739 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7740 break;
7741 if (j <= nbNs)
7742 xmlErrAttributeDup(ctxt, NULL, attname);
7743 else
7744 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007745 if (alloc != 0) xmlFree(attvalue);
7746 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007747 continue;
7748 }
7749 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007750 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7751 xmlURIPtr uri;
7752
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007753 if (attname == ctxt->str_xml) {
7754 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007755 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7756 "xml namespace prefix mapped to wrong URI\n",
7757 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007758 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007759 /*
7760 * Do not keep a namespace definition node
7761 */
7762 if (alloc != 0) xmlFree(attvalue);
7763 SKIP_BLANKS;
7764 continue;
7765 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007766 uri = xmlParseURI((const char *) URL);
7767 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007768 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7769 "xmlns:%s: '%s' is not a valid URI\n",
7770 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007771 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007772 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007773 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7774 "xmlns:%s: URI %s is not absolute\n",
7775 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007776 }
7777 xmlFreeURI(uri);
7778 }
7779
Daniel Veillard0fb18932003-09-07 09:14:37 +00007780 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007781 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007783 for (j = 1;j <= nbNs;j++)
7784 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7785 break;
7786 if (j <= nbNs)
7787 xmlErrAttributeDup(ctxt, aprefix, attname);
7788 else
7789 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007790 if (alloc != 0) xmlFree(attvalue);
7791 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007792 continue;
7793 }
7794
7795 /*
7796 * Add the pair to atts
7797 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007798 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7799 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007800 if (attvalue[len] == 0)
7801 xmlFree(attvalue);
7802 goto failed;
7803 }
7804 maxatts = ctxt->maxatts;
7805 atts = ctxt->atts;
7806 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007807 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007808 atts[nbatts++] = attname;
7809 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007810 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007811 atts[nbatts++] = attvalue;
7812 attvalue += len;
7813 atts[nbatts++] = attvalue;
7814 /*
7815 * tag if some deallocation is needed
7816 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007817 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007818 } else {
7819 if ((attvalue != NULL) && (attvalue[len] == 0))
7820 xmlFree(attvalue);
7821 }
7822
7823failed:
7824
7825 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007826 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007827 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7828 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007829 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007830 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7831 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007832 }
7833 SKIP_BLANKS;
7834 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7835 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007836 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007837 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007838 break;
7839 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007840 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007841 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007842 }
7843
Daniel Veillard0fb18932003-09-07 09:14:37 +00007844 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007845 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007846 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007847 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007848 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7849 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007850 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007851 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007852 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007853 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007854 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007855 /*
7856 * [ WFC: Unique Att Spec ]
7857 * No attribute name may appear more than once in the same
7858 * start-tag or empty-element tag.
7859 * As extended by the Namespace in XML REC.
7860 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007861 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007862 if (atts[i] == atts[j]) {
7863 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007864 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007865 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007866 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007867 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007868 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007869 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007870 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007871 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872 }
7873 }
7874 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007875 }
7876
7877 /*
7878 * The attributes defaulting
7879 */
7880 if (ctxt->attsDefault != NULL) {
7881 xmlDefAttrsPtr defaults;
7882
7883 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7884 if (defaults != NULL) {
7885 for (i = 0;i < defaults->nbAttrs;i++) {
7886 attname = defaults->values[4 * i];
7887 aprefix = defaults->values[4 * i + 1];
7888
7889 /*
7890 * special work for namespaces defaulted defs
7891 */
7892 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7893 /*
7894 * check that it's not a defined namespace
7895 */
7896 for (j = 1;j <= nbNs;j++)
7897 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7898 break;
7899 if (j <= nbNs) continue;
7900
7901 nsname = xmlGetNamespace(ctxt, NULL);
7902 if (nsname != defaults->values[4 * i + 2]) {
7903 if (nsPush(ctxt, NULL,
7904 defaults->values[4 * i + 2]) > 0)
7905 nbNs++;
7906 }
7907 } else if (aprefix == ctxt->str_xmlns) {
7908 /*
7909 * check that it's not a defined namespace
7910 */
7911 for (j = 1;j <= nbNs;j++)
7912 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7913 break;
7914 if (j <= nbNs) continue;
7915
7916 nsname = xmlGetNamespace(ctxt, attname);
7917 if (nsname != defaults->values[2]) {
7918 if (nsPush(ctxt, attname,
7919 defaults->values[4 * i + 2]) > 0)
7920 nbNs++;
7921 }
7922 } else {
7923 /*
7924 * check that it's not a defined attribute
7925 */
7926 for (j = 0;j < nbatts;j+=5) {
7927 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7928 break;
7929 }
7930 if (j < nbatts) continue;
7931
7932 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7933 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007934 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007935 }
7936 maxatts = ctxt->maxatts;
7937 atts = ctxt->atts;
7938 }
7939 atts[nbatts++] = attname;
7940 atts[nbatts++] = aprefix;
7941 if (aprefix == NULL)
7942 atts[nbatts++] = NULL;
7943 else
7944 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7945 atts[nbatts++] = defaults->values[4 * i + 2];
7946 atts[nbatts++] = defaults->values[4 * i + 3];
7947 nbdef++;
7948 }
7949 }
7950 }
7951 }
7952
7953 nsname = xmlGetNamespace(ctxt, prefix);
7954 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007955 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7956 "Namespace prefix %s on %s is not defined\n",
7957 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007958 }
7959 *pref = prefix;
7960 *URI = nsname;
7961
7962 /*
7963 * SAX: Start of Element !
7964 */
7965 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7966 (!ctxt->disableSAX)) {
7967 if (nbNs > 0)
7968 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7969 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7970 nbatts / 5, nbdef, atts);
7971 else
7972 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7973 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7974 }
7975
7976 /*
7977 * Free up attribute allocated strings if needed
7978 */
7979 if (attval != 0) {
7980 for (i = 3,j = 0; j < nratts;i += 5,j++)
7981 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7982 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983 }
7984
7985 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007986
7987base_changed:
7988 /*
7989 * the attribute strings are valid iif the base didn't changed
7990 */
7991 if (attval != 0) {
7992 for (i = 3,j = 0; j < nratts;i += 5,j++)
7993 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7994 xmlFree((xmlChar *) atts[i]);
7995 }
7996 ctxt->input->cur = ctxt->input->base + cur;
7997 if (ctxt->wellFormed == 1) {
7998 goto reparse;
7999 }
8000 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008001}
8002
8003/**
8004 * xmlParseEndTag2:
8005 * @ctxt: an XML parser context
8006 * @line: line of the start tag
8007 * @nsNr: number of namespaces on the start tag
8008 *
8009 * parse an end of tag
8010 *
8011 * [42] ETag ::= '</' Name S? '>'
8012 *
8013 * With namespace
8014 *
8015 * [NS 9] ETag ::= '</' QName S? '>'
8016 */
8017
8018static void
8019xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8020 const xmlChar *URI, int line, int nsNr) {
8021 const xmlChar *name;
8022
8023 GROW;
8024 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008025 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008026 return;
8027 }
8028 SKIP(2);
8029
8030 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8031
8032 /*
8033 * We should definitely be at the ending "S? '>'" part
8034 */
8035 GROW;
8036 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008037 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008038 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008039 } else
8040 NEXT1;
8041
8042 /*
8043 * [ WFC: Element Type Match ]
8044 * The Name in an element's end-tag must match the element type in the
8045 * start-tag.
8046 *
8047 */
8048 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008049 if (name == NULL) name = BAD_CAST "unparseable";
8050 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008051 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008052 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008053 }
8054
8055 /*
8056 * SAX: End of Tag
8057 */
8058 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8059 (!ctxt->disableSAX))
8060 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8061
Daniel Veillard0fb18932003-09-07 09:14:37 +00008062 spacePop(ctxt);
8063 if (nsNr != 0)
8064 nsPop(ctxt, nsNr);
8065 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008066}
8067
8068/**
Owen Taylor3473f882001-02-23 17:55:21 +00008069 * xmlParseCDSect:
8070 * @ctxt: an XML parser context
8071 *
8072 * Parse escaped pure raw content.
8073 *
8074 * [18] CDSect ::= CDStart CData CDEnd
8075 *
8076 * [19] CDStart ::= '<![CDATA['
8077 *
8078 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8079 *
8080 * [21] CDEnd ::= ']]>'
8081 */
8082void
8083xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8084 xmlChar *buf = NULL;
8085 int len = 0;
8086 int size = XML_PARSER_BUFFER_SIZE;
8087 int r, rl;
8088 int s, sl;
8089 int cur, l;
8090 int count = 0;
8091
Daniel Veillard8f597c32003-10-06 08:19:27 +00008092 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008093 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008094 SKIP(9);
8095 } else
8096 return;
8097
8098 ctxt->instate = XML_PARSER_CDATA_SECTION;
8099 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008100 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008101 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008102 ctxt->instate = XML_PARSER_CONTENT;
8103 return;
8104 }
8105 NEXTL(rl);
8106 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008107 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008108 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008109 ctxt->instate = XML_PARSER_CONTENT;
8110 return;
8111 }
8112 NEXTL(sl);
8113 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008114 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008115 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008116 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008117 return;
8118 }
William M. Brack871611b2003-10-18 04:53:14 +00008119 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008120 ((r != ']') || (s != ']') || (cur != '>'))) {
8121 if (len + 5 >= size) {
8122 size *= 2;
8123 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8124 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008125 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008126 return;
8127 }
8128 }
8129 COPY_BUF(rl,buf,len,r);
8130 r = s;
8131 rl = sl;
8132 s = cur;
8133 sl = l;
8134 count++;
8135 if (count > 50) {
8136 GROW;
8137 count = 0;
8138 }
8139 NEXTL(l);
8140 cur = CUR_CHAR(l);
8141 }
8142 buf[len] = 0;
8143 ctxt->instate = XML_PARSER_CONTENT;
8144 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008145 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008146 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008147 xmlFree(buf);
8148 return;
8149 }
8150 NEXTL(l);
8151
8152 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008153 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008154 */
8155 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8156 if (ctxt->sax->cdataBlock != NULL)
8157 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008158 else if (ctxt->sax->characters != NULL)
8159 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008160 }
8161 xmlFree(buf);
8162}
8163
8164/**
8165 * xmlParseContent:
8166 * @ctxt: an XML parser context
8167 *
8168 * Parse a content:
8169 *
8170 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8171 */
8172
8173void
8174xmlParseContent(xmlParserCtxtPtr ctxt) {
8175 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008176 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008177 ((RAW != '<') || (NXT(1) != '/'))) {
8178 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008179 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008180 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008181
8182 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008183 * First case : a Processing Instruction.
8184 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008185 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008186 xmlParsePI(ctxt);
8187 }
8188
8189 /*
8190 * Second case : a CDSection
8191 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008192 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008193 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008194 xmlParseCDSect(ctxt);
8195 }
8196
8197 /*
8198 * Third case : a comment
8199 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008200 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008201 (NXT(2) == '-') && (NXT(3) == '-')) {
8202 xmlParseComment(ctxt);
8203 ctxt->instate = XML_PARSER_CONTENT;
8204 }
8205
8206 /*
8207 * Fourth case : a sub-element.
8208 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008209 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008210 xmlParseElement(ctxt);
8211 }
8212
8213 /*
8214 * Fifth case : a reference. If if has not been resolved,
8215 * parsing returns it's Name, create the node
8216 */
8217
Daniel Veillard21a0f912001-02-25 19:54:14 +00008218 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008219 xmlParseReference(ctxt);
8220 }
8221
8222 /*
8223 * Last case, text. Note that References are handled directly.
8224 */
8225 else {
8226 xmlParseCharData(ctxt, 0);
8227 }
8228
8229 GROW;
8230 /*
8231 * Pop-up of finished entities.
8232 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008233 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008234 xmlPopInput(ctxt);
8235 SHRINK;
8236
Daniel Veillardfdc91562002-07-01 21:52:03 +00008237 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008238 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8239 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008240 ctxt->instate = XML_PARSER_EOF;
8241 break;
8242 }
8243 }
8244}
8245
8246/**
8247 * xmlParseElement:
8248 * @ctxt: an XML parser context
8249 *
8250 * parse an XML element, this is highly recursive
8251 *
8252 * [39] element ::= EmptyElemTag | STag content ETag
8253 *
8254 * [ WFC: Element Type Match ]
8255 * The Name in an element's end-tag must match the element type in the
8256 * start-tag.
8257 *
Owen Taylor3473f882001-02-23 17:55:21 +00008258 */
8259
8260void
8261xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008262 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008263 const xmlChar *prefix;
8264 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008265 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008266 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008267 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008269
8270 /* Capture start position */
8271 if (ctxt->record_info) {
8272 node_info.begin_pos = ctxt->input->consumed +
8273 (CUR_PTR - ctxt->input->base);
8274 node_info.begin_line = ctxt->input->line;
8275 }
8276
8277 if (ctxt->spaceNr == 0)
8278 spacePush(ctxt, -1);
8279 else
8280 spacePush(ctxt, *ctxt->space);
8281
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008282 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008283#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008284 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008285#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008286 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008287#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 else
8289 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008290#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008291 if (name == NULL) {
8292 spacePop(ctxt);
8293 return;
8294 }
8295 namePush(ctxt, name);
8296 ret = ctxt->node;
8297
Daniel Veillard4432df22003-09-28 18:58:27 +00008298#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008299 /*
8300 * [ VC: Root Element Type ]
8301 * The Name in the document type declaration must match the element
8302 * type of the root element.
8303 */
8304 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8305 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8306 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008307#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008308
8309 /*
8310 * Check for an Empty Element.
8311 */
8312 if ((RAW == '/') && (NXT(1) == '>')) {
8313 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008314 if (ctxt->sax2) {
8315 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8316 (!ctxt->disableSAX))
8317 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008318#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008319 } else {
8320 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8321 (!ctxt->disableSAX))
8322 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008323#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008324 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008325 namePop(ctxt);
8326 spacePop(ctxt);
8327 if (nsNr != ctxt->nsNr)
8328 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008329 if ( ret != NULL && ctxt->record_info ) {
8330 node_info.end_pos = ctxt->input->consumed +
8331 (CUR_PTR - ctxt->input->base);
8332 node_info.end_line = ctxt->input->line;
8333 node_info.node = ret;
8334 xmlParserAddNodeInfo(ctxt, &node_info);
8335 }
8336 return;
8337 }
8338 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008339 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008340 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008341 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8342 "Couldn't find end of Start Tag %s line %d\n",
8343 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008344
8345 /*
8346 * end of parsing of this node.
8347 */
8348 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008349 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008350 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008351 if (nsNr != ctxt->nsNr)
8352 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008353
8354 /*
8355 * Capture end position and add node
8356 */
8357 if ( ret != NULL && ctxt->record_info ) {
8358 node_info.end_pos = ctxt->input->consumed +
8359 (CUR_PTR - ctxt->input->base);
8360 node_info.end_line = ctxt->input->line;
8361 node_info.node = ret;
8362 xmlParserAddNodeInfo(ctxt, &node_info);
8363 }
8364 return;
8365 }
8366
8367 /*
8368 * Parse the content of the element:
8369 */
8370 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008371 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008372 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008373 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008374 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008375
8376 /*
8377 * end of parsing of this node.
8378 */
8379 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008380 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008381 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008382 if (nsNr != ctxt->nsNr)
8383 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008384 return;
8385 }
8386
8387 /*
8388 * parse the end of tag: '</' should be here.
8389 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008390 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008391 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008392 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008393 }
8394#ifdef LIBXML_SAX1_ENABLED
8395 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008396 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008397#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008398
8399 /*
8400 * Capture end position and add node
8401 */
8402 if ( ret != NULL && ctxt->record_info ) {
8403 node_info.end_pos = ctxt->input->consumed +
8404 (CUR_PTR - ctxt->input->base);
8405 node_info.end_line = ctxt->input->line;
8406 node_info.node = ret;
8407 xmlParserAddNodeInfo(ctxt, &node_info);
8408 }
8409}
8410
8411/**
8412 * xmlParseVersionNum:
8413 * @ctxt: an XML parser context
8414 *
8415 * parse the XML version value.
8416 *
8417 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8418 *
8419 * Returns the string giving the XML version number, or NULL
8420 */
8421xmlChar *
8422xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8423 xmlChar *buf = NULL;
8424 int len = 0;
8425 int size = 10;
8426 xmlChar cur;
8427
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008428 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008429 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008430 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008431 return(NULL);
8432 }
8433 cur = CUR;
8434 while (((cur >= 'a') && (cur <= 'z')) ||
8435 ((cur >= 'A') && (cur <= 'Z')) ||
8436 ((cur >= '0') && (cur <= '9')) ||
8437 (cur == '_') || (cur == '.') ||
8438 (cur == ':') || (cur == '-')) {
8439 if (len + 1 >= size) {
8440 size *= 2;
8441 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8442 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008443 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008444 return(NULL);
8445 }
8446 }
8447 buf[len++] = cur;
8448 NEXT;
8449 cur=CUR;
8450 }
8451 buf[len] = 0;
8452 return(buf);
8453}
8454
8455/**
8456 * xmlParseVersionInfo:
8457 * @ctxt: an XML parser context
8458 *
8459 * parse the XML version.
8460 *
8461 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8462 *
8463 * [25] Eq ::= S? '=' S?
8464 *
8465 * Returns the version string, e.g. "1.0"
8466 */
8467
8468xmlChar *
8469xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8470 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008471
Daniel Veillarda07050d2003-10-19 14:46:32 +00008472 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008473 SKIP(7);
8474 SKIP_BLANKS;
8475 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008476 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008477 return(NULL);
8478 }
8479 NEXT;
8480 SKIP_BLANKS;
8481 if (RAW == '"') {
8482 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008483 version = xmlParseVersionNum(ctxt);
8484 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008485 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008486 } else
8487 NEXT;
8488 } else if (RAW == '\''){
8489 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008490 version = xmlParseVersionNum(ctxt);
8491 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008492 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008493 } else
8494 NEXT;
8495 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008496 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008497 }
8498 }
8499 return(version);
8500}
8501
8502/**
8503 * xmlParseEncName:
8504 * @ctxt: an XML parser context
8505 *
8506 * parse the XML encoding name
8507 *
8508 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8509 *
8510 * Returns the encoding name value or NULL
8511 */
8512xmlChar *
8513xmlParseEncName(xmlParserCtxtPtr ctxt) {
8514 xmlChar *buf = NULL;
8515 int len = 0;
8516 int size = 10;
8517 xmlChar cur;
8518
8519 cur = CUR;
8520 if (((cur >= 'a') && (cur <= 'z')) ||
8521 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008522 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008523 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008524 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008525 return(NULL);
8526 }
8527
8528 buf[len++] = cur;
8529 NEXT;
8530 cur = CUR;
8531 while (((cur >= 'a') && (cur <= 'z')) ||
8532 ((cur >= 'A') && (cur <= 'Z')) ||
8533 ((cur >= '0') && (cur <= '9')) ||
8534 (cur == '.') || (cur == '_') ||
8535 (cur == '-')) {
8536 if (len + 1 >= size) {
8537 size *= 2;
8538 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8539 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008540 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008541 return(NULL);
8542 }
8543 }
8544 buf[len++] = cur;
8545 NEXT;
8546 cur = CUR;
8547 if (cur == 0) {
8548 SHRINK;
8549 GROW;
8550 cur = CUR;
8551 }
8552 }
8553 buf[len] = 0;
8554 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008555 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008556 }
8557 return(buf);
8558}
8559
8560/**
8561 * xmlParseEncodingDecl:
8562 * @ctxt: an XML parser context
8563 *
8564 * parse the XML encoding declaration
8565 *
8566 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8567 *
8568 * this setups the conversion filters.
8569 *
8570 * Returns the encoding value or NULL
8571 */
8572
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008573const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008574xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8575 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008576
8577 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008578 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008579 SKIP(8);
8580 SKIP_BLANKS;
8581 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008582 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008583 return(NULL);
8584 }
8585 NEXT;
8586 SKIP_BLANKS;
8587 if (RAW == '"') {
8588 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008589 encoding = xmlParseEncName(ctxt);
8590 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008591 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008592 } else
8593 NEXT;
8594 } else if (RAW == '\''){
8595 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008596 encoding = xmlParseEncName(ctxt);
8597 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008598 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599 } else
8600 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008601 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008602 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008603 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008604 /*
8605 * UTF-16 encoding stwich has already taken place at this stage,
8606 * more over the little-endian/big-endian selection is already done
8607 */
8608 if ((encoding != NULL) &&
8609 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8610 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008611 if (ctxt->encoding != NULL)
8612 xmlFree((xmlChar *) ctxt->encoding);
8613 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008614 }
8615 /*
8616 * UTF-8 encoding is handled natively
8617 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008618 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008619 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8620 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008621 if (ctxt->encoding != NULL)
8622 xmlFree((xmlChar *) ctxt->encoding);
8623 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008624 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008625 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008626 xmlCharEncodingHandlerPtr handler;
8627
8628 if (ctxt->input->encoding != NULL)
8629 xmlFree((xmlChar *) ctxt->input->encoding);
8630 ctxt->input->encoding = encoding;
8631
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008632 handler = xmlFindCharEncodingHandler((const char *) encoding);
8633 if (handler != NULL) {
8634 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008635 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008636 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008637 "Unsupported encoding %s\n", encoding);
8638 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008639 }
8640 }
8641 }
8642 return(encoding);
8643}
8644
8645/**
8646 * xmlParseSDDecl:
8647 * @ctxt: an XML parser context
8648 *
8649 * parse the XML standalone declaration
8650 *
8651 * [32] SDDecl ::= S 'standalone' Eq
8652 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8653 *
8654 * [ VC: Standalone Document Declaration ]
8655 * TODO The standalone document declaration must have the value "no"
8656 * if any external markup declarations contain declarations of:
8657 * - attributes with default values, if elements to which these
8658 * attributes apply appear in the document without specifications
8659 * of values for these attributes, or
8660 * - entities (other than amp, lt, gt, apos, quot), if references
8661 * to those entities appear in the document, or
8662 * - attributes with values subject to normalization, where the
8663 * attribute appears in the document with a value which will change
8664 * as a result of normalization, or
8665 * - element types with element content, if white space occurs directly
8666 * within any instance of those types.
8667 *
8668 * Returns 1 if standalone, 0 otherwise
8669 */
8670
8671int
8672xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8673 int standalone = -1;
8674
8675 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008676 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008677 SKIP(10);
8678 SKIP_BLANKS;
8679 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008680 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008681 return(standalone);
8682 }
8683 NEXT;
8684 SKIP_BLANKS;
8685 if (RAW == '\''){
8686 NEXT;
8687 if ((RAW == 'n') && (NXT(1) == 'o')) {
8688 standalone = 0;
8689 SKIP(2);
8690 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8691 (NXT(2) == 's')) {
8692 standalone = 1;
8693 SKIP(3);
8694 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008695 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008696 }
8697 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008699 } else
8700 NEXT;
8701 } else if (RAW == '"'){
8702 NEXT;
8703 if ((RAW == 'n') && (NXT(1) == 'o')) {
8704 standalone = 0;
8705 SKIP(2);
8706 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8707 (NXT(2) == 's')) {
8708 standalone = 1;
8709 SKIP(3);
8710 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008711 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008712 }
8713 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008714 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008715 } else
8716 NEXT;
8717 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008718 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008719 }
8720 }
8721 return(standalone);
8722}
8723
8724/**
8725 * xmlParseXMLDecl:
8726 * @ctxt: an XML parser context
8727 *
8728 * parse an XML declaration header
8729 *
8730 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8731 */
8732
8733void
8734xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8735 xmlChar *version;
8736
8737 /*
8738 * We know that '<?xml' is here.
8739 */
8740 SKIP(5);
8741
William M. Brack76e95df2003-10-18 16:20:14 +00008742 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008743 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8744 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008745 }
8746 SKIP_BLANKS;
8747
8748 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008749 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008750 */
8751 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008752 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008753 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008754 } else {
8755 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8756 /*
8757 * TODO: Blueberry should be detected here
8758 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008759 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8760 "Unsupported version '%s'\n",
8761 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008762 }
8763 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008764 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008765 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008766 }
Owen Taylor3473f882001-02-23 17:55:21 +00008767
8768 /*
8769 * We may have the encoding declaration
8770 */
William M. Brack76e95df2003-10-18 16:20:14 +00008771 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008772 if ((RAW == '?') && (NXT(1) == '>')) {
8773 SKIP(2);
8774 return;
8775 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008777 }
8778 xmlParseEncodingDecl(ctxt);
8779 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8780 /*
8781 * The XML REC instructs us to stop parsing right here
8782 */
8783 return;
8784 }
8785
8786 /*
8787 * We may have the standalone status.
8788 */
William M. Brack76e95df2003-10-18 16:20:14 +00008789 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008790 if ((RAW == '?') && (NXT(1) == '>')) {
8791 SKIP(2);
8792 return;
8793 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008794 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008795 }
8796 SKIP_BLANKS;
8797 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8798
8799 SKIP_BLANKS;
8800 if ((RAW == '?') && (NXT(1) == '>')) {
8801 SKIP(2);
8802 } else if (RAW == '>') {
8803 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008804 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008805 NEXT;
8806 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008807 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008808 MOVETO_ENDTAG(CUR_PTR);
8809 NEXT;
8810 }
8811}
8812
8813/**
8814 * xmlParseMisc:
8815 * @ctxt: an XML parser context
8816 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008817 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008818 *
8819 * [27] Misc ::= Comment | PI | S
8820 */
8821
8822void
8823xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008824 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008825 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008826 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008827 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008828 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008829 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008830 NEXT;
8831 } else
8832 xmlParseComment(ctxt);
8833 }
8834}
8835
8836/**
8837 * xmlParseDocument:
8838 * @ctxt: an XML parser context
8839 *
8840 * parse an XML document (and build a tree if using the standard SAX
8841 * interface).
8842 *
8843 * [1] document ::= prolog element Misc*
8844 *
8845 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8846 *
8847 * Returns 0, -1 in case of error. the parser context is augmented
8848 * as a result of the parsing.
8849 */
8850
8851int
8852xmlParseDocument(xmlParserCtxtPtr ctxt) {
8853 xmlChar start[4];
8854 xmlCharEncoding enc;
8855
8856 xmlInitParser();
8857
8858 GROW;
8859
8860 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008861 * SAX: detecting the level.
8862 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008863 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008864
8865 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008866 * SAX: beginning of the document processing.
8867 */
8868 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8869 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8870
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008871 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8872 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008873 /*
8874 * Get the 4 first bytes and decode the charset
8875 * if enc != XML_CHAR_ENCODING_NONE
8876 * plug some encoding conversion routines.
8877 */
8878 start[0] = RAW;
8879 start[1] = NXT(1);
8880 start[2] = NXT(2);
8881 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008882 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008883 if (enc != XML_CHAR_ENCODING_NONE) {
8884 xmlSwitchEncoding(ctxt, enc);
8885 }
Owen Taylor3473f882001-02-23 17:55:21 +00008886 }
8887
8888
8889 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008890 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008891 }
8892
8893 /*
8894 * Check for the XMLDecl in the Prolog.
8895 */
8896 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008897 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008898
8899 /*
8900 * Note that we will switch encoding on the fly.
8901 */
8902 xmlParseXMLDecl(ctxt);
8903 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8904 /*
8905 * The XML REC instructs us to stop parsing right here
8906 */
8907 return(-1);
8908 }
8909 ctxt->standalone = ctxt->input->standalone;
8910 SKIP_BLANKS;
8911 } else {
8912 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8913 }
8914 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8915 ctxt->sax->startDocument(ctxt->userData);
8916
8917 /*
8918 * The Misc part of the Prolog
8919 */
8920 GROW;
8921 xmlParseMisc(ctxt);
8922
8923 /*
8924 * Then possibly doc type declaration(s) and more Misc
8925 * (doctypedecl Misc*)?
8926 */
8927 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008928 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008929
8930 ctxt->inSubset = 1;
8931 xmlParseDocTypeDecl(ctxt);
8932 if (RAW == '[') {
8933 ctxt->instate = XML_PARSER_DTD;
8934 xmlParseInternalSubset(ctxt);
8935 }
8936
8937 /*
8938 * Create and update the external subset.
8939 */
8940 ctxt->inSubset = 2;
8941 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8942 (!ctxt->disableSAX))
8943 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8944 ctxt->extSubSystem, ctxt->extSubURI);
8945 ctxt->inSubset = 0;
8946
8947
8948 ctxt->instate = XML_PARSER_PROLOG;
8949 xmlParseMisc(ctxt);
8950 }
8951
8952 /*
8953 * Time to start parsing the tree itself
8954 */
8955 GROW;
8956 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008957 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8958 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008959 } else {
8960 ctxt->instate = XML_PARSER_CONTENT;
8961 xmlParseElement(ctxt);
8962 ctxt->instate = XML_PARSER_EPILOG;
8963
8964
8965 /*
8966 * The Misc part at the end
8967 */
8968 xmlParseMisc(ctxt);
8969
Daniel Veillard561b7f82002-03-20 21:55:57 +00008970 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008971 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008972 }
8973 ctxt->instate = XML_PARSER_EOF;
8974 }
8975
8976 /*
8977 * SAX: end of the document processing.
8978 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008979 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008980 ctxt->sax->endDocument(ctxt->userData);
8981
Daniel Veillard5997aca2002-03-18 18:36:20 +00008982 /*
8983 * Remove locally kept entity definitions if the tree was not built
8984 */
8985 if ((ctxt->myDoc != NULL) &&
8986 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8987 xmlFreeDoc(ctxt->myDoc);
8988 ctxt->myDoc = NULL;
8989 }
8990
Daniel Veillardc7612992002-02-17 22:47:37 +00008991 if (! ctxt->wellFormed) {
8992 ctxt->valid = 0;
8993 return(-1);
8994 }
Owen Taylor3473f882001-02-23 17:55:21 +00008995 return(0);
8996}
8997
8998/**
8999 * xmlParseExtParsedEnt:
9000 * @ctxt: an XML parser context
9001 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009002 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009003 * An external general parsed entity is well-formed if it matches the
9004 * production labeled extParsedEnt.
9005 *
9006 * [78] extParsedEnt ::= TextDecl? content
9007 *
9008 * Returns 0, -1 in case of error. the parser context is augmented
9009 * as a result of the parsing.
9010 */
9011
9012int
9013xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9014 xmlChar start[4];
9015 xmlCharEncoding enc;
9016
9017 xmlDefaultSAXHandlerInit();
9018
Daniel Veillard309f81d2003-09-23 09:02:53 +00009019 xmlDetectSAX2(ctxt);
9020
Owen Taylor3473f882001-02-23 17:55:21 +00009021 GROW;
9022
9023 /*
9024 * SAX: beginning of the document processing.
9025 */
9026 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9027 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9028
9029 /*
9030 * Get the 4 first bytes and decode the charset
9031 * if enc != XML_CHAR_ENCODING_NONE
9032 * plug some encoding conversion routines.
9033 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009034 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9035 start[0] = RAW;
9036 start[1] = NXT(1);
9037 start[2] = NXT(2);
9038 start[3] = NXT(3);
9039 enc = xmlDetectCharEncoding(start, 4);
9040 if (enc != XML_CHAR_ENCODING_NONE) {
9041 xmlSwitchEncoding(ctxt, enc);
9042 }
Owen Taylor3473f882001-02-23 17:55:21 +00009043 }
9044
9045
9046 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009047 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009048 }
9049
9050 /*
9051 * Check for the XMLDecl in the Prolog.
9052 */
9053 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009054 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009055
9056 /*
9057 * Note that we will switch encoding on the fly.
9058 */
9059 xmlParseXMLDecl(ctxt);
9060 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9061 /*
9062 * The XML REC instructs us to stop parsing right here
9063 */
9064 return(-1);
9065 }
9066 SKIP_BLANKS;
9067 } else {
9068 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9069 }
9070 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9071 ctxt->sax->startDocument(ctxt->userData);
9072
9073 /*
9074 * Doing validity checking on chunk doesn't make sense
9075 */
9076 ctxt->instate = XML_PARSER_CONTENT;
9077 ctxt->validate = 0;
9078 ctxt->loadsubset = 0;
9079 ctxt->depth = 0;
9080
9081 xmlParseContent(ctxt);
9082
9083 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009084 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009085 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009086 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009087 }
9088
9089 /*
9090 * SAX: end of the document processing.
9091 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009092 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009093 ctxt->sax->endDocument(ctxt->userData);
9094
9095 if (! ctxt->wellFormed) return(-1);
9096 return(0);
9097}
9098
Daniel Veillard73b013f2003-09-30 12:36:01 +00009099#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009100/************************************************************************
9101 * *
9102 * Progressive parsing interfaces *
9103 * *
9104 ************************************************************************/
9105
9106/**
9107 * xmlParseLookupSequence:
9108 * @ctxt: an XML parser context
9109 * @first: the first char to lookup
9110 * @next: the next char to lookup or zero
9111 * @third: the next char to lookup or zero
9112 *
9113 * Try to find if a sequence (first, next, third) or just (first next) or
9114 * (first) is available in the input stream.
9115 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9116 * to avoid rescanning sequences of bytes, it DOES change the state of the
9117 * parser, do not use liberally.
9118 *
9119 * Returns the index to the current parsing point if the full sequence
9120 * is available, -1 otherwise.
9121 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009122static int
Owen Taylor3473f882001-02-23 17:55:21 +00009123xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9124 xmlChar next, xmlChar third) {
9125 int base, len;
9126 xmlParserInputPtr in;
9127 const xmlChar *buf;
9128
9129 in = ctxt->input;
9130 if (in == NULL) return(-1);
9131 base = in->cur - in->base;
9132 if (base < 0) return(-1);
9133 if (ctxt->checkIndex > base)
9134 base = ctxt->checkIndex;
9135 if (in->buf == NULL) {
9136 buf = in->base;
9137 len = in->length;
9138 } else {
9139 buf = in->buf->buffer->content;
9140 len = in->buf->buffer->use;
9141 }
9142 /* take into account the sequence length */
9143 if (third) len -= 2;
9144 else if (next) len --;
9145 for (;base < len;base++) {
9146 if (buf[base] == first) {
9147 if (third != 0) {
9148 if ((buf[base + 1] != next) ||
9149 (buf[base + 2] != third)) continue;
9150 } else if (next != 0) {
9151 if (buf[base + 1] != next) continue;
9152 }
9153 ctxt->checkIndex = 0;
9154#ifdef DEBUG_PUSH
9155 if (next == 0)
9156 xmlGenericError(xmlGenericErrorContext,
9157 "PP: lookup '%c' found at %d\n",
9158 first, base);
9159 else if (third == 0)
9160 xmlGenericError(xmlGenericErrorContext,
9161 "PP: lookup '%c%c' found at %d\n",
9162 first, next, base);
9163 else
9164 xmlGenericError(xmlGenericErrorContext,
9165 "PP: lookup '%c%c%c' found at %d\n",
9166 first, next, third, base);
9167#endif
9168 return(base - (in->cur - in->base));
9169 }
9170 }
9171 ctxt->checkIndex = base;
9172#ifdef DEBUG_PUSH
9173 if (next == 0)
9174 xmlGenericError(xmlGenericErrorContext,
9175 "PP: lookup '%c' failed\n", first);
9176 else if (third == 0)
9177 xmlGenericError(xmlGenericErrorContext,
9178 "PP: lookup '%c%c' failed\n", first, next);
9179 else
9180 xmlGenericError(xmlGenericErrorContext,
9181 "PP: lookup '%c%c%c' failed\n", first, next, third);
9182#endif
9183 return(-1);
9184}
9185
9186/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009187 * xmlParseGetLasts:
9188 * @ctxt: an XML parser context
9189 * @lastlt: pointer to store the last '<' from the input
9190 * @lastgt: pointer to store the last '>' from the input
9191 *
9192 * Lookup the last < and > in the current chunk
9193 */
9194static void
9195xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9196 const xmlChar **lastgt) {
9197 const xmlChar *tmp;
9198
9199 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9200 xmlGenericError(xmlGenericErrorContext,
9201 "Internal error: xmlParseGetLasts\n");
9202 return;
9203 }
9204 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9205 tmp = ctxt->input->end;
9206 tmp--;
9207 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9208 (*tmp != '>')) tmp--;
9209 if (tmp < ctxt->input->base) {
9210 *lastlt = NULL;
9211 *lastgt = NULL;
9212 } else if (*tmp == '<') {
9213 *lastlt = tmp;
9214 tmp--;
9215 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9216 if (tmp < ctxt->input->base)
9217 *lastgt = NULL;
9218 else
9219 *lastgt = tmp;
9220 } else {
9221 *lastgt = tmp;
9222 tmp--;
9223 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9224 if (tmp < ctxt->input->base)
9225 *lastlt = NULL;
9226 else
9227 *lastlt = tmp;
9228 }
9229
9230 } else {
9231 *lastlt = NULL;
9232 *lastgt = NULL;
9233 }
9234}
9235/**
Owen Taylor3473f882001-02-23 17:55:21 +00009236 * xmlParseTryOrFinish:
9237 * @ctxt: an XML parser context
9238 * @terminate: last chunk indicator
9239 *
9240 * Try to progress on parsing
9241 *
9242 * Returns zero if no parsing was possible
9243 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009244static int
Owen Taylor3473f882001-02-23 17:55:21 +00009245xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9246 int ret = 0;
9247 int avail;
9248 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009249 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009250
9251#ifdef DEBUG_PUSH
9252 switch (ctxt->instate) {
9253 case XML_PARSER_EOF:
9254 xmlGenericError(xmlGenericErrorContext,
9255 "PP: try EOF\n"); break;
9256 case XML_PARSER_START:
9257 xmlGenericError(xmlGenericErrorContext,
9258 "PP: try START\n"); break;
9259 case XML_PARSER_MISC:
9260 xmlGenericError(xmlGenericErrorContext,
9261 "PP: try MISC\n");break;
9262 case XML_PARSER_COMMENT:
9263 xmlGenericError(xmlGenericErrorContext,
9264 "PP: try COMMENT\n");break;
9265 case XML_PARSER_PROLOG:
9266 xmlGenericError(xmlGenericErrorContext,
9267 "PP: try PROLOG\n");break;
9268 case XML_PARSER_START_TAG:
9269 xmlGenericError(xmlGenericErrorContext,
9270 "PP: try START_TAG\n");break;
9271 case XML_PARSER_CONTENT:
9272 xmlGenericError(xmlGenericErrorContext,
9273 "PP: try CONTENT\n");break;
9274 case XML_PARSER_CDATA_SECTION:
9275 xmlGenericError(xmlGenericErrorContext,
9276 "PP: try CDATA_SECTION\n");break;
9277 case XML_PARSER_END_TAG:
9278 xmlGenericError(xmlGenericErrorContext,
9279 "PP: try END_TAG\n");break;
9280 case XML_PARSER_ENTITY_DECL:
9281 xmlGenericError(xmlGenericErrorContext,
9282 "PP: try ENTITY_DECL\n");break;
9283 case XML_PARSER_ENTITY_VALUE:
9284 xmlGenericError(xmlGenericErrorContext,
9285 "PP: try ENTITY_VALUE\n");break;
9286 case XML_PARSER_ATTRIBUTE_VALUE:
9287 xmlGenericError(xmlGenericErrorContext,
9288 "PP: try ATTRIBUTE_VALUE\n");break;
9289 case XML_PARSER_DTD:
9290 xmlGenericError(xmlGenericErrorContext,
9291 "PP: try DTD\n");break;
9292 case XML_PARSER_EPILOG:
9293 xmlGenericError(xmlGenericErrorContext,
9294 "PP: try EPILOG\n");break;
9295 case XML_PARSER_PI:
9296 xmlGenericError(xmlGenericErrorContext,
9297 "PP: try PI\n");break;
9298 case XML_PARSER_IGNORE:
9299 xmlGenericError(xmlGenericErrorContext,
9300 "PP: try IGNORE\n");break;
9301 }
9302#endif
9303
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009304 if ((ctxt->input != NULL) &&
9305 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009306 xmlSHRINK(ctxt);
9307 ctxt->checkIndex = 0;
9308 }
9309 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009310
Daniel Veillarda880b122003-04-21 21:36:41 +00009311 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009312 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9313 return(0);
9314
9315
Owen Taylor3473f882001-02-23 17:55:21 +00009316 /*
9317 * Pop-up of finished entities.
9318 */
9319 while ((RAW == 0) && (ctxt->inputNr > 1))
9320 xmlPopInput(ctxt);
9321
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009322 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009323 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009324 avail = ctxt->input->length -
9325 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009326 else {
9327 /*
9328 * If we are operating on converted input, try to flush
9329 * remainng chars to avoid them stalling in the non-converted
9330 * buffer.
9331 */
9332 if ((ctxt->input->buf->raw != NULL) &&
9333 (ctxt->input->buf->raw->use > 0)) {
9334 int base = ctxt->input->base -
9335 ctxt->input->buf->buffer->content;
9336 int current = ctxt->input->cur - ctxt->input->base;
9337
9338 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9339 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9340 ctxt->input->cur = ctxt->input->base + current;
9341 ctxt->input->end =
9342 &ctxt->input->buf->buffer->content[
9343 ctxt->input->buf->buffer->use];
9344 }
9345 avail = ctxt->input->buf->buffer->use -
9346 (ctxt->input->cur - ctxt->input->base);
9347 }
Owen Taylor3473f882001-02-23 17:55:21 +00009348 if (avail < 1)
9349 goto done;
9350 switch (ctxt->instate) {
9351 case XML_PARSER_EOF:
9352 /*
9353 * Document parsing is done !
9354 */
9355 goto done;
9356 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009357 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9358 xmlChar start[4];
9359 xmlCharEncoding enc;
9360
9361 /*
9362 * Very first chars read from the document flow.
9363 */
9364 if (avail < 4)
9365 goto done;
9366
9367 /*
9368 * Get the 4 first bytes and decode the charset
9369 * if enc != XML_CHAR_ENCODING_NONE
9370 * plug some encoding conversion routines.
9371 */
9372 start[0] = RAW;
9373 start[1] = NXT(1);
9374 start[2] = NXT(2);
9375 start[3] = NXT(3);
9376 enc = xmlDetectCharEncoding(start, 4);
9377 if (enc != XML_CHAR_ENCODING_NONE) {
9378 xmlSwitchEncoding(ctxt, enc);
9379 }
9380 break;
9381 }
Owen Taylor3473f882001-02-23 17:55:21 +00009382
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009383 if (avail < 2)
9384 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009385 cur = ctxt->input->cur[0];
9386 next = ctxt->input->cur[1];
9387 if (cur == 0) {
9388 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9389 ctxt->sax->setDocumentLocator(ctxt->userData,
9390 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009391 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009392 ctxt->instate = XML_PARSER_EOF;
9393#ifdef DEBUG_PUSH
9394 xmlGenericError(xmlGenericErrorContext,
9395 "PP: entering EOF\n");
9396#endif
9397 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9398 ctxt->sax->endDocument(ctxt->userData);
9399 goto done;
9400 }
9401 if ((cur == '<') && (next == '?')) {
9402 /* PI or XML decl */
9403 if (avail < 5) return(ret);
9404 if ((!terminate) &&
9405 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9406 return(ret);
9407 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9408 ctxt->sax->setDocumentLocator(ctxt->userData,
9409 &xmlDefaultSAXLocator);
9410 if ((ctxt->input->cur[2] == 'x') &&
9411 (ctxt->input->cur[3] == 'm') &&
9412 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009413 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009414 ret += 5;
9415#ifdef DEBUG_PUSH
9416 xmlGenericError(xmlGenericErrorContext,
9417 "PP: Parsing XML Decl\n");
9418#endif
9419 xmlParseXMLDecl(ctxt);
9420 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9421 /*
9422 * The XML REC instructs us to stop parsing right
9423 * here
9424 */
9425 ctxt->instate = XML_PARSER_EOF;
9426 return(0);
9427 }
9428 ctxt->standalone = ctxt->input->standalone;
9429 if ((ctxt->encoding == NULL) &&
9430 (ctxt->input->encoding != NULL))
9431 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9432 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9433 (!ctxt->disableSAX))
9434 ctxt->sax->startDocument(ctxt->userData);
9435 ctxt->instate = XML_PARSER_MISC;
9436#ifdef DEBUG_PUSH
9437 xmlGenericError(xmlGenericErrorContext,
9438 "PP: entering MISC\n");
9439#endif
9440 } else {
9441 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9442 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9443 (!ctxt->disableSAX))
9444 ctxt->sax->startDocument(ctxt->userData);
9445 ctxt->instate = XML_PARSER_MISC;
9446#ifdef DEBUG_PUSH
9447 xmlGenericError(xmlGenericErrorContext,
9448 "PP: entering MISC\n");
9449#endif
9450 }
9451 } else {
9452 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9453 ctxt->sax->setDocumentLocator(ctxt->userData,
9454 &xmlDefaultSAXLocator);
9455 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9456 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9457 (!ctxt->disableSAX))
9458 ctxt->sax->startDocument(ctxt->userData);
9459 ctxt->instate = XML_PARSER_MISC;
9460#ifdef DEBUG_PUSH
9461 xmlGenericError(xmlGenericErrorContext,
9462 "PP: entering MISC\n");
9463#endif
9464 }
9465 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009466 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009467 const xmlChar *name;
9468 const xmlChar *prefix;
9469 const xmlChar *URI;
9470 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009471
9472 if ((avail < 2) && (ctxt->inputNr == 1))
9473 goto done;
9474 cur = ctxt->input->cur[0];
9475 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009476 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009477 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009478 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9479 ctxt->sax->endDocument(ctxt->userData);
9480 goto done;
9481 }
9482 if (!terminate) {
9483 if (ctxt->progressive) {
9484 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9485 goto done;
9486 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9487 goto done;
9488 }
9489 }
9490 if (ctxt->spaceNr == 0)
9491 spacePush(ctxt, -1);
9492 else
9493 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009494#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009495 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009496#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009497 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009498#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009499 else
9500 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009501#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009502 if (name == NULL) {
9503 spacePop(ctxt);
9504 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009505 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9506 ctxt->sax->endDocument(ctxt->userData);
9507 goto done;
9508 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009509#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009510 /*
9511 * [ VC: Root Element Type ]
9512 * The Name in the document type declaration must match
9513 * the element type of the root element.
9514 */
9515 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9516 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9517 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009518#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009519
9520 /*
9521 * Check for an Empty Element.
9522 */
9523 if ((RAW == '/') && (NXT(1) == '>')) {
9524 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009525
9526 if (ctxt->sax2) {
9527 if ((ctxt->sax != NULL) &&
9528 (ctxt->sax->endElementNs != NULL) &&
9529 (!ctxt->disableSAX))
9530 ctxt->sax->endElementNs(ctxt->userData, name,
9531 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009532#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009533 } else {
9534 if ((ctxt->sax != NULL) &&
9535 (ctxt->sax->endElement != NULL) &&
9536 (!ctxt->disableSAX))
9537 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009538#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009539 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009540 spacePop(ctxt);
9541 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009542 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009543 } else {
9544 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009545 }
9546 break;
9547 }
9548 if (RAW == '>') {
9549 NEXT;
9550 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009551 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009552 "Couldn't find end of Start Tag %s\n",
9553 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009555 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009556 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557 if (ctxt->sax2)
9558 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009559#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009560 else
9561 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009562#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009563
Daniel Veillarda880b122003-04-21 21:36:41 +00009564 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009565 break;
9566 }
9567 case XML_PARSER_CONTENT: {
9568 const xmlChar *test;
9569 unsigned int cons;
9570 if ((avail < 2) && (ctxt->inputNr == 1))
9571 goto done;
9572 cur = ctxt->input->cur[0];
9573 next = ctxt->input->cur[1];
9574
9575 test = CUR_PTR;
9576 cons = ctxt->input->consumed;
9577 if ((cur == '<') && (next == '/')) {
9578 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009579 break;
9580 } else if ((cur == '<') && (next == '?')) {
9581 if ((!terminate) &&
9582 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9583 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 xmlParsePI(ctxt);
9585 } else if ((cur == '<') && (next != '!')) {
9586 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 break;
9588 } else if ((cur == '<') && (next == '!') &&
9589 (ctxt->input->cur[2] == '-') &&
9590 (ctxt->input->cur[3] == '-')) {
9591 if ((!terminate) &&
9592 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9593 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009594 xmlParseComment(ctxt);
9595 ctxt->instate = XML_PARSER_CONTENT;
9596 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9597 (ctxt->input->cur[2] == '[') &&
9598 (ctxt->input->cur[3] == 'C') &&
9599 (ctxt->input->cur[4] == 'D') &&
9600 (ctxt->input->cur[5] == 'A') &&
9601 (ctxt->input->cur[6] == 'T') &&
9602 (ctxt->input->cur[7] == 'A') &&
9603 (ctxt->input->cur[8] == '[')) {
9604 SKIP(9);
9605 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009606 break;
9607 } else if ((cur == '<') && (next == '!') &&
9608 (avail < 9)) {
9609 goto done;
9610 } else if (cur == '&') {
9611 if ((!terminate) &&
9612 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9613 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009614 xmlParseReference(ctxt);
9615 } else {
9616 /* TODO Avoid the extra copy, handle directly !!! */
9617 /*
9618 * Goal of the following test is:
9619 * - minimize calls to the SAX 'character' callback
9620 * when they are mergeable
9621 * - handle an problem for isBlank when we only parse
9622 * a sequence of blank chars and the next one is
9623 * not available to check against '<' presence.
9624 * - tries to homogenize the differences in SAX
9625 * callbacks between the push and pull versions
9626 * of the parser.
9627 */
9628 if ((ctxt->inputNr == 1) &&
9629 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9630 if (!terminate) {
9631 if (ctxt->progressive) {
9632 if ((lastlt == NULL) ||
9633 (ctxt->input->cur > lastlt))
9634 goto done;
9635 } else if (xmlParseLookupSequence(ctxt,
9636 '<', 0, 0) < 0) {
9637 goto done;
9638 }
9639 }
9640 }
9641 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009642 xmlParseCharData(ctxt, 0);
9643 }
9644 /*
9645 * Pop-up of finished entities.
9646 */
9647 while ((RAW == 0) && (ctxt->inputNr > 1))
9648 xmlPopInput(ctxt);
9649 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009650 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9651 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009652 ctxt->instate = XML_PARSER_EOF;
9653 break;
9654 }
9655 break;
9656 }
9657 case XML_PARSER_END_TAG:
9658 if (avail < 2)
9659 goto done;
9660 if (!terminate) {
9661 if (ctxt->progressive) {
9662 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9663 goto done;
9664 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9665 goto done;
9666 }
9667 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009668 if (ctxt->sax2) {
9669 xmlParseEndTag2(ctxt,
9670 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9671 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9672 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9673 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009674 }
9675#ifdef LIBXML_SAX1_ENABLED
9676 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009677 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009678#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009679 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009680 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009681 } else {
9682 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009683 }
9684 break;
9685 case XML_PARSER_CDATA_SECTION: {
9686 /*
9687 * The Push mode need to have the SAX callback for
9688 * cdataBlock merge back contiguous callbacks.
9689 */
9690 int base;
9691
9692 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9693 if (base < 0) {
9694 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9695 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9696 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009697 ctxt->sax->cdataBlock(ctxt->userData,
9698 ctxt->input->cur,
9699 XML_PARSER_BIG_BUFFER_SIZE);
9700 else if (ctxt->sax->characters != NULL)
9701 ctxt->sax->characters(ctxt->userData,
9702 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009703 XML_PARSER_BIG_BUFFER_SIZE);
9704 }
9705 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9706 ctxt->checkIndex = 0;
9707 }
9708 goto done;
9709 } else {
9710 if ((ctxt->sax != NULL) && (base > 0) &&
9711 (!ctxt->disableSAX)) {
9712 if (ctxt->sax->cdataBlock != NULL)
9713 ctxt->sax->cdataBlock(ctxt->userData,
9714 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009715 else if (ctxt->sax->characters != NULL)
9716 ctxt->sax->characters(ctxt->userData,
9717 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009718 }
9719 SKIP(base + 3);
9720 ctxt->checkIndex = 0;
9721 ctxt->instate = XML_PARSER_CONTENT;
9722#ifdef DEBUG_PUSH
9723 xmlGenericError(xmlGenericErrorContext,
9724 "PP: entering CONTENT\n");
9725#endif
9726 }
9727 break;
9728 }
Owen Taylor3473f882001-02-23 17:55:21 +00009729 case XML_PARSER_MISC:
9730 SKIP_BLANKS;
9731 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009732 avail = ctxt->input->length -
9733 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009734 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009735 avail = ctxt->input->buf->buffer->use -
9736 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009737 if (avail < 2)
9738 goto done;
9739 cur = ctxt->input->cur[0];
9740 next = ctxt->input->cur[1];
9741 if ((cur == '<') && (next == '?')) {
9742 if ((!terminate) &&
9743 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9744 goto done;
9745#ifdef DEBUG_PUSH
9746 xmlGenericError(xmlGenericErrorContext,
9747 "PP: Parsing PI\n");
9748#endif
9749 xmlParsePI(ctxt);
9750 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009751 (ctxt->input->cur[2] == '-') &&
9752 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009753 if ((!terminate) &&
9754 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9755 goto done;
9756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: Parsing Comment\n");
9759#endif
9760 xmlParseComment(ctxt);
9761 ctxt->instate = XML_PARSER_MISC;
9762 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009763 (ctxt->input->cur[2] == 'D') &&
9764 (ctxt->input->cur[3] == 'O') &&
9765 (ctxt->input->cur[4] == 'C') &&
9766 (ctxt->input->cur[5] == 'T') &&
9767 (ctxt->input->cur[6] == 'Y') &&
9768 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009769 (ctxt->input->cur[8] == 'E')) {
9770 if ((!terminate) &&
9771 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9772 goto done;
9773#ifdef DEBUG_PUSH
9774 xmlGenericError(xmlGenericErrorContext,
9775 "PP: Parsing internal subset\n");
9776#endif
9777 ctxt->inSubset = 1;
9778 xmlParseDocTypeDecl(ctxt);
9779 if (RAW == '[') {
9780 ctxt->instate = XML_PARSER_DTD;
9781#ifdef DEBUG_PUSH
9782 xmlGenericError(xmlGenericErrorContext,
9783 "PP: entering DTD\n");
9784#endif
9785 } else {
9786 /*
9787 * Create and update the external subset.
9788 */
9789 ctxt->inSubset = 2;
9790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9791 (ctxt->sax->externalSubset != NULL))
9792 ctxt->sax->externalSubset(ctxt->userData,
9793 ctxt->intSubName, ctxt->extSubSystem,
9794 ctxt->extSubURI);
9795 ctxt->inSubset = 0;
9796 ctxt->instate = XML_PARSER_PROLOG;
9797#ifdef DEBUG_PUSH
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: entering PROLOG\n");
9800#endif
9801 }
9802 } else if ((cur == '<') && (next == '!') &&
9803 (avail < 9)) {
9804 goto done;
9805 } else {
9806 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009807 ctxt->progressive = 1;
9808 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009809#ifdef DEBUG_PUSH
9810 xmlGenericError(xmlGenericErrorContext,
9811 "PP: entering START_TAG\n");
9812#endif
9813 }
9814 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009815 case XML_PARSER_PROLOG:
9816 SKIP_BLANKS;
9817 if (ctxt->input->buf == NULL)
9818 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9819 else
9820 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9821 if (avail < 2)
9822 goto done;
9823 cur = ctxt->input->cur[0];
9824 next = ctxt->input->cur[1];
9825 if ((cur == '<') && (next == '?')) {
9826 if ((!terminate) &&
9827 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9828 goto done;
9829#ifdef DEBUG_PUSH
9830 xmlGenericError(xmlGenericErrorContext,
9831 "PP: Parsing PI\n");
9832#endif
9833 xmlParsePI(ctxt);
9834 } else if ((cur == '<') && (next == '!') &&
9835 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9836 if ((!terminate) &&
9837 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9838 goto done;
9839#ifdef DEBUG_PUSH
9840 xmlGenericError(xmlGenericErrorContext,
9841 "PP: Parsing Comment\n");
9842#endif
9843 xmlParseComment(ctxt);
9844 ctxt->instate = XML_PARSER_PROLOG;
9845 } else if ((cur == '<') && (next == '!') &&
9846 (avail < 4)) {
9847 goto done;
9848 } else {
9849 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009850 ctxt->progressive = 1;
9851 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009852#ifdef DEBUG_PUSH
9853 xmlGenericError(xmlGenericErrorContext,
9854 "PP: entering START_TAG\n");
9855#endif
9856 }
9857 break;
9858 case XML_PARSER_EPILOG:
9859 SKIP_BLANKS;
9860 if (ctxt->input->buf == NULL)
9861 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9862 else
9863 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9864 if (avail < 2)
9865 goto done;
9866 cur = ctxt->input->cur[0];
9867 next = ctxt->input->cur[1];
9868 if ((cur == '<') && (next == '?')) {
9869 if ((!terminate) &&
9870 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9871 goto done;
9872#ifdef DEBUG_PUSH
9873 xmlGenericError(xmlGenericErrorContext,
9874 "PP: Parsing PI\n");
9875#endif
9876 xmlParsePI(ctxt);
9877 ctxt->instate = XML_PARSER_EPILOG;
9878 } else if ((cur == '<') && (next == '!') &&
9879 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9880 if ((!terminate) &&
9881 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9882 goto done;
9883#ifdef DEBUG_PUSH
9884 xmlGenericError(xmlGenericErrorContext,
9885 "PP: Parsing Comment\n");
9886#endif
9887 xmlParseComment(ctxt);
9888 ctxt->instate = XML_PARSER_EPILOG;
9889 } else if ((cur == '<') && (next == '!') &&
9890 (avail < 4)) {
9891 goto done;
9892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009893 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009894 ctxt->instate = XML_PARSER_EOF;
9895#ifdef DEBUG_PUSH
9896 xmlGenericError(xmlGenericErrorContext,
9897 "PP: entering EOF\n");
9898#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009899 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009900 ctxt->sax->endDocument(ctxt->userData);
9901 goto done;
9902 }
9903 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009904 case XML_PARSER_DTD: {
9905 /*
9906 * Sorry but progressive parsing of the internal subset
9907 * is not expected to be supported. We first check that
9908 * the full content of the internal subset is available and
9909 * the parsing is launched only at that point.
9910 * Internal subset ends up with "']' S? '>'" in an unescaped
9911 * section and not in a ']]>' sequence which are conditional
9912 * sections (whoever argued to keep that crap in XML deserve
9913 * a place in hell !).
9914 */
9915 int base, i;
9916 xmlChar *buf;
9917 xmlChar quote = 0;
9918
9919 base = ctxt->input->cur - ctxt->input->base;
9920 if (base < 0) return(0);
9921 if (ctxt->checkIndex > base)
9922 base = ctxt->checkIndex;
9923 buf = ctxt->input->buf->buffer->content;
9924 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9925 base++) {
9926 if (quote != 0) {
9927 if (buf[base] == quote)
9928 quote = 0;
9929 continue;
9930 }
9931 if (buf[base] == '"') {
9932 quote = '"';
9933 continue;
9934 }
9935 if (buf[base] == '\'') {
9936 quote = '\'';
9937 continue;
9938 }
9939 if (buf[base] == ']') {
9940 if ((unsigned int) base +1 >=
9941 ctxt->input->buf->buffer->use)
9942 break;
9943 if (buf[base + 1] == ']') {
9944 /* conditional crap, skip both ']' ! */
9945 base++;
9946 continue;
9947 }
9948 for (i = 0;
9949 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9950 i++) {
9951 if (buf[base + i] == '>')
9952 goto found_end_int_subset;
9953 }
9954 break;
9955 }
9956 }
9957 /*
9958 * We didn't found the end of the Internal subset
9959 */
9960 if (quote == 0)
9961 ctxt->checkIndex = base;
9962#ifdef DEBUG_PUSH
9963 if (next == 0)
9964 xmlGenericError(xmlGenericErrorContext,
9965 "PP: lookup of int subset end filed\n");
9966#endif
9967 goto done;
9968
9969found_end_int_subset:
9970 xmlParseInternalSubset(ctxt);
9971 ctxt->inSubset = 2;
9972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9973 (ctxt->sax->externalSubset != NULL))
9974 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9975 ctxt->extSubSystem, ctxt->extSubURI);
9976 ctxt->inSubset = 0;
9977 ctxt->instate = XML_PARSER_PROLOG;
9978 ctxt->checkIndex = 0;
9979#ifdef DEBUG_PUSH
9980 xmlGenericError(xmlGenericErrorContext,
9981 "PP: entering PROLOG\n");
9982#endif
9983 break;
9984 }
9985 case XML_PARSER_COMMENT:
9986 xmlGenericError(xmlGenericErrorContext,
9987 "PP: internal error, state == COMMENT\n");
9988 ctxt->instate = XML_PARSER_CONTENT;
9989#ifdef DEBUG_PUSH
9990 xmlGenericError(xmlGenericErrorContext,
9991 "PP: entering CONTENT\n");
9992#endif
9993 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009994 case XML_PARSER_IGNORE:
9995 xmlGenericError(xmlGenericErrorContext,
9996 "PP: internal error, state == IGNORE");
9997 ctxt->instate = XML_PARSER_DTD;
9998#ifdef DEBUG_PUSH
9999 xmlGenericError(xmlGenericErrorContext,
10000 "PP: entering DTD\n");
10001#endif
10002 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010003 case XML_PARSER_PI:
10004 xmlGenericError(xmlGenericErrorContext,
10005 "PP: internal error, state == PI\n");
10006 ctxt->instate = XML_PARSER_CONTENT;
10007#ifdef DEBUG_PUSH
10008 xmlGenericError(xmlGenericErrorContext,
10009 "PP: entering CONTENT\n");
10010#endif
10011 break;
10012 case XML_PARSER_ENTITY_DECL:
10013 xmlGenericError(xmlGenericErrorContext,
10014 "PP: internal error, state == ENTITY_DECL\n");
10015 ctxt->instate = XML_PARSER_DTD;
10016#ifdef DEBUG_PUSH
10017 xmlGenericError(xmlGenericErrorContext,
10018 "PP: entering DTD\n");
10019#endif
10020 break;
10021 case XML_PARSER_ENTITY_VALUE:
10022 xmlGenericError(xmlGenericErrorContext,
10023 "PP: internal error, state == ENTITY_VALUE\n");
10024 ctxt->instate = XML_PARSER_CONTENT;
10025#ifdef DEBUG_PUSH
10026 xmlGenericError(xmlGenericErrorContext,
10027 "PP: entering DTD\n");
10028#endif
10029 break;
10030 case XML_PARSER_ATTRIBUTE_VALUE:
10031 xmlGenericError(xmlGenericErrorContext,
10032 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10033 ctxt->instate = XML_PARSER_START_TAG;
10034#ifdef DEBUG_PUSH
10035 xmlGenericError(xmlGenericErrorContext,
10036 "PP: entering START_TAG\n");
10037#endif
10038 break;
10039 case XML_PARSER_SYSTEM_LITERAL:
10040 xmlGenericError(xmlGenericErrorContext,
10041 "PP: internal error, state == SYSTEM_LITERAL\n");
10042 ctxt->instate = XML_PARSER_START_TAG;
10043#ifdef DEBUG_PUSH
10044 xmlGenericError(xmlGenericErrorContext,
10045 "PP: entering START_TAG\n");
10046#endif
10047 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010048 case XML_PARSER_PUBLIC_LITERAL:
10049 xmlGenericError(xmlGenericErrorContext,
10050 "PP: internal error, state == PUBLIC_LITERAL\n");
10051 ctxt->instate = XML_PARSER_START_TAG;
10052#ifdef DEBUG_PUSH
10053 xmlGenericError(xmlGenericErrorContext,
10054 "PP: entering START_TAG\n");
10055#endif
10056 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010057 }
10058 }
10059done:
10060#ifdef DEBUG_PUSH
10061 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10062#endif
10063 return(ret);
10064}
10065
10066/**
Owen Taylor3473f882001-02-23 17:55:21 +000010067 * xmlParseChunk:
10068 * @ctxt: an XML parser context
10069 * @chunk: an char array
10070 * @size: the size in byte of the chunk
10071 * @terminate: last chunk indicator
10072 *
10073 * Parse a Chunk of memory
10074 *
10075 * Returns zero if no error, the xmlParserErrors otherwise.
10076 */
10077int
10078xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10079 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010080 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10081 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010082 if (ctxt->instate == XML_PARSER_START)
10083 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010084 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10085 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10086 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10087 int cur = ctxt->input->cur - ctxt->input->base;
10088
10089 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10090 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10091 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010092 ctxt->input->end =
10093 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010094#ifdef DEBUG_PUSH
10095 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10096#endif
10097
Owen Taylor3473f882001-02-23 17:55:21 +000010098 } else if (ctxt->instate != XML_PARSER_EOF) {
10099 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10100 xmlParserInputBufferPtr in = ctxt->input->buf;
10101 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10102 (in->raw != NULL)) {
10103 int nbchars;
10104
10105 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10106 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010107 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010108 xmlGenericError(xmlGenericErrorContext,
10109 "xmlParseChunk: encoder error\n");
10110 return(XML_ERR_INVALID_ENCODING);
10111 }
10112 }
10113 }
10114 }
10115 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010116 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10117 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010118 if (terminate) {
10119 /*
10120 * Check for termination
10121 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010122 int avail = 0;
10123 if (ctxt->input->buf == NULL)
10124 avail = ctxt->input->length -
10125 (ctxt->input->cur - ctxt->input->base);
10126 else
10127 avail = ctxt->input->buf->buffer->use -
10128 (ctxt->input->cur - ctxt->input->base);
10129
Owen Taylor3473f882001-02-23 17:55:21 +000010130 if ((ctxt->instate != XML_PARSER_EOF) &&
10131 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010132 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010133 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010134 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010135 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010136 }
Owen Taylor3473f882001-02-23 17:55:21 +000010137 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010138 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010139 ctxt->sax->endDocument(ctxt->userData);
10140 }
10141 ctxt->instate = XML_PARSER_EOF;
10142 }
10143 return((xmlParserErrors) ctxt->errNo);
10144}
10145
10146/************************************************************************
10147 * *
10148 * I/O front end functions to the parser *
10149 * *
10150 ************************************************************************/
10151
10152/**
10153 * xmlStopParser:
10154 * @ctxt: an XML parser context
10155 *
10156 * Blocks further parser processing
10157 */
10158void
10159xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +000010160 if (ctxt == NULL)
10161 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010162 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +000010163 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010164 if (ctxt->input != NULL)
10165 ctxt->input->cur = BAD_CAST"";
10166}
10167
10168/**
10169 * xmlCreatePushParserCtxt:
10170 * @sax: a SAX handler
10171 * @user_data: The user data returned on SAX callbacks
10172 * @chunk: a pointer to an array of chars
10173 * @size: number of chars in the array
10174 * @filename: an optional file name or URI
10175 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010176 * Create a parser context for using the XML parser in push mode.
10177 * If @buffer and @size are non-NULL, the data is used to detect
10178 * the encoding. The remaining characters will be parsed so they
10179 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010180 * To allow content encoding detection, @size should be >= 4
10181 * The value of @filename is used for fetching external entities
10182 * and error/warning reports.
10183 *
10184 * Returns the new parser context or NULL
10185 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010186
Owen Taylor3473f882001-02-23 17:55:21 +000010187xmlParserCtxtPtr
10188xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10189 const char *chunk, int size, const char *filename) {
10190 xmlParserCtxtPtr ctxt;
10191 xmlParserInputPtr inputStream;
10192 xmlParserInputBufferPtr buf;
10193 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10194
10195 /*
10196 * plug some encoding conversion routines
10197 */
10198 if ((chunk != NULL) && (size >= 4))
10199 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10200
10201 buf = xmlAllocParserInputBuffer(enc);
10202 if (buf == NULL) return(NULL);
10203
10204 ctxt = xmlNewParserCtxt();
10205 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010206 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010207 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010208 return(NULL);
10209 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010210 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10211 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010212 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010213 xmlFreeParserInputBuffer(buf);
10214 xmlFreeParserCtxt(ctxt);
10215 return(NULL);
10216 }
Owen Taylor3473f882001-02-23 17:55:21 +000010217 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010218#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010219 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010220#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010221 xmlFree(ctxt->sax);
10222 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10223 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010224 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010225 xmlFreeParserInputBuffer(buf);
10226 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010227 return(NULL);
10228 }
10229 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10230 if (user_data != NULL)
10231 ctxt->userData = user_data;
10232 }
10233 if (filename == NULL) {
10234 ctxt->directory = NULL;
10235 } else {
10236 ctxt->directory = xmlParserGetDirectory(filename);
10237 }
10238
10239 inputStream = xmlNewInputStream(ctxt);
10240 if (inputStream == NULL) {
10241 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010242 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010243 return(NULL);
10244 }
10245
10246 if (filename == NULL)
10247 inputStream->filename = NULL;
10248 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010249 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010250 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010251 inputStream->buf = buf;
10252 inputStream->base = inputStream->buf->buffer->content;
10253 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010254 inputStream->end =
10255 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010256
10257 inputPush(ctxt, inputStream);
10258
10259 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10260 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010261 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10262 int cur = ctxt->input->cur - ctxt->input->base;
10263
Owen Taylor3473f882001-02-23 17:55:21 +000010264 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010265
10266 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10267 ctxt->input->cur = ctxt->input->base + cur;
10268 ctxt->input->end =
10269 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010270#ifdef DEBUG_PUSH
10271 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10272#endif
10273 }
10274
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010275 if (enc != XML_CHAR_ENCODING_NONE) {
10276 xmlSwitchEncoding(ctxt, enc);
10277 }
10278
Owen Taylor3473f882001-02-23 17:55:21 +000010279 return(ctxt);
10280}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010281#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010282
10283/**
10284 * xmlCreateIOParserCtxt:
10285 * @sax: a SAX handler
10286 * @user_data: The user data returned on SAX callbacks
10287 * @ioread: an I/O read function
10288 * @ioclose: an I/O close function
10289 * @ioctx: an I/O handler
10290 * @enc: the charset encoding if known
10291 *
10292 * Create a parser context for using the XML parser with an existing
10293 * I/O stream
10294 *
10295 * Returns the new parser context or NULL
10296 */
10297xmlParserCtxtPtr
10298xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10299 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10300 void *ioctx, xmlCharEncoding enc) {
10301 xmlParserCtxtPtr ctxt;
10302 xmlParserInputPtr inputStream;
10303 xmlParserInputBufferPtr buf;
10304
10305 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10306 if (buf == NULL) return(NULL);
10307
10308 ctxt = xmlNewParserCtxt();
10309 if (ctxt == NULL) {
10310 xmlFree(buf);
10311 return(NULL);
10312 }
10313 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010314#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010315 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010316#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010317 xmlFree(ctxt->sax);
10318 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10319 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010320 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010321 xmlFree(ctxt);
10322 return(NULL);
10323 }
10324 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10325 if (user_data != NULL)
10326 ctxt->userData = user_data;
10327 }
10328
10329 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10330 if (inputStream == NULL) {
10331 xmlFreeParserCtxt(ctxt);
10332 return(NULL);
10333 }
10334 inputPush(ctxt, inputStream);
10335
10336 return(ctxt);
10337}
10338
Daniel Veillard4432df22003-09-28 18:58:27 +000010339#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010340/************************************************************************
10341 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010342 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010343 * *
10344 ************************************************************************/
10345
10346/**
10347 * xmlIOParseDTD:
10348 * @sax: the SAX handler block or NULL
10349 * @input: an Input Buffer
10350 * @enc: the charset encoding if known
10351 *
10352 * Load and parse a DTD
10353 *
10354 * Returns the resulting xmlDtdPtr or NULL in case of error.
10355 * @input will be freed at parsing end.
10356 */
10357
10358xmlDtdPtr
10359xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10360 xmlCharEncoding enc) {
10361 xmlDtdPtr ret = NULL;
10362 xmlParserCtxtPtr ctxt;
10363 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010364 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010365
10366 if (input == NULL)
10367 return(NULL);
10368
10369 ctxt = xmlNewParserCtxt();
10370 if (ctxt == NULL) {
10371 return(NULL);
10372 }
10373
10374 /*
10375 * Set-up the SAX context
10376 */
10377 if (sax != NULL) {
10378 if (ctxt->sax != NULL)
10379 xmlFree(ctxt->sax);
10380 ctxt->sax = sax;
10381 ctxt->userData = NULL;
10382 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010383 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010384
10385 /*
10386 * generate a parser input from the I/O handler
10387 */
10388
10389 pinput = xmlNewIOInputStream(ctxt, input, enc);
10390 if (pinput == NULL) {
10391 if (sax != NULL) ctxt->sax = NULL;
10392 xmlFreeParserCtxt(ctxt);
10393 return(NULL);
10394 }
10395
10396 /*
10397 * plug some encoding conversion routines here.
10398 */
10399 xmlPushInput(ctxt, pinput);
10400
10401 pinput->filename = NULL;
10402 pinput->line = 1;
10403 pinput->col = 1;
10404 pinput->base = ctxt->input->cur;
10405 pinput->cur = ctxt->input->cur;
10406 pinput->free = NULL;
10407
10408 /*
10409 * let's parse that entity knowing it's an external subset.
10410 */
10411 ctxt->inSubset = 2;
10412 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10413 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10414 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010415
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010416 if ((enc == XML_CHAR_ENCODING_NONE) &&
10417 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010418 /*
10419 * Get the 4 first bytes and decode the charset
10420 * if enc != XML_CHAR_ENCODING_NONE
10421 * plug some encoding conversion routines.
10422 */
10423 start[0] = RAW;
10424 start[1] = NXT(1);
10425 start[2] = NXT(2);
10426 start[3] = NXT(3);
10427 enc = xmlDetectCharEncoding(start, 4);
10428 if (enc != XML_CHAR_ENCODING_NONE) {
10429 xmlSwitchEncoding(ctxt, enc);
10430 }
10431 }
10432
Owen Taylor3473f882001-02-23 17:55:21 +000010433 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10434
10435 if (ctxt->myDoc != NULL) {
10436 if (ctxt->wellFormed) {
10437 ret = ctxt->myDoc->extSubset;
10438 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010439 if (ret != NULL) {
10440 xmlNodePtr tmp;
10441
10442 ret->doc = NULL;
10443 tmp = ret->children;
10444 while (tmp != NULL) {
10445 tmp->doc = NULL;
10446 tmp = tmp->next;
10447 }
10448 }
Owen Taylor3473f882001-02-23 17:55:21 +000010449 } else {
10450 ret = NULL;
10451 }
10452 xmlFreeDoc(ctxt->myDoc);
10453 ctxt->myDoc = NULL;
10454 }
10455 if (sax != NULL) ctxt->sax = NULL;
10456 xmlFreeParserCtxt(ctxt);
10457
10458 return(ret);
10459}
10460
10461/**
10462 * xmlSAXParseDTD:
10463 * @sax: the SAX handler block
10464 * @ExternalID: a NAME* containing the External ID of the DTD
10465 * @SystemID: a NAME* containing the URL to the DTD
10466 *
10467 * Load and parse an external subset.
10468 *
10469 * Returns the resulting xmlDtdPtr or NULL in case of error.
10470 */
10471
10472xmlDtdPtr
10473xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10474 const xmlChar *SystemID) {
10475 xmlDtdPtr ret = NULL;
10476 xmlParserCtxtPtr ctxt;
10477 xmlParserInputPtr input = NULL;
10478 xmlCharEncoding enc;
10479
10480 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10481
10482 ctxt = xmlNewParserCtxt();
10483 if (ctxt == NULL) {
10484 return(NULL);
10485 }
10486
10487 /*
10488 * Set-up the SAX context
10489 */
10490 if (sax != NULL) {
10491 if (ctxt->sax != NULL)
10492 xmlFree(ctxt->sax);
10493 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010494 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010495 }
10496
10497 /*
10498 * Ask the Entity resolver to load the damn thing
10499 */
10500
10501 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010502 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010503 if (input == NULL) {
10504 if (sax != NULL) ctxt->sax = NULL;
10505 xmlFreeParserCtxt(ctxt);
10506 return(NULL);
10507 }
10508
10509 /*
10510 * plug some encoding conversion routines here.
10511 */
10512 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010513 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10514 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10515 xmlSwitchEncoding(ctxt, enc);
10516 }
Owen Taylor3473f882001-02-23 17:55:21 +000010517
10518 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010519 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010520 input->line = 1;
10521 input->col = 1;
10522 input->base = ctxt->input->cur;
10523 input->cur = ctxt->input->cur;
10524 input->free = NULL;
10525
10526 /*
10527 * let's parse that entity knowing it's an external subset.
10528 */
10529 ctxt->inSubset = 2;
10530 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10531 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10532 ExternalID, SystemID);
10533 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10534
10535 if (ctxt->myDoc != NULL) {
10536 if (ctxt->wellFormed) {
10537 ret = ctxt->myDoc->extSubset;
10538 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010539 if (ret != NULL) {
10540 xmlNodePtr tmp;
10541
10542 ret->doc = NULL;
10543 tmp = ret->children;
10544 while (tmp != NULL) {
10545 tmp->doc = NULL;
10546 tmp = tmp->next;
10547 }
10548 }
Owen Taylor3473f882001-02-23 17:55:21 +000010549 } else {
10550 ret = NULL;
10551 }
10552 xmlFreeDoc(ctxt->myDoc);
10553 ctxt->myDoc = NULL;
10554 }
10555 if (sax != NULL) ctxt->sax = NULL;
10556 xmlFreeParserCtxt(ctxt);
10557
10558 return(ret);
10559}
10560
Daniel Veillard4432df22003-09-28 18:58:27 +000010561
Owen Taylor3473f882001-02-23 17:55:21 +000010562/**
10563 * xmlParseDTD:
10564 * @ExternalID: a NAME* containing the External ID of the DTD
10565 * @SystemID: a NAME* containing the URL to the DTD
10566 *
10567 * Load and parse an external subset.
10568 *
10569 * Returns the resulting xmlDtdPtr or NULL in case of error.
10570 */
10571
10572xmlDtdPtr
10573xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10574 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10575}
Daniel Veillard4432df22003-09-28 18:58:27 +000010576#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010577
10578/************************************************************************
10579 * *
10580 * Front ends when parsing an Entity *
10581 * *
10582 ************************************************************************/
10583
10584/**
Owen Taylor3473f882001-02-23 17:55:21 +000010585 * xmlParseCtxtExternalEntity:
10586 * @ctx: the existing parsing context
10587 * @URL: the URL for the entity to load
10588 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010589 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010590 *
10591 * Parse an external general entity within an existing parsing context
10592 * An external general parsed entity is well-formed if it matches the
10593 * production labeled extParsedEnt.
10594 *
10595 * [78] extParsedEnt ::= TextDecl? content
10596 *
10597 * Returns 0 if the entity is well formed, -1 in case of args problem and
10598 * the parser error code otherwise
10599 */
10600
10601int
10602xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010603 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010604 xmlParserCtxtPtr ctxt;
10605 xmlDocPtr newDoc;
10606 xmlSAXHandlerPtr oldsax = NULL;
10607 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010608 xmlChar start[4];
10609 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010610
10611 if (ctx->depth > 40) {
10612 return(XML_ERR_ENTITY_LOOP);
10613 }
10614
Daniel Veillardcda96922001-08-21 10:56:31 +000010615 if (lst != NULL)
10616 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010617 if ((URL == NULL) && (ID == NULL))
10618 return(-1);
10619 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10620 return(-1);
10621
10622
10623 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10624 if (ctxt == NULL) return(-1);
10625 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010626 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010627 oldsax = ctxt->sax;
10628 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010629 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010630 newDoc = xmlNewDoc(BAD_CAST "1.0");
10631 if (newDoc == NULL) {
10632 xmlFreeParserCtxt(ctxt);
10633 return(-1);
10634 }
10635 if (ctx->myDoc != NULL) {
10636 newDoc->intSubset = ctx->myDoc->intSubset;
10637 newDoc->extSubset = ctx->myDoc->extSubset;
10638 }
10639 if (ctx->myDoc->URL != NULL) {
10640 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10641 }
10642 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10643 if (newDoc->children == NULL) {
10644 ctxt->sax = oldsax;
10645 xmlFreeParserCtxt(ctxt);
10646 newDoc->intSubset = NULL;
10647 newDoc->extSubset = NULL;
10648 xmlFreeDoc(newDoc);
10649 return(-1);
10650 }
10651 nodePush(ctxt, newDoc->children);
10652 if (ctx->myDoc == NULL) {
10653 ctxt->myDoc = newDoc;
10654 } else {
10655 ctxt->myDoc = ctx->myDoc;
10656 newDoc->children->doc = ctx->myDoc;
10657 }
10658
Daniel Veillard87a764e2001-06-20 17:41:10 +000010659 /*
10660 * Get the 4 first bytes and decode the charset
10661 * if enc != XML_CHAR_ENCODING_NONE
10662 * plug some encoding conversion routines.
10663 */
10664 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010665 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10666 start[0] = RAW;
10667 start[1] = NXT(1);
10668 start[2] = NXT(2);
10669 start[3] = NXT(3);
10670 enc = xmlDetectCharEncoding(start, 4);
10671 if (enc != XML_CHAR_ENCODING_NONE) {
10672 xmlSwitchEncoding(ctxt, enc);
10673 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010674 }
10675
Owen Taylor3473f882001-02-23 17:55:21 +000010676 /*
10677 * Parse a possible text declaration first
10678 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010679 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010680 xmlParseTextDecl(ctxt);
10681 }
10682
10683 /*
10684 * Doing validity checking on chunk doesn't make sense
10685 */
10686 ctxt->instate = XML_PARSER_CONTENT;
10687 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010688 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010689 ctxt->loadsubset = ctx->loadsubset;
10690 ctxt->depth = ctx->depth + 1;
10691 ctxt->replaceEntities = ctx->replaceEntities;
10692 if (ctxt->validate) {
10693 ctxt->vctxt.error = ctx->vctxt.error;
10694 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010695 } else {
10696 ctxt->vctxt.error = NULL;
10697 ctxt->vctxt.warning = NULL;
10698 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010699 ctxt->vctxt.nodeTab = NULL;
10700 ctxt->vctxt.nodeNr = 0;
10701 ctxt->vctxt.nodeMax = 0;
10702 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010703 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10704 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010705 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10706 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10707 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010708 ctxt->dictNames = ctx->dictNames;
10709 ctxt->attsDefault = ctx->attsDefault;
10710 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010711
10712 xmlParseContent(ctxt);
10713
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010714 ctx->validate = ctxt->validate;
10715 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010716 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010717 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010718 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010719 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010720 }
10721 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010722 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010723 }
10724
10725 if (!ctxt->wellFormed) {
10726 if (ctxt->errNo == 0)
10727 ret = 1;
10728 else
10729 ret = ctxt->errNo;
10730 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010731 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010732 xmlNodePtr cur;
10733
10734 /*
10735 * Return the newly created nodeset after unlinking it from
10736 * they pseudo parent.
10737 */
10738 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010739 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010740 while (cur != NULL) {
10741 cur->parent = NULL;
10742 cur = cur->next;
10743 }
10744 newDoc->children->children = NULL;
10745 }
10746 ret = 0;
10747 }
10748 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010749 ctxt->dict = NULL;
10750 ctxt->attsDefault = NULL;
10751 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010752 xmlFreeParserCtxt(ctxt);
10753 newDoc->intSubset = NULL;
10754 newDoc->extSubset = NULL;
10755 xmlFreeDoc(newDoc);
10756
10757 return(ret);
10758}
10759
10760/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010761 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010762 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010763 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010764 * @sax: the SAX handler bloc (possibly NULL)
10765 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10766 * @depth: Used for loop detection, use 0
10767 * @URL: the URL for the entity to load
10768 * @ID: the System ID for the entity to load
10769 * @list: the return value for the set of parsed nodes
10770 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010771 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010772 *
10773 * Returns 0 if the entity is well formed, -1 in case of args problem and
10774 * the parser error code otherwise
10775 */
10776
Daniel Veillard7d515752003-09-26 19:12:37 +000010777static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010778xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10779 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010780 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010781 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010782 xmlParserCtxtPtr ctxt;
10783 xmlDocPtr newDoc;
10784 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010785 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010786 xmlChar start[4];
10787 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010788
10789 if (depth > 40) {
10790 return(XML_ERR_ENTITY_LOOP);
10791 }
10792
10793
10794
10795 if (list != NULL)
10796 *list = NULL;
10797 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010798 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010799 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010800 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010801
10802
10803 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010804 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010805 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010806 if (oldctxt != NULL) {
10807 ctxt->_private = oldctxt->_private;
10808 ctxt->loadsubset = oldctxt->loadsubset;
10809 ctxt->validate = oldctxt->validate;
10810 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010811 ctxt->record_info = oldctxt->record_info;
10812 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10813 ctxt->node_seq.length = oldctxt->node_seq.length;
10814 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010815 } else {
10816 /*
10817 * Doing validity checking on chunk without context
10818 * doesn't make sense
10819 */
10820 ctxt->_private = NULL;
10821 ctxt->validate = 0;
10822 ctxt->external = 2;
10823 ctxt->loadsubset = 0;
10824 }
Owen Taylor3473f882001-02-23 17:55:21 +000010825 if (sax != NULL) {
10826 oldsax = ctxt->sax;
10827 ctxt->sax = sax;
10828 if (user_data != NULL)
10829 ctxt->userData = user_data;
10830 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010831 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010832 newDoc = xmlNewDoc(BAD_CAST "1.0");
10833 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010834 ctxt->node_seq.maximum = 0;
10835 ctxt->node_seq.length = 0;
10836 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010837 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010838 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010839 }
10840 if (doc != NULL) {
10841 newDoc->intSubset = doc->intSubset;
10842 newDoc->extSubset = doc->extSubset;
10843 }
10844 if (doc->URL != NULL) {
10845 newDoc->URL = xmlStrdup(doc->URL);
10846 }
10847 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10848 if (newDoc->children == NULL) {
10849 if (sax != NULL)
10850 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010851 ctxt->node_seq.maximum = 0;
10852 ctxt->node_seq.length = 0;
10853 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010854 xmlFreeParserCtxt(ctxt);
10855 newDoc->intSubset = NULL;
10856 newDoc->extSubset = NULL;
10857 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010858 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010859 }
10860 nodePush(ctxt, newDoc->children);
10861 if (doc == NULL) {
10862 ctxt->myDoc = newDoc;
10863 } else {
10864 ctxt->myDoc = doc;
10865 newDoc->children->doc = doc;
10866 }
10867
Daniel Veillard87a764e2001-06-20 17:41:10 +000010868 /*
10869 * Get the 4 first bytes and decode the charset
10870 * if enc != XML_CHAR_ENCODING_NONE
10871 * plug some encoding conversion routines.
10872 */
10873 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010874 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10875 start[0] = RAW;
10876 start[1] = NXT(1);
10877 start[2] = NXT(2);
10878 start[3] = NXT(3);
10879 enc = xmlDetectCharEncoding(start, 4);
10880 if (enc != XML_CHAR_ENCODING_NONE) {
10881 xmlSwitchEncoding(ctxt, enc);
10882 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010883 }
10884
Owen Taylor3473f882001-02-23 17:55:21 +000010885 /*
10886 * Parse a possible text declaration first
10887 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010888 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010889 xmlParseTextDecl(ctxt);
10890 }
10891
Owen Taylor3473f882001-02-23 17:55:21 +000010892 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010893 ctxt->depth = depth;
10894
10895 xmlParseContent(ctxt);
10896
Daniel Veillard561b7f82002-03-20 21:55:57 +000010897 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010898 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010899 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010900 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010901 }
10902 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010903 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010904 }
10905
10906 if (!ctxt->wellFormed) {
10907 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010908 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010909 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010910 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010911 } else {
10912 if (list != NULL) {
10913 xmlNodePtr cur;
10914
10915 /*
10916 * Return the newly created nodeset after unlinking it from
10917 * they pseudo parent.
10918 */
10919 cur = newDoc->children->children;
10920 *list = cur;
10921 while (cur != NULL) {
10922 cur->parent = NULL;
10923 cur = cur->next;
10924 }
10925 newDoc->children->children = NULL;
10926 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010927 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010928 }
10929 if (sax != NULL)
10930 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010931 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10932 oldctxt->node_seq.length = ctxt->node_seq.length;
10933 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010934 ctxt->node_seq.maximum = 0;
10935 ctxt->node_seq.length = 0;
10936 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010937 xmlFreeParserCtxt(ctxt);
10938 newDoc->intSubset = NULL;
10939 newDoc->extSubset = NULL;
10940 xmlFreeDoc(newDoc);
10941
10942 return(ret);
10943}
10944
Daniel Veillard81273902003-09-30 00:43:48 +000010945#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010946/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010947 * xmlParseExternalEntity:
10948 * @doc: the document the chunk pertains to
10949 * @sax: the SAX handler bloc (possibly NULL)
10950 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10951 * @depth: Used for loop detection, use 0
10952 * @URL: the URL for the entity to load
10953 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010954 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010955 *
10956 * Parse an external general entity
10957 * An external general parsed entity is well-formed if it matches the
10958 * production labeled extParsedEnt.
10959 *
10960 * [78] extParsedEnt ::= TextDecl? content
10961 *
10962 * Returns 0 if the entity is well formed, -1 in case of args problem and
10963 * the parser error code otherwise
10964 */
10965
10966int
10967xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010968 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010969 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010970 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010971}
10972
10973/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010974 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010975 * @doc: the document the chunk pertains to
10976 * @sax: the SAX handler bloc (possibly NULL)
10977 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10978 * @depth: Used for loop detection, use 0
10979 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010980 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010981 *
10982 * Parse a well-balanced chunk of an XML document
10983 * called by the parser
10984 * The allowed sequence for the Well Balanced Chunk is the one defined by
10985 * the content production in the XML grammar:
10986 *
10987 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10988 *
10989 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10990 * the parser error code otherwise
10991 */
10992
10993int
10994xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010995 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010996 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10997 depth, string, lst, 0 );
10998}
Daniel Veillard81273902003-09-30 00:43:48 +000010999#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011000
11001/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011002 * xmlParseBalancedChunkMemoryInternal:
11003 * @oldctxt: the existing parsing context
11004 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11005 * @user_data: the user data field for the parser context
11006 * @lst: the return value for the set of parsed nodes
11007 *
11008 *
11009 * Parse a well-balanced chunk of an XML document
11010 * called by the parser
11011 * The allowed sequence for the Well Balanced Chunk is the one defined by
11012 * the content production in the XML grammar:
11013 *
11014 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11015 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011016 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11017 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011018 *
11019 * In case recover is set to 1, the nodelist will not be empty even if
11020 * the parsed chunk is not well balanced.
11021 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011022static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011023xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11024 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11025 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011026 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011027 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011028 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011029 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011030 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011031
11032 if (oldctxt->depth > 40) {
11033 return(XML_ERR_ENTITY_LOOP);
11034 }
11035
11036
11037 if (lst != NULL)
11038 *lst = NULL;
11039 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011040 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011041
11042 size = xmlStrlen(string);
11043
11044 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011045 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011046 if (user_data != NULL)
11047 ctxt->userData = user_data;
11048 else
11049 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011050 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11051 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011052 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11053 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11054 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011055
11056 oldsax = ctxt->sax;
11057 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011058 xmlDetectSAX2(ctxt);
11059
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011060 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011061 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011062 newDoc = xmlNewDoc(BAD_CAST "1.0");
11063 if (newDoc == NULL) {
11064 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011065 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011066 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011067 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011068 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011069 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011070 } else {
11071 ctxt->myDoc = oldctxt->myDoc;
11072 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011073 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000011074 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000011075 BAD_CAST "pseudoroot", NULL);
11076 if (ctxt->myDoc->children == NULL) {
11077 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011078 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011079 xmlFreeParserCtxt(ctxt);
11080 if (newDoc != NULL)
11081 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000011082 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011083 }
11084 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011085 ctxt->instate = XML_PARSER_CONTENT;
11086 ctxt->depth = oldctxt->depth + 1;
11087
Daniel Veillard328f48c2002-11-15 15:24:34 +000011088 ctxt->validate = 0;
11089 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011090 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11091 /*
11092 * ID/IDREF registration will be done in xmlValidateElement below
11093 */
11094 ctxt->loadsubset |= XML_SKIP_IDS;
11095 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011096 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011097 ctxt->attsDefault = oldctxt->attsDefault;
11098 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011099
Daniel Veillard68e9e742002-11-16 15:35:11 +000011100 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011101 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011102 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011103 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011104 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011105 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011106 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011107 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011108 }
11109
11110 if (!ctxt->wellFormed) {
11111 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011112 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011113 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011114 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011115 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011116 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011117 }
11118
William M. Brack7b9154b2003-09-27 19:23:50 +000011119 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011120 xmlNodePtr cur;
11121
11122 /*
11123 * Return the newly created nodeset after unlinking it from
11124 * they pseudo parent.
11125 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011126 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011127 *lst = cur;
11128 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011129#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011130 if (oldctxt->validate && oldctxt->wellFormed &&
11131 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11132 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11133 oldctxt->myDoc, cur);
11134 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011135#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011136 cur->parent = NULL;
11137 cur = cur->next;
11138 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011139 ctxt->myDoc->children->children = NULL;
11140 }
11141 if (ctxt->myDoc != NULL) {
11142 xmlFreeNode(ctxt->myDoc->children);
11143 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011144 }
11145
11146 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011147 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011148 ctxt->attsDefault = NULL;
11149 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011150 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011151 if (newDoc != NULL)
11152 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011153
11154 return(ret);
11155}
11156
Daniel Veillard81273902003-09-30 00:43:48 +000011157#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011158/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011159 * xmlParseBalancedChunkMemoryRecover:
11160 * @doc: the document the chunk pertains to
11161 * @sax: the SAX handler bloc (possibly NULL)
11162 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11163 * @depth: Used for loop detection, use 0
11164 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11165 * @lst: the return value for the set of parsed nodes
11166 * @recover: return nodes even if the data is broken (use 0)
11167 *
11168 *
11169 * Parse a well-balanced chunk of an XML document
11170 * called by the parser
11171 * The allowed sequence for the Well Balanced Chunk is the one defined by
11172 * the content production in the XML grammar:
11173 *
11174 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11175 *
11176 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11177 * the parser error code otherwise
11178 *
11179 * In case recover is set to 1, the nodelist will not be empty even if
11180 * the parsed chunk is not well balanced.
11181 */
11182int
11183xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11184 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11185 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011186 xmlParserCtxtPtr ctxt;
11187 xmlDocPtr newDoc;
11188 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011189 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011190 int size;
11191 int ret = 0;
11192
11193 if (depth > 40) {
11194 return(XML_ERR_ENTITY_LOOP);
11195 }
11196
11197
Daniel Veillardcda96922001-08-21 10:56:31 +000011198 if (lst != NULL)
11199 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011200 if (string == NULL)
11201 return(-1);
11202
11203 size = xmlStrlen(string);
11204
11205 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11206 if (ctxt == NULL) return(-1);
11207 ctxt->userData = ctxt;
11208 if (sax != NULL) {
11209 oldsax = ctxt->sax;
11210 ctxt->sax = sax;
11211 if (user_data != NULL)
11212 ctxt->userData = user_data;
11213 }
11214 newDoc = xmlNewDoc(BAD_CAST "1.0");
11215 if (newDoc == NULL) {
11216 xmlFreeParserCtxt(ctxt);
11217 return(-1);
11218 }
11219 if (doc != NULL) {
11220 newDoc->intSubset = doc->intSubset;
11221 newDoc->extSubset = doc->extSubset;
11222 }
11223 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11224 if (newDoc->children == NULL) {
11225 if (sax != NULL)
11226 ctxt->sax = oldsax;
11227 xmlFreeParserCtxt(ctxt);
11228 newDoc->intSubset = NULL;
11229 newDoc->extSubset = NULL;
11230 xmlFreeDoc(newDoc);
11231 return(-1);
11232 }
11233 nodePush(ctxt, newDoc->children);
11234 if (doc == NULL) {
11235 ctxt->myDoc = newDoc;
11236 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011237 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011238 newDoc->children->doc = doc;
11239 }
11240 ctxt->instate = XML_PARSER_CONTENT;
11241 ctxt->depth = depth;
11242
11243 /*
11244 * Doing validity checking on chunk doesn't make sense
11245 */
11246 ctxt->validate = 0;
11247 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011248 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011249
Daniel Veillardb39bc392002-10-26 19:29:51 +000011250 if ( doc != NULL ){
11251 content = doc->children;
11252 doc->children = NULL;
11253 xmlParseContent(ctxt);
11254 doc->children = content;
11255 }
11256 else {
11257 xmlParseContent(ctxt);
11258 }
Owen Taylor3473f882001-02-23 17:55:21 +000011259 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011260 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011261 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011262 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011263 }
11264 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011265 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011266 }
11267
11268 if (!ctxt->wellFormed) {
11269 if (ctxt->errNo == 0)
11270 ret = 1;
11271 else
11272 ret = ctxt->errNo;
11273 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011274 ret = 0;
11275 }
11276
11277 if (lst != NULL && (ret == 0 || recover == 1)) {
11278 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011279
11280 /*
11281 * Return the newly created nodeset after unlinking it from
11282 * they pseudo parent.
11283 */
11284 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011285 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011286 while (cur != NULL) {
11287 cur->parent = NULL;
11288 cur = cur->next;
11289 }
11290 newDoc->children->children = NULL;
11291 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011292
Owen Taylor3473f882001-02-23 17:55:21 +000011293 if (sax != NULL)
11294 ctxt->sax = oldsax;
11295 xmlFreeParserCtxt(ctxt);
11296 newDoc->intSubset = NULL;
11297 newDoc->extSubset = NULL;
11298 xmlFreeDoc(newDoc);
11299
11300 return(ret);
11301}
11302
11303/**
11304 * xmlSAXParseEntity:
11305 * @sax: the SAX handler block
11306 * @filename: the filename
11307 *
11308 * parse an XML external entity out of context and build a tree.
11309 * It use the given SAX function block to handle the parsing callback.
11310 * If sax is NULL, fallback to the default DOM tree building routines.
11311 *
11312 * [78] extParsedEnt ::= TextDecl? content
11313 *
11314 * This correspond to a "Well Balanced" chunk
11315 *
11316 * Returns the resulting document tree
11317 */
11318
11319xmlDocPtr
11320xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11321 xmlDocPtr ret;
11322 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011323
11324 ctxt = xmlCreateFileParserCtxt(filename);
11325 if (ctxt == NULL) {
11326 return(NULL);
11327 }
11328 if (sax != NULL) {
11329 if (ctxt->sax != NULL)
11330 xmlFree(ctxt->sax);
11331 ctxt->sax = sax;
11332 ctxt->userData = NULL;
11333 }
11334
Owen Taylor3473f882001-02-23 17:55:21 +000011335 xmlParseExtParsedEnt(ctxt);
11336
11337 if (ctxt->wellFormed)
11338 ret = ctxt->myDoc;
11339 else {
11340 ret = NULL;
11341 xmlFreeDoc(ctxt->myDoc);
11342 ctxt->myDoc = NULL;
11343 }
11344 if (sax != NULL)
11345 ctxt->sax = NULL;
11346 xmlFreeParserCtxt(ctxt);
11347
11348 return(ret);
11349}
11350
11351/**
11352 * xmlParseEntity:
11353 * @filename: the filename
11354 *
11355 * parse an XML external entity out of context and build a tree.
11356 *
11357 * [78] extParsedEnt ::= TextDecl? content
11358 *
11359 * This correspond to a "Well Balanced" chunk
11360 *
11361 * Returns the resulting document tree
11362 */
11363
11364xmlDocPtr
11365xmlParseEntity(const char *filename) {
11366 return(xmlSAXParseEntity(NULL, filename));
11367}
Daniel Veillard81273902003-09-30 00:43:48 +000011368#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011369
11370/**
11371 * xmlCreateEntityParserCtxt:
11372 * @URL: the entity URL
11373 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011374 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011375 *
11376 * Create a parser context for an external entity
11377 * Automatic support for ZLIB/Compress compressed document is provided
11378 * by default if found at compile-time.
11379 *
11380 * Returns the new parser context or NULL
11381 */
11382xmlParserCtxtPtr
11383xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11384 const xmlChar *base) {
11385 xmlParserCtxtPtr ctxt;
11386 xmlParserInputPtr inputStream;
11387 char *directory = NULL;
11388 xmlChar *uri;
11389
11390 ctxt = xmlNewParserCtxt();
11391 if (ctxt == NULL) {
11392 return(NULL);
11393 }
11394
11395 uri = xmlBuildURI(URL, base);
11396
11397 if (uri == NULL) {
11398 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11399 if (inputStream == NULL) {
11400 xmlFreeParserCtxt(ctxt);
11401 return(NULL);
11402 }
11403
11404 inputPush(ctxt, inputStream);
11405
11406 if ((ctxt->directory == NULL) && (directory == NULL))
11407 directory = xmlParserGetDirectory((char *)URL);
11408 if ((ctxt->directory == NULL) && (directory != NULL))
11409 ctxt->directory = directory;
11410 } else {
11411 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11412 if (inputStream == NULL) {
11413 xmlFree(uri);
11414 xmlFreeParserCtxt(ctxt);
11415 return(NULL);
11416 }
11417
11418 inputPush(ctxt, inputStream);
11419
11420 if ((ctxt->directory == NULL) && (directory == NULL))
11421 directory = xmlParserGetDirectory((char *)uri);
11422 if ((ctxt->directory == NULL) && (directory != NULL))
11423 ctxt->directory = directory;
11424 xmlFree(uri);
11425 }
Owen Taylor3473f882001-02-23 17:55:21 +000011426 return(ctxt);
11427}
11428
11429/************************************************************************
11430 * *
11431 * Front ends when parsing from a file *
11432 * *
11433 ************************************************************************/
11434
11435/**
11436 * xmlCreateFileParserCtxt:
11437 * @filename: the filename
11438 *
11439 * Create a parser context for a file content.
11440 * Automatic support for ZLIB/Compress compressed document is provided
11441 * by default if found at compile-time.
11442 *
11443 * Returns the new parser context or NULL
11444 */
11445xmlParserCtxtPtr
11446xmlCreateFileParserCtxt(const char *filename)
11447{
11448 xmlParserCtxtPtr ctxt;
11449 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011450 char *directory = NULL;
11451
Owen Taylor3473f882001-02-23 17:55:21 +000011452 ctxt = xmlNewParserCtxt();
11453 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011454 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011455 return(NULL);
11456 }
11457
Igor Zlatkovicce076162003-02-23 13:39:39 +000011458
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011459 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011460 if (inputStream == NULL) {
11461 xmlFreeParserCtxt(ctxt);
11462 return(NULL);
11463 }
11464
Owen Taylor3473f882001-02-23 17:55:21 +000011465 inputPush(ctxt, inputStream);
11466 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011467 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011468 if ((ctxt->directory == NULL) && (directory != NULL))
11469 ctxt->directory = directory;
11470
11471 return(ctxt);
11472}
11473
Daniel Veillard81273902003-09-30 00:43:48 +000011474#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011475/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011476 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011477 * @sax: the SAX handler block
11478 * @filename: the filename
11479 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11480 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011481 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011482 *
11483 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11484 * compressed document is provided by default if found at compile-time.
11485 * It use the given SAX function block to handle the parsing callback.
11486 * If sax is NULL, fallback to the default DOM tree building routines.
11487 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011488 * User data (void *) is stored within the parser context in the
11489 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011490 *
Owen Taylor3473f882001-02-23 17:55:21 +000011491 * Returns the resulting document tree
11492 */
11493
11494xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011495xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11496 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011497 xmlDocPtr ret;
11498 xmlParserCtxtPtr ctxt;
11499 char *directory = NULL;
11500
Daniel Veillard635ef722001-10-29 11:48:19 +000011501 xmlInitParser();
11502
Owen Taylor3473f882001-02-23 17:55:21 +000011503 ctxt = xmlCreateFileParserCtxt(filename);
11504 if (ctxt == NULL) {
11505 return(NULL);
11506 }
11507 if (sax != NULL) {
11508 if (ctxt->sax != NULL)
11509 xmlFree(ctxt->sax);
11510 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011511 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011512 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011513 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011514 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011515 }
Owen Taylor3473f882001-02-23 17:55:21 +000011516
11517 if ((ctxt->directory == NULL) && (directory == NULL))
11518 directory = xmlParserGetDirectory(filename);
11519 if ((ctxt->directory == NULL) && (directory != NULL))
11520 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11521
Daniel Veillarddad3f682002-11-17 16:47:27 +000011522 ctxt->recovery = recovery;
11523
Owen Taylor3473f882001-02-23 17:55:21 +000011524 xmlParseDocument(ctxt);
11525
William M. Brackc07329e2003-09-08 01:57:30 +000011526 if ((ctxt->wellFormed) || recovery) {
11527 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011528 if (ret != NULL) {
11529 if (ctxt->input->buf->compressed > 0)
11530 ret->compression = 9;
11531 else
11532 ret->compression = ctxt->input->buf->compressed;
11533 }
William M. Brackc07329e2003-09-08 01:57:30 +000011534 }
Owen Taylor3473f882001-02-23 17:55:21 +000011535 else {
11536 ret = NULL;
11537 xmlFreeDoc(ctxt->myDoc);
11538 ctxt->myDoc = NULL;
11539 }
11540 if (sax != NULL)
11541 ctxt->sax = NULL;
11542 xmlFreeParserCtxt(ctxt);
11543
11544 return(ret);
11545}
11546
11547/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011548 * xmlSAXParseFile:
11549 * @sax: the SAX handler block
11550 * @filename: the filename
11551 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11552 * documents
11553 *
11554 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11555 * compressed document is provided by default if found at compile-time.
11556 * It use the given SAX function block to handle the parsing callback.
11557 * If sax is NULL, fallback to the default DOM tree building routines.
11558 *
11559 * Returns the resulting document tree
11560 */
11561
11562xmlDocPtr
11563xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11564 int recovery) {
11565 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11566}
11567
11568/**
Owen Taylor3473f882001-02-23 17:55:21 +000011569 * xmlRecoverDoc:
11570 * @cur: a pointer to an array of xmlChar
11571 *
11572 * parse an XML in-memory document and build a tree.
11573 * In the case the document is not Well Formed, a tree is built anyway
11574 *
11575 * Returns the resulting document tree
11576 */
11577
11578xmlDocPtr
11579xmlRecoverDoc(xmlChar *cur) {
11580 return(xmlSAXParseDoc(NULL, cur, 1));
11581}
11582
11583/**
11584 * xmlParseFile:
11585 * @filename: the filename
11586 *
11587 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11588 * compressed document is provided by default if found at compile-time.
11589 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011590 * Returns the resulting document tree if the file was wellformed,
11591 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011592 */
11593
11594xmlDocPtr
11595xmlParseFile(const char *filename) {
11596 return(xmlSAXParseFile(NULL, filename, 0));
11597}
11598
11599/**
11600 * xmlRecoverFile:
11601 * @filename: the filename
11602 *
11603 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11604 * compressed document is provided by default if found at compile-time.
11605 * In the case the document is not Well Formed, a tree is built anyway
11606 *
11607 * Returns the resulting document tree
11608 */
11609
11610xmlDocPtr
11611xmlRecoverFile(const char *filename) {
11612 return(xmlSAXParseFile(NULL, filename, 1));
11613}
11614
11615
11616/**
11617 * xmlSetupParserForBuffer:
11618 * @ctxt: an XML parser context
11619 * @buffer: a xmlChar * buffer
11620 * @filename: a file name
11621 *
11622 * Setup the parser context to parse a new buffer; Clears any prior
11623 * contents from the parser context. The buffer parameter must not be
11624 * NULL, but the filename parameter can be
11625 */
11626void
11627xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11628 const char* filename)
11629{
11630 xmlParserInputPtr input;
11631
11632 input = xmlNewInputStream(ctxt);
11633 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011634 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011635 xmlFree(ctxt);
11636 return;
11637 }
11638
11639 xmlClearParserCtxt(ctxt);
11640 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011641 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011642 input->base = buffer;
11643 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011644 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011645 inputPush(ctxt, input);
11646}
11647
11648/**
11649 * xmlSAXUserParseFile:
11650 * @sax: a SAX handler
11651 * @user_data: The user data returned on SAX callbacks
11652 * @filename: a file name
11653 *
11654 * parse an XML file and call the given SAX handler routines.
11655 * Automatic support for ZLIB/Compress compressed document is provided
11656 *
11657 * Returns 0 in case of success or a error number otherwise
11658 */
11659int
11660xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11661 const char *filename) {
11662 int ret = 0;
11663 xmlParserCtxtPtr ctxt;
11664
11665 ctxt = xmlCreateFileParserCtxt(filename);
11666 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011667#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011668 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011669#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011670 xmlFree(ctxt->sax);
11671 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011672 xmlDetectSAX2(ctxt);
11673
Owen Taylor3473f882001-02-23 17:55:21 +000011674 if (user_data != NULL)
11675 ctxt->userData = user_data;
11676
11677 xmlParseDocument(ctxt);
11678
11679 if (ctxt->wellFormed)
11680 ret = 0;
11681 else {
11682 if (ctxt->errNo != 0)
11683 ret = ctxt->errNo;
11684 else
11685 ret = -1;
11686 }
11687 if (sax != NULL)
11688 ctxt->sax = NULL;
11689 xmlFreeParserCtxt(ctxt);
11690
11691 return ret;
11692}
Daniel Veillard81273902003-09-30 00:43:48 +000011693#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011694
11695/************************************************************************
11696 * *
11697 * Front ends when parsing from memory *
11698 * *
11699 ************************************************************************/
11700
11701/**
11702 * xmlCreateMemoryParserCtxt:
11703 * @buffer: a pointer to a char array
11704 * @size: the size of the array
11705 *
11706 * Create a parser context for an XML in-memory document.
11707 *
11708 * Returns the new parser context or NULL
11709 */
11710xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011711xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011712 xmlParserCtxtPtr ctxt;
11713 xmlParserInputPtr input;
11714 xmlParserInputBufferPtr buf;
11715
11716 if (buffer == NULL)
11717 return(NULL);
11718 if (size <= 0)
11719 return(NULL);
11720
11721 ctxt = xmlNewParserCtxt();
11722 if (ctxt == NULL)
11723 return(NULL);
11724
Daniel Veillard53350552003-09-18 13:35:51 +000011725 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011726 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011727 if (buf == NULL) {
11728 xmlFreeParserCtxt(ctxt);
11729 return(NULL);
11730 }
Owen Taylor3473f882001-02-23 17:55:21 +000011731
11732 input = xmlNewInputStream(ctxt);
11733 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011734 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011735 xmlFreeParserCtxt(ctxt);
11736 return(NULL);
11737 }
11738
11739 input->filename = NULL;
11740 input->buf = buf;
11741 input->base = input->buf->buffer->content;
11742 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011743 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011744
11745 inputPush(ctxt, input);
11746 return(ctxt);
11747}
11748
Daniel Veillard81273902003-09-30 00:43:48 +000011749#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011750/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011751 * xmlSAXParseMemoryWithData:
11752 * @sax: the SAX handler block
11753 * @buffer: an pointer to a char array
11754 * @size: the size of the array
11755 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11756 * documents
11757 * @data: the userdata
11758 *
11759 * parse an XML in-memory block and use the given SAX function block
11760 * to handle the parsing callback. If sax is NULL, fallback to the default
11761 * DOM tree building routines.
11762 *
11763 * User data (void *) is stored within the parser context in the
11764 * context's _private member, so it is available nearly everywhere in libxml
11765 *
11766 * Returns the resulting document tree
11767 */
11768
11769xmlDocPtr
11770xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11771 int size, int recovery, void *data) {
11772 xmlDocPtr ret;
11773 xmlParserCtxtPtr ctxt;
11774
11775 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11776 if (ctxt == NULL) return(NULL);
11777 if (sax != NULL) {
11778 if (ctxt->sax != NULL)
11779 xmlFree(ctxt->sax);
11780 ctxt->sax = sax;
11781 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011782 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011783 if (data!=NULL) {
11784 ctxt->_private=data;
11785 }
11786
Daniel Veillardadba5f12003-04-04 16:09:01 +000011787 ctxt->recovery = recovery;
11788
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011789 xmlParseDocument(ctxt);
11790
11791 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11792 else {
11793 ret = NULL;
11794 xmlFreeDoc(ctxt->myDoc);
11795 ctxt->myDoc = NULL;
11796 }
11797 if (sax != NULL)
11798 ctxt->sax = NULL;
11799 xmlFreeParserCtxt(ctxt);
11800
11801 return(ret);
11802}
11803
11804/**
Owen Taylor3473f882001-02-23 17:55:21 +000011805 * xmlSAXParseMemory:
11806 * @sax: the SAX handler block
11807 * @buffer: an pointer to a char array
11808 * @size: the size of the array
11809 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11810 * documents
11811 *
11812 * parse an XML in-memory block and use the given SAX function block
11813 * to handle the parsing callback. If sax is NULL, fallback to the default
11814 * DOM tree building routines.
11815 *
11816 * Returns the resulting document tree
11817 */
11818xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011819xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11820 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011821 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011822}
11823
11824/**
11825 * xmlParseMemory:
11826 * @buffer: an pointer to a char array
11827 * @size: the size of the array
11828 *
11829 * parse an XML in-memory block and build a tree.
11830 *
11831 * Returns the resulting document tree
11832 */
11833
Daniel Veillard50822cb2001-07-26 20:05:51 +000011834xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011835 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11836}
11837
11838/**
11839 * xmlRecoverMemory:
11840 * @buffer: an pointer to a char array
11841 * @size: the size of the array
11842 *
11843 * parse an XML in-memory block and build a tree.
11844 * In the case the document is not Well Formed, a tree is built anyway
11845 *
11846 * Returns the resulting document tree
11847 */
11848
Daniel Veillard50822cb2001-07-26 20:05:51 +000011849xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011850 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11851}
11852
11853/**
11854 * xmlSAXUserParseMemory:
11855 * @sax: a SAX handler
11856 * @user_data: The user data returned on SAX callbacks
11857 * @buffer: an in-memory XML document input
11858 * @size: the length of the XML document in bytes
11859 *
11860 * A better SAX parsing routine.
11861 * parse an XML in-memory buffer and call the given SAX handler routines.
11862 *
11863 * Returns 0 in case of success or a error number otherwise
11864 */
11865int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011866 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011867 int ret = 0;
11868 xmlParserCtxtPtr ctxt;
11869 xmlSAXHandlerPtr oldsax = NULL;
11870
Daniel Veillard9e923512002-08-14 08:48:52 +000011871 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011872 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11873 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011874 oldsax = ctxt->sax;
11875 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011876 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011877 if (user_data != NULL)
11878 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011879
11880 xmlParseDocument(ctxt);
11881
11882 if (ctxt->wellFormed)
11883 ret = 0;
11884 else {
11885 if (ctxt->errNo != 0)
11886 ret = ctxt->errNo;
11887 else
11888 ret = -1;
11889 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011890 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011891 xmlFreeParserCtxt(ctxt);
11892
11893 return ret;
11894}
Daniel Veillard81273902003-09-30 00:43:48 +000011895#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011896
11897/**
11898 * xmlCreateDocParserCtxt:
11899 * @cur: a pointer to an array of xmlChar
11900 *
11901 * Creates a parser context for an XML in-memory document.
11902 *
11903 * Returns the new parser context or NULL
11904 */
11905xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011906xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011907 int len;
11908
11909 if (cur == NULL)
11910 return(NULL);
11911 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011912 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011913}
11914
Daniel Veillard81273902003-09-30 00:43:48 +000011915#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011916/**
11917 * xmlSAXParseDoc:
11918 * @sax: the SAX handler block
11919 * @cur: a pointer to an array of xmlChar
11920 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11921 * documents
11922 *
11923 * parse an XML in-memory document and build a tree.
11924 * It use the given SAX function block to handle the parsing callback.
11925 * If sax is NULL, fallback to the default DOM tree building routines.
11926 *
11927 * Returns the resulting document tree
11928 */
11929
11930xmlDocPtr
11931xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11932 xmlDocPtr ret;
11933 xmlParserCtxtPtr ctxt;
11934
11935 if (cur == NULL) return(NULL);
11936
11937
11938 ctxt = xmlCreateDocParserCtxt(cur);
11939 if (ctxt == NULL) return(NULL);
11940 if (sax != NULL) {
11941 ctxt->sax = sax;
11942 ctxt->userData = NULL;
11943 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011944 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011945
11946 xmlParseDocument(ctxt);
11947 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11948 else {
11949 ret = NULL;
11950 xmlFreeDoc(ctxt->myDoc);
11951 ctxt->myDoc = NULL;
11952 }
11953 if (sax != NULL)
11954 ctxt->sax = NULL;
11955 xmlFreeParserCtxt(ctxt);
11956
11957 return(ret);
11958}
11959
11960/**
11961 * xmlParseDoc:
11962 * @cur: a pointer to an array of xmlChar
11963 *
11964 * parse an XML in-memory document and build a tree.
11965 *
11966 * Returns the resulting document tree
11967 */
11968
11969xmlDocPtr
11970xmlParseDoc(xmlChar *cur) {
11971 return(xmlSAXParseDoc(NULL, cur, 0));
11972}
Daniel Veillard81273902003-09-30 00:43:48 +000011973#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011974
Daniel Veillard81273902003-09-30 00:43:48 +000011975#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011976/************************************************************************
11977 * *
11978 * Specific function to keep track of entities references *
11979 * and used by the XSLT debugger *
11980 * *
11981 ************************************************************************/
11982
11983static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11984
11985/**
11986 * xmlAddEntityReference:
11987 * @ent : A valid entity
11988 * @firstNode : A valid first node for children of entity
11989 * @lastNode : A valid last node of children entity
11990 *
11991 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11992 */
11993static void
11994xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11995 xmlNodePtr lastNode)
11996{
11997 if (xmlEntityRefFunc != NULL) {
11998 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11999 }
12000}
12001
12002
12003/**
12004 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012005 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012006 *
12007 * Set the function to call call back when a xml reference has been made
12008 */
12009void
12010xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12011{
12012 xmlEntityRefFunc = func;
12013}
Daniel Veillard81273902003-09-30 00:43:48 +000012014#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012015
12016/************************************************************************
12017 * *
12018 * Miscellaneous *
12019 * *
12020 ************************************************************************/
12021
12022#ifdef LIBXML_XPATH_ENABLED
12023#include <libxml/xpath.h>
12024#endif
12025
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012026extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012027static int xmlParserInitialized = 0;
12028
12029/**
12030 * xmlInitParser:
12031 *
12032 * Initialization function for the XML parser.
12033 * This is not reentrant. Call once before processing in case of
12034 * use in multithreaded programs.
12035 */
12036
12037void
12038xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012039 if (xmlParserInitialized != 0)
12040 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012041
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012042 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12043 (xmlGenericError == NULL))
12044 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012045 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012046 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012047 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012048 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012049 xmlDefaultSAXHandlerInit();
12050 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012051#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012052 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012053#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012054#ifdef LIBXML_HTML_ENABLED
12055 htmlInitAutoClose();
12056 htmlDefaultSAXHandlerInit();
12057#endif
12058#ifdef LIBXML_XPATH_ENABLED
12059 xmlXPathInit();
12060#endif
12061 xmlParserInitialized = 1;
12062}
12063
12064/**
12065 * xmlCleanupParser:
12066 *
12067 * Cleanup function for the XML parser. It tries to reclaim all
12068 * parsing related global memory allocated for the parser processing.
12069 * It doesn't deallocate any document related memory. Calling this
12070 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000012071 * One should call xmlCleanupParser() only when the process has
12072 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012073 */
12074
12075void
12076xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012077 if (!xmlParserInitialized)
12078 return;
12079
Owen Taylor3473f882001-02-23 17:55:21 +000012080 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012081#ifdef LIBXML_CATALOG_ENABLED
12082 xmlCatalogCleanup();
12083#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012084 xmlCleanupInputCallbacks();
12085#ifdef LIBXML_OUTPUT_ENABLED
12086 xmlCleanupOutputCallbacks();
12087#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000012088 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012089 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012090 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000012091 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012092}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012093
12094/************************************************************************
12095 * *
12096 * New set (2.6.0) of simpler and more flexible APIs *
12097 * *
12098 ************************************************************************/
12099
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; when it is NULL the string is always freed. A NULL @str is
 * ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and must be followed by a semicolon, which keeps
 * it safe inside unbraced if/else constructs.
 */
#define DICT_FREE(str)                                               \
    do {                                                             \
        if ((str) && ((!dict) ||                                     \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))       \
            xmlFree((char *)(str));                                  \
    } while (0)
12111
12112/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012113 * xmlCtxtReset:
12114 * @ctxt: an XML parser context
12115 *
12116 * Reset a parser context
12117 */
12118void
12119xmlCtxtReset(xmlParserCtxtPtr ctxt)
12120{
12121 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012122 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012123
12124 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12125 xmlFreeInputStream(input);
12126 }
12127 ctxt->inputNr = 0;
12128 ctxt->input = NULL;
12129
12130 ctxt->spaceNr = 0;
12131 ctxt->spaceTab[0] = -1;
12132 ctxt->space = &ctxt->spaceTab[0];
12133
12134
12135 ctxt->nodeNr = 0;
12136 ctxt->node = NULL;
12137
12138 ctxt->nameNr = 0;
12139 ctxt->name = NULL;
12140
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012141 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012142 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012143 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012144 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012145 DICT_FREE(ctxt->directory);
12146 ctxt->directory = NULL;
12147 DICT_FREE(ctxt->extSubURI);
12148 ctxt->extSubURI = NULL;
12149 DICT_FREE(ctxt->extSubSystem);
12150 ctxt->extSubSystem = NULL;
12151 if (ctxt->myDoc != NULL)
12152 xmlFreeDoc(ctxt->myDoc);
12153 ctxt->myDoc = NULL;
12154
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012155 ctxt->standalone = -1;
12156 ctxt->hasExternalSubset = 0;
12157 ctxt->hasPErefs = 0;
12158 ctxt->html = 0;
12159 ctxt->external = 0;
12160 ctxt->instate = XML_PARSER_START;
12161 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012162
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012163 ctxt->wellFormed = 1;
12164 ctxt->nsWellFormed = 1;
12165 ctxt->valid = 1;
12166 ctxt->vctxt.userData = ctxt;
12167 ctxt->vctxt.error = xmlParserValidityError;
12168 ctxt->vctxt.warning = xmlParserValidityWarning;
12169 ctxt->record_info = 0;
12170 ctxt->nbChars = 0;
12171 ctxt->checkIndex = 0;
12172 ctxt->inSubset = 0;
12173 ctxt->errNo = XML_ERR_OK;
12174 ctxt->depth = 0;
12175 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12176 ctxt->catalogs = NULL;
12177 xmlInitNodeInfoSeq(&ctxt->node_seq);
12178
12179 if (ctxt->attsDefault != NULL) {
12180 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12181 ctxt->attsDefault = NULL;
12182 }
12183 if (ctxt->attsSpecial != NULL) {
12184 xmlHashFree(ctxt->attsSpecial, NULL);
12185 ctxt->attsSpecial = NULL;
12186 }
12187
Daniel Veillard4432df22003-09-28 18:58:27 +000012188#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012189 if (ctxt->catalogs != NULL)
12190 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012191#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012192 if (ctxt->lastError.code != XML_ERR_OK)
12193 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012194}
12195
12196/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012197 * xmlCtxtResetPush:
12198 * @ctxt: an XML parser context
12199 * @chunk: a pointer to an array of chars
12200 * @size: number of chars in the array
12201 * @filename: an optional file name or URI
12202 * @encoding: the document encoding, or NULL
12203 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012204 * Reset a push parser context
12205 *
12206 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012207 */
12208int
12209xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12210 int size, const char *filename, const char *encoding)
12211{
12212 xmlParserInputPtr inputStream;
12213 xmlParserInputBufferPtr buf;
12214 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12215
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012216 if (ctxt == NULL)
12217 return(1);
12218
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012219 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12220 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12221
12222 buf = xmlAllocParserInputBuffer(enc);
12223 if (buf == NULL)
12224 return(1);
12225
12226 if (ctxt == NULL) {
12227 xmlFreeParserInputBuffer(buf);
12228 return(1);
12229 }
12230
12231 xmlCtxtReset(ctxt);
12232
12233 if (ctxt->pushTab == NULL) {
12234 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12235 sizeof(xmlChar *));
12236 if (ctxt->pushTab == NULL) {
12237 xmlErrMemory(ctxt, NULL);
12238 xmlFreeParserInputBuffer(buf);
12239 return(1);
12240 }
12241 }
12242
12243 if (filename == NULL) {
12244 ctxt->directory = NULL;
12245 } else {
12246 ctxt->directory = xmlParserGetDirectory(filename);
12247 }
12248
12249 inputStream = xmlNewInputStream(ctxt);
12250 if (inputStream == NULL) {
12251 xmlFreeParserInputBuffer(buf);
12252 return(1);
12253 }
12254
12255 if (filename == NULL)
12256 inputStream->filename = NULL;
12257 else
12258 inputStream->filename = (char *)
12259 xmlCanonicPath((const xmlChar *) filename);
12260 inputStream->buf = buf;
12261 inputStream->base = inputStream->buf->buffer->content;
12262 inputStream->cur = inputStream->buf->buffer->content;
12263 inputStream->end =
12264 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12265
12266 inputPush(ctxt, inputStream);
12267
12268 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12269 (ctxt->input->buf != NULL)) {
12270 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12271 int cur = ctxt->input->cur - ctxt->input->base;
12272
12273 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12274
12275 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12276 ctxt->input->cur = ctxt->input->base + cur;
12277 ctxt->input->end =
12278 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12279 use];
12280#ifdef DEBUG_PUSH
12281 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12282#endif
12283 }
12284
12285 if (encoding != NULL) {
12286 xmlCharEncodingHandlerPtr hdlr;
12287
12288 hdlr = xmlFindCharEncodingHandler(encoding);
12289 if (hdlr != NULL) {
12290 xmlSwitchToEncoding(ctxt, hdlr);
12291 } else {
12292 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12293 "Unsupported encoding %s\n", BAD_CAST encoding);
12294 }
12295 } else if (enc != XML_CHAR_ENCODING_NONE) {
12296 xmlSwitchEncoding(ctxt, enc);
12297 }
12298
12299 return(0);
12300}
12301
12302/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012303 * xmlCtxtUseOptions:
12304 * @ctxt: an XML parser context
12305 * @options: a combination of xmlParserOption(s)
12306 *
12307 * Applies the options to the parser context
12308 *
12309 * Returns 0 in case of success, the set of unknown or unimplemented options
12310 * in case of error.
12311 */
12312int
12313xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12314{
12315 if (options & XML_PARSE_RECOVER) {
12316 ctxt->recovery = 1;
12317 options -= XML_PARSE_RECOVER;
12318 } else
12319 ctxt->recovery = 0;
12320 if (options & XML_PARSE_DTDLOAD) {
12321 ctxt->loadsubset = XML_DETECT_IDS;
12322 options -= XML_PARSE_DTDLOAD;
12323 } else
12324 ctxt->loadsubset = 0;
12325 if (options & XML_PARSE_DTDATTR) {
12326 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12327 options -= XML_PARSE_DTDATTR;
12328 }
12329 if (options & XML_PARSE_NOENT) {
12330 ctxt->replaceEntities = 1;
12331 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12332 options -= XML_PARSE_NOENT;
12333 } else
12334 ctxt->replaceEntities = 0;
12335 if (options & XML_PARSE_NOWARNING) {
12336 ctxt->sax->warning = NULL;
12337 options -= XML_PARSE_NOWARNING;
12338 }
12339 if (options & XML_PARSE_NOERROR) {
12340 ctxt->sax->error = NULL;
12341 ctxt->sax->fatalError = NULL;
12342 options -= XML_PARSE_NOERROR;
12343 }
12344 if (options & XML_PARSE_PEDANTIC) {
12345 ctxt->pedantic = 1;
12346 options -= XML_PARSE_PEDANTIC;
12347 } else
12348 ctxt->pedantic = 0;
12349 if (options & XML_PARSE_NOBLANKS) {
12350 ctxt->keepBlanks = 0;
12351 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12352 options -= XML_PARSE_NOBLANKS;
12353 } else
12354 ctxt->keepBlanks = 1;
12355 if (options & XML_PARSE_DTDVALID) {
12356 ctxt->validate = 1;
12357 if (options & XML_PARSE_NOWARNING)
12358 ctxt->vctxt.warning = NULL;
12359 if (options & XML_PARSE_NOERROR)
12360 ctxt->vctxt.error = NULL;
12361 options -= XML_PARSE_DTDVALID;
12362 } else
12363 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012364#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012365 if (options & XML_PARSE_SAX1) {
12366 ctxt->sax->startElement = xmlSAX2StartElement;
12367 ctxt->sax->endElement = xmlSAX2EndElement;
12368 ctxt->sax->startElementNs = NULL;
12369 ctxt->sax->endElementNs = NULL;
12370 ctxt->sax->initialized = 1;
12371 options -= XML_PARSE_SAX1;
12372 }
Daniel Veillard81273902003-09-30 00:43:48 +000012373#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012374 if (options & XML_PARSE_NODICT) {
12375 ctxt->dictNames = 0;
12376 options -= XML_PARSE_NODICT;
12377 } else {
12378 ctxt->dictNames = 1;
12379 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012380 if (options & XML_PARSE_NOCDATA) {
12381 ctxt->sax->cdataBlock = NULL;
12382 options -= XML_PARSE_NOCDATA;
12383 }
12384 if (options & XML_PARSE_NSCLEAN) {
12385 ctxt->options |= XML_PARSE_NSCLEAN;
12386 options -= XML_PARSE_NSCLEAN;
12387 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012388 return (options);
12389}
12390
12391/**
12392 * xmlDoRead:
12393 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012394 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012395 * @encoding: the document encoding, or NULL
12396 * @options: a combination of xmlParserOption(s)
12397 * @reuse: keep the context for reuse
12398 *
12399 * Common front-end for the xmlRead functions
12400 *
12401 * Returns the resulting document tree or NULL
12402 */
12403static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012404xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12405 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012406{
12407 xmlDocPtr ret;
12408
12409 xmlCtxtUseOptions(ctxt, options);
12410 if (encoding != NULL) {
12411 xmlCharEncodingHandlerPtr hdlr;
12412
12413 hdlr = xmlFindCharEncodingHandler(encoding);
12414 if (hdlr != NULL)
12415 xmlSwitchToEncoding(ctxt, hdlr);
12416 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012417 if ((URL != NULL) && (ctxt->input != NULL) &&
12418 (ctxt->input->filename == NULL))
12419 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012420 xmlParseDocument(ctxt);
12421 if ((ctxt->wellFormed) || ctxt->recovery)
12422 ret = ctxt->myDoc;
12423 else {
12424 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012425 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012426 if ((ctxt->dictNames) &&
12427 (ctxt->myDoc->dict == ctxt->dict))
12428 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012429 xmlFreeDoc(ctxt->myDoc);
12430 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012431 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012432 ctxt->myDoc = NULL;
12433 if (!reuse) {
12434 if ((ctxt->dictNames) &&
12435 (ret != NULL) &&
12436 (ret->dict == ctxt->dict))
12437 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012438 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012439 } else {
12440 /* Must duplicate the reference to the dictionary */
12441 if ((ctxt->dictNames) &&
12442 (ret != NULL) &&
12443 (ret->dict == ctxt->dict))
12444 xmlDictReference(ctxt->dict);
12445 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012446
12447 return (ret);
12448}
12449
12450/**
12451 * xmlReadDoc:
12452 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012453 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012454 * @encoding: the document encoding, or NULL
12455 * @options: a combination of xmlParserOption(s)
12456 *
12457 * parse an XML in-memory document and build a tree.
12458 *
12459 * Returns the resulting document tree
12460 */
12461xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012462xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012463{
12464 xmlParserCtxtPtr ctxt;
12465
12466 if (cur == NULL)
12467 return (NULL);
12468
12469 ctxt = xmlCreateDocParserCtxt(cur);
12470 if (ctxt == NULL)
12471 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012472 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012473}
12474
12475/**
12476 * xmlReadFile:
12477 * @filename: a file or URL
12478 * @encoding: the document encoding, or NULL
12479 * @options: a combination of xmlParserOption(s)
12480 *
12481 * parse an XML file from the filesystem or the network.
12482 *
12483 * Returns the resulting document tree
12484 */
12485xmlDocPtr
12486xmlReadFile(const char *filename, const char *encoding, int options)
12487{
12488 xmlParserCtxtPtr ctxt;
12489
12490 ctxt = xmlCreateFileParserCtxt(filename);
12491 if (ctxt == NULL)
12492 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012493 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012494}
12495
12496/**
12497 * xmlReadMemory:
12498 * @buffer: a pointer to a char array
12499 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012500 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012501 * @encoding: the document encoding, or NULL
12502 * @options: a combination of xmlParserOption(s)
12503 *
12504 * parse an XML in-memory document and build a tree.
12505 *
12506 * Returns the resulting document tree
12507 */
12508xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012509xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012510{
12511 xmlParserCtxtPtr ctxt;
12512
12513 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12514 if (ctxt == NULL)
12515 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012516 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012517}
12518
12519/**
12520 * xmlReadFd:
12521 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012522 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012523 * @encoding: the document encoding, or NULL
12524 * @options: a combination of xmlParserOption(s)
12525 *
12526 * parse an XML from a file descriptor and build a tree.
12527 *
12528 * Returns the resulting document tree
12529 */
12530xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012531xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012532{
12533 xmlParserCtxtPtr ctxt;
12534 xmlParserInputBufferPtr input;
12535 xmlParserInputPtr stream;
12536
12537 if (fd < 0)
12538 return (NULL);
12539
12540 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12541 if (input == NULL)
12542 return (NULL);
12543 ctxt = xmlNewParserCtxt();
12544 if (ctxt == NULL) {
12545 xmlFreeParserInputBuffer(input);
12546 return (NULL);
12547 }
12548 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12549 if (stream == NULL) {
12550 xmlFreeParserInputBuffer(input);
12551 xmlFreeParserCtxt(ctxt);
12552 return (NULL);
12553 }
12554 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012555 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012556}
12557
12558/**
12559 * xmlReadIO:
12560 * @ioread: an I/O read function
12561 * @ioclose: an I/O close function
12562 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012563 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012564 * @encoding: the document encoding, or NULL
12565 * @options: a combination of xmlParserOption(s)
12566 *
12567 * parse an XML document from I/O functions and source and build a tree.
12568 *
12569 * Returns the resulting document tree
12570 */
12571xmlDocPtr
12572xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012573 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012574{
12575 xmlParserCtxtPtr ctxt;
12576 xmlParserInputBufferPtr input;
12577 xmlParserInputPtr stream;
12578
12579 if (ioread == NULL)
12580 return (NULL);
12581
12582 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12583 XML_CHAR_ENCODING_NONE);
12584 if (input == NULL)
12585 return (NULL);
12586 ctxt = xmlNewParserCtxt();
12587 if (ctxt == NULL) {
12588 xmlFreeParserInputBuffer(input);
12589 return (NULL);
12590 }
12591 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12592 if (stream == NULL) {
12593 xmlFreeParserInputBuffer(input);
12594 xmlFreeParserCtxt(ctxt);
12595 return (NULL);
12596 }
12597 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012598 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012599}
12600
12601/**
12602 * xmlCtxtReadDoc:
12603 * @ctxt: an XML parser context
12604 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012605 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012606 * @encoding: the document encoding, or NULL
12607 * @options: a combination of xmlParserOption(s)
12608 *
12609 * parse an XML in-memory document and build a tree.
12610 * This reuses the existing @ctxt parser context
12611 *
12612 * Returns the resulting document tree
12613 */
12614xmlDocPtr
12615xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012616 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012617{
12618 xmlParserInputPtr stream;
12619
12620 if (cur == NULL)
12621 return (NULL);
12622 if (ctxt == NULL)
12623 return (NULL);
12624
12625 xmlCtxtReset(ctxt);
12626
12627 stream = xmlNewStringInputStream(ctxt, cur);
12628 if (stream == NULL) {
12629 return (NULL);
12630 }
12631 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012632 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012633}
12634
12635/**
12636 * xmlCtxtReadFile:
12637 * @ctxt: an XML parser context
12638 * @filename: a file or URL
12639 * @encoding: the document encoding, or NULL
12640 * @options: a combination of xmlParserOption(s)
12641 *
12642 * parse an XML file from the filesystem or the network.
12643 * This reuses the existing @ctxt parser context
12644 *
12645 * Returns the resulting document tree
12646 */
12647xmlDocPtr
12648xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12649 const char *encoding, int options)
12650{
12651 xmlParserInputPtr stream;
12652
12653 if (filename == NULL)
12654 return (NULL);
12655 if (ctxt == NULL)
12656 return (NULL);
12657
12658 xmlCtxtReset(ctxt);
12659
12660 stream = xmlNewInputFromFile(ctxt, filename);
12661 if (stream == NULL) {
12662 return (NULL);
12663 }
12664 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012665 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012666}
12667
12668/**
12669 * xmlCtxtReadMemory:
12670 * @ctxt: an XML parser context
12671 * @buffer: a pointer to a char array
12672 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012673 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012674 * @encoding: the document encoding, or NULL
12675 * @options: a combination of xmlParserOption(s)
12676 *
12677 * parse an XML in-memory document and build a tree.
12678 * This reuses the existing @ctxt parser context
12679 *
12680 * Returns the resulting document tree
12681 */
12682xmlDocPtr
12683xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012684 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012685{
12686 xmlParserInputBufferPtr input;
12687 xmlParserInputPtr stream;
12688
12689 if (ctxt == NULL)
12690 return (NULL);
12691 if (buffer == NULL)
12692 return (NULL);
12693
12694 xmlCtxtReset(ctxt);
12695
12696 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12697 if (input == NULL) {
12698 return(NULL);
12699 }
12700
12701 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12702 if (stream == NULL) {
12703 xmlFreeParserInputBuffer(input);
12704 return(NULL);
12705 }
12706
12707 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012708 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012709}
12710
12711/**
12712 * xmlCtxtReadFd:
12713 * @ctxt: an XML parser context
12714 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012715 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012716 * @encoding: the document encoding, or NULL
12717 * @options: a combination of xmlParserOption(s)
12718 *
12719 * parse an XML from a file descriptor and build a tree.
12720 * This reuses the existing @ctxt parser context
12721 *
12722 * Returns the resulting document tree
12723 */
12724xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012725xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12726 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012727{
12728 xmlParserInputBufferPtr input;
12729 xmlParserInputPtr stream;
12730
12731 if (fd < 0)
12732 return (NULL);
12733 if (ctxt == NULL)
12734 return (NULL);
12735
12736 xmlCtxtReset(ctxt);
12737
12738
12739 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12740 if (input == NULL)
12741 return (NULL);
12742 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12743 if (stream == NULL) {
12744 xmlFreeParserInputBuffer(input);
12745 return (NULL);
12746 }
12747 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012748 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012749}
12750
12751/**
12752 * xmlCtxtReadIO:
12753 * @ctxt: an XML parser context
12754 * @ioread: an I/O read function
12755 * @ioclose: an I/O close function
12756 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012757 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012758 * @encoding: the document encoding, or NULL
12759 * @options: a combination of xmlParserOption(s)
12760 *
12761 * parse an XML document from I/O functions and source and build a tree.
12762 * This reuses the existing @ctxt parser context
12763 *
12764 * Returns the resulting document tree
12765 */
12766xmlDocPtr
12767xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12768 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012769 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012770 const char *encoding, int options)
12771{
12772 xmlParserInputBufferPtr input;
12773 xmlParserInputPtr stream;
12774
12775 if (ioread == NULL)
12776 return (NULL);
12777 if (ctxt == NULL)
12778 return (NULL);
12779
12780 xmlCtxtReset(ctxt);
12781
12782 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12783 XML_CHAR_ENCODING_NONE);
12784 if (input == NULL)
12785 return (NULL);
12786 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12787 if (stream == NULL) {
12788 xmlFreeParserInputBuffer(input);
12789 return (NULL);
12790 }
12791 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012792 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012793}