blob: cf2e251f98ac25a79b7ec330bafbcbf40c666c52 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/* Directory separator character: backslash on native WIN32, slash elsewhere */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Upper bound on the depth of the XML documents the parser accepts to
 * process. The value is arbitrary: it is a safety boundary, not an
 * inherent limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/*
 * Constants defined for the remainder of this translation unit.
 * NOTE(review): their users are outside the visible part of the file;
 * the buffer sizes are presumably element counts for local buffers.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

#define SAX2 1

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * XML-prefixed processing instruction names allowed by the W3C
 * specifications; the list ends with a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000857 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *           the index
 *   GROW, SHRINK  handling of input buffers
 */
1231
/*
 * Accessors on the current input. All of them expect a variable named
 * ctxt to be in scope at the point of use.
 */
/* RAW: the xmlChar at the current position of the current input. */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001232#define RAW (*ctxt->input->cur)
/* CUR: expands to exactly the same expression as RAW. */
1233#define CUR (*ctxt->input->cur)
/* NXT(val): the xmlChar val positions after the current one; the macro does no bounds check. */
Owen Taylor3473f882001-02-23 17:55:21 +00001234#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current read pointer itself (usable as an lvalue). */
1235#define CUR_PTR ctxt->input->cur
1236
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn, compared as unsigned char values from left to right.
 * Every argument is parenthesized in the expansion so that arbitrary
 * expressions (conditional expressions for instance) can be passed.
 * s is still evaluated once per compared byte, so it must be free of
 * side effects. No check is made that n bytes are readable at s.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1254
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * ctxt->nbChars and to the column counter; the line counter is not
 * touched. After the move a '%' at the new position triggers
 * xmlParserHandlePEReference(), and if the new position holds a 0 byte
 * and xmlParserInputGrow() brings nothing more, the input is popped with
 * xmlPopInput(). val is evaluated three times.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00001255#define SKIP(val) do { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001256 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
Owen Taylor3473f882001-02-23 17:55:21 +00001257 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001258 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +00001259 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1260 xmlPopInput(ctxt); \
1261 } while (0)
1262
/*
 * SHRINK: when ctxt->progressive is 0, more than 2 * INPUT_CHUNK bytes
 * lie between the buffer base and the current position, and fewer than
 * 2 * INPUT_CHUNK bytes remain before the end, call xmlSHRINK().
 * NOTE: the expansion is a bare "if" statement that already ends with
 * ';', so "SHRINK;" yields an extra empty statement and the macro cannot
 * be the unbraced body of an "if" that has an "else".
 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001263#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001264 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1265 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001266 xmlSHRINK (ctxt);
1267
1268static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1269 xmlParserInputShrink(ctxt->input);
1270 if ((*ctxt->input->cur == 0) &&
1271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1272 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001273 }
Owen Taylor3473f882001-02-23 17:55:21 +00001274
/*
 * GROW: when ctxt->progressive is 0 and fewer than INPUT_CHUNK bytes
 * remain between the current position and the end of the buffer, call
 * xmlGROW().
 * NOTE: the expansion is a bare "if" statement that already ends with
 * ';', so "GROW;" yields an extra empty statement and the macro cannot
 * be the unbraced body of an "if" that has an "else".
 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001275#define GROW if ((ctxt->progressive == 0) && \
1276 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001277 xmlGROW (ctxt);
1278
1279static void xmlGROW (xmlParserCtxtPtr ctxt) {
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((*ctxt->input->cur == 0) &&
1282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1283 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001284}
Owen Taylor3473f882001-02-23 17:55:21 +00001285
/* SKIP_BLANKS: call xmlSkipBlankChars() on ctxt; the value is the number of blanks skipped. */
1286#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1287
/* NEXT: advance by one character through xmlNextChar(). */
1288#define NEXT xmlNextChar(ctxt)
1289
/*
 * NEXT1: advance by exactly one xmlChar, bumping the column counter and
 * ctxt->nbChars; the line counter is not touched. If the new position
 * holds a 0 byte more input is requested; the result of that request is
 * ignored. The macro takes no argument and expands to a brace block.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00001290#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001291 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001292 ctxt->input->cur++; \
1293 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001294 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1296 }
1297
/*
 * NEXTL(l): advance by l xmlChars. A '\n' at the current position
 * increments the line counter and resets the column to 1, anything else
 * increments the column by one. A '%' at the new position triggers
 * xmlParserHandlePEReference(). l is used unparenthesized.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00001298#define NEXTL(l) do { \
1299 if (*(ctxt->input->cur) == '\n') { \
1300 ctxt->input->line++; ctxt->input->col = 1; \
1301 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00001302 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00001303 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +00001304 } while (0)
1305
/* CUR_CHAR(l): call xmlCurrentChar() on ctxt, passing the address of l; l must be an lvalue. */
1306#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): call xmlStringCurrentChar() on the string s, passing the address of l. */
1307#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1308
/*
 * COPY_BUF(l,b,i,v): store v in buffer b at index i and advance i: one
 * xmlChar when l is 1, otherwise through xmlCopyCharMultiByte(), whose
 * return value is added to i.
 * NOTE: expands to an unbraced if/else with unparenthesized arguments;
 * pass plain variables and follow the use with ';'.
 */
1309#define COPY_BUF(l,b,i,v) \
1310 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001311 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001312
1313/**
1314 * xmlSkipBlankChars:
1315 * @ctxt: the XML parser context
1316 *
1317 * skip all blanks character found at that point in the input streams.
1318 * It pops up finished entities in the process if allowable at that point.
1319 *
1320 * Returns the number of space chars skipped
1321 */
1322
1323int
1324xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001325 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001326
1327 /*
1328 * It's Okay to use CUR/NEXT here since all the blanks are on
1329 * the ASCII range.
1330 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001331 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1332 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001333 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001334 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001336 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001337 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001338 if (*cur == '\n') {
1339 ctxt->input->line++; ctxt->input->col = 1;
1340 }
1341 cur++;
1342 res++;
1343 if (*cur == 0) {
1344 ctxt->input->cur = cur;
1345 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1346 cur = ctxt->input->cur;
1347 }
1348 }
1349 ctxt->input->cur = cur;
1350 } else {
1351 int cur;
1352 do {
1353 cur = CUR;
1354 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1355 NEXT;
1356 cur = CUR;
1357 res++;
1358 }
1359 while ((cur == 0) && (ctxt->inputNr > 1) &&
1360 (ctxt->instate != XML_PARSER_COMMENT)) {
1361 xmlPopInput(ctxt);
1362 cur = CUR;
1363 }
1364 /*
1365 * Need to handle support of entities branching here
1366 */
1367 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1368 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1369 }
Owen Taylor3473f882001-02-23 17:55:21 +00001370 return(res);
1371}
1372
1373/************************************************************************
1374 * *
1375 * Commodity functions to handle entities *
1376 * *
1377 ************************************************************************/
1378
1379/**
1380 * xmlPopInput:
1381 * @ctxt: an XML parser context
1382 *
1383 * xmlPopInput: the current input pointed by ctxt->input came to an end
1384 * pop it and return the next char.
1385 *
1386 * Returns the current xmlChar in the parser context
1387 */
1388xmlChar
1389xmlPopInput(xmlParserCtxtPtr ctxt) {
1390 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1391 if (xmlParserDebugEntities)
1392 xmlGenericError(xmlGenericErrorContext,
1393 "Popping input %d\n", ctxt->inputNr);
1394 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001395 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1397 return(xmlPopInput(ctxt));
1398 return(CUR);
1399}
1400
1401/**
1402 * xmlPushInput:
1403 * @ctxt: an XML parser context
1404 * @input: an XML parser input fragment (entity, XML fragment ...).
1405 *
1406 * xmlPushInput: switch to a new input stream which is stacked on top
1407 * of the previous one(s).
1408 */
1409void
1410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1411 if (input == NULL) return;
1412
1413 if (xmlParserDebugEntities) {
1414 if ((ctxt->input != NULL) && (ctxt->input->filename))
1415 xmlGenericError(xmlGenericErrorContext,
1416 "%s(%d): ", ctxt->input->filename,
1417 ctxt->input->line);
1418 xmlGenericError(xmlGenericErrorContext,
1419 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1420 }
1421 inputPush(ctxt, input);
1422 GROW;
1423}
1424
1425/**
1426 * xmlParseCharRef:
1427 * @ctxt: an XML parser context
1428 *
1429 * parse Reference declarations
1430 *
1431 * [66] CharRef ::= '&#' [0-9]+ ';' |
1432 * '&#x' [0-9a-fA-F]+ ';'
1433 *
1434 * [ WFC: Legal Character ]
1435 * Characters referred to using character references must match the
1436 * production for Char.
1437 *
1438 * Returns the value parsed (as an int), 0 in case of error
1439 */
1440int
1441xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001442 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 int count = 0;
1444
Owen Taylor3473f882001-02-23 17:55:21 +00001445 /*
1446 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1447 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001448 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001449 (NXT(2) == 'x')) {
1450 SKIP(3);
1451 GROW;
1452 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001453 if (count++ > 20) {
1454 count = 0;
1455 GROW;
1456 }
1457 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = val * 16 + (CUR - '0');
1459 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1460 val = val * 16 + (CUR - 'a') + 10;
1461 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1462 val = val * 16 + (CUR - 'A') + 10;
1463 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001465 val = 0;
1466 break;
1467 }
1468 NEXT;
1469 count++;
1470 }
1471 if (RAW == ';') {
1472 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001473 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001474 ctxt->nbChars ++;
1475 ctxt->input->cur++;
1476 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001477 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001478 SKIP(2);
1479 GROW;
1480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001481 if (count++ > 20) {
1482 count = 0;
1483 GROW;
1484 }
1485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001486 val = val * 10 + (CUR - '0');
1487 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001489 val = 0;
1490 break;
1491 }
1492 NEXT;
1493 count++;
1494 }
1495 if (RAW == ';') {
1496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001497 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001498 ctxt->nbChars ++;
1499 ctxt->input->cur++;
1500 }
1501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 }
1504
1505 /*
1506 * [ WFC: Legal Character ]
1507 * Characters referred to using character references must match the
1508 * production for Char.
1509 */
William M. Brack871611b2003-10-18 04:53:14 +00001510 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 return(val);
1512 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1514 "xmlParseCharRef: invalid xmlChar value %d\n",
1515 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 }
1517 return(0);
1518}
1519
1520/**
1521 * xmlParseStringCharRef:
1522 * @ctxt: an XML parser context
1523 * @str: a pointer to an index in the string
1524 *
1525 * parse Reference declarations, variant parsing from a string rather
1526 * than an an input flow.
1527 *
1528 * [66] CharRef ::= '&#' [0-9]+ ';' |
1529 * '&#x' [0-9a-fA-F]+ ';'
1530 *
1531 * [ WFC: Legal Character ]
1532 * Characters referred to using character references must match the
1533 * production for Char.
1534 *
1535 * Returns the value parsed (as an int), 0 in case of error, str will be
1536 * updated to the current value of the index
1537 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538static int
Owen Taylor3473f882001-02-23 17:55:21 +00001539xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1540 const xmlChar *ptr;
1541 xmlChar cur;
1542 int val = 0;
1543
1544 if ((str == NULL) || (*str == NULL)) return(0);
1545 ptr = *str;
1546 cur = *ptr;
1547 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1548 ptr += 3;
1549 cur = *ptr;
1550 while (cur != ';') { /* Non input consuming loop */
1551 if ((cur >= '0') && (cur <= '9'))
1552 val = val * 16 + (cur - '0');
1553 else if ((cur >= 'a') && (cur <= 'f'))
1554 val = val * 16 + (cur - 'a') + 10;
1555 else if ((cur >= 'A') && (cur <= 'F'))
1556 val = val * 16 + (cur - 'A') + 10;
1557 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001558 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001559 val = 0;
1560 break;
1561 }
1562 ptr++;
1563 cur = *ptr;
1564 }
1565 if (cur == ';')
1566 ptr++;
1567 } else if ((cur == '&') && (ptr[1] == '#')){
1568 ptr += 2;
1569 cur = *ptr;
1570 while (cur != ';') { /* Non input consuming loops */
1571 if ((cur >= '0') && (cur <= '9'))
1572 val = val * 10 + (cur - '0');
1573 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001574 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001575 val = 0;
1576 break;
1577 }
1578 ptr++;
1579 cur = *ptr;
1580 }
1581 if (cur == ';')
1582 ptr++;
1583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001584 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001585 return(0);
1586 }
1587 *str = ptr;
1588
1589 /*
1590 * [ WFC: Legal Character ]
1591 * Characters referred to using character references must match the
1592 * production for Char.
1593 */
William M. Brack871611b2003-10-18 04:53:14 +00001594 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 return(val);
1596 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001597 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1598 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1599 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 return(0);
1602}
1603
1604/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 * xmlNewBlanksWrapperInputStream:
1606 * @ctxt: an XML parser context
1607 * @entity: an Entity pointer
1608 *
1609 * Create a new input stream for wrapping
1610 * blanks around a PEReference
1611 *
1612 * Returns the new input stream or NULL
1613 */
1614
1615static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1616
Daniel Veillardf4862f02002-09-10 11:13:43 +00001617static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001618xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1619 xmlParserInputPtr input;
1620 xmlChar *buffer;
1621 size_t length;
1622 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1624 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001625 return(NULL);
1626 }
1627 if (xmlParserDebugEntities)
1628 xmlGenericError(xmlGenericErrorContext,
1629 "new blanks wrapper for entity: %s\n", entity->name);
1630 input = xmlNewInputStream(ctxt);
1631 if (input == NULL) {
1632 return(NULL);
1633 }
1634 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001635 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001636 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001637 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001638 return(NULL);
1639 }
1640 buffer [0] = ' ';
1641 buffer [1] = '%';
1642 buffer [length-3] = ';';
1643 buffer [length-2] = ' ';
1644 buffer [length-1] = 0;
1645 memcpy(buffer + 2, entity->name, length - 5);
1646 input->free = deallocblankswrapper;
1647 input->base = buffer;
1648 input->cur = buffer;
1649 input->length = length;
1650 input->end = &buffer[length];
1651 return(input);
1652}
1653
1654/**
Owen Taylor3473f882001-02-23 17:55:21 +00001655 * xmlParserHandlePEReference:
1656 * @ctxt: the parser context
1657 *
1658 * [69] PEReference ::= '%' Name ';'
1659 *
1660 * [ WFC: No Recursion ]
1661 * A parsed entity must not contain a recursive
1662 * reference to itself, either directly or indirectly.
1663 *
1664 * [ WFC: Entity Declared ]
1665 * In a document without any DTD, a document with only an internal DTD
1666 * subset which contains no parameter entity references, or a document
1667 * with "standalone='yes'", ... ... The declaration of a parameter
1668 * entity must precede any reference to it...
1669 *
1670 * [ VC: Entity Declared ]
1671 * In a document with an external subset or external parameter entities
1672 * with "standalone='no'", ... ... The declaration of a parameter entity
1673 * must precede any reference to it...
1674 *
1675 * [ WFC: In DTD ]
1676 * Parameter-entity references may only appear in the DTD.
1677 * NOTE: misleading but this is handled.
1678 *
1679 * A PEReference may have been detected in the current input stream
1680 * the handling is done accordingly to
1681 * http://www.w3.org/TR/REC-xml#entproc
1682 * i.e.
1683 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001684 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001685 */
1686void
1687xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 xmlEntityPtr entity = NULL;
1690 xmlParserInputPtr input;
1691
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (RAW != '%') return;
1693 switch(ctxt->instate) {
1694 case XML_PARSER_CDATA_SECTION:
1695 return;
1696 case XML_PARSER_COMMENT:
1697 return;
1698 case XML_PARSER_START_TAG:
1699 return;
1700 case XML_PARSER_END_TAG:
1701 return;
1702 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 return;
1705 case XML_PARSER_PROLOG:
1706 case XML_PARSER_START:
1707 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 return;
1710 case XML_PARSER_ENTITY_DECL:
1711 case XML_PARSER_CONTENT:
1712 case XML_PARSER_ATTRIBUTE_VALUE:
1713 case XML_PARSER_PI:
1714 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001715 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001716 /* we just ignore it there */
1717 return;
1718 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001719 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001720 return;
1721 case XML_PARSER_ENTITY_VALUE:
1722 /*
1723 * NOTE: in the case of entity values, we don't do the
1724 * substitution here since we need the literal
1725 * entity value to be able to save the internal
1726 * subset of the document.
1727 * This will be handled by xmlStringDecodeEntities
1728 */
1729 return;
1730 case XML_PARSER_DTD:
1731 /*
1732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1733 * In the internal DTD subset, parameter-entity references
1734 * can occur only where markup declarations can occur, not
1735 * within markup declarations.
1736 * In that case this is handled in xmlParseMarkupDecl
1737 */
1738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1739 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001740 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001741 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001742 break;
1743 case XML_PARSER_IGNORE:
1744 return;
1745 }
1746
1747 NEXT;
1748 name = xmlParseName(ctxt);
1749 if (xmlParserDebugEntities)
1750 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001751 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001753 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001754 } else {
1755 if (RAW == ';') {
1756 NEXT;
1757 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1758 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1759 if (entity == NULL) {
1760
1761 /*
1762 * [ WFC: Entity Declared ]
1763 * In a document without any DTD, a document with only an
1764 * internal DTD subset which contains no parameter entity
1765 * references, or a document with "standalone='yes'", ...
1766 * ... The declaration of a parameter entity must precede
1767 * any reference to it...
1768 */
1769 if ((ctxt->standalone == 1) ||
1770 ((ctxt->hasExternalSubset == 0) &&
1771 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001773 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001774 } else {
1775 /*
1776 * [ VC: Entity Declared ]
1777 * In a document with an external subset or external
1778 * parameter entities with "standalone='no'", ...
1779 * ... The declaration of a parameter entity must precede
1780 * any reference to it...
1781 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001782 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1783 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1784 "PEReference: %%%s; not found\n",
1785 name);
1786 } else
1787 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1788 "PEReference: %%%s; not found\n",
1789 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 ctxt->valid = 0;
1791 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 } else if (ctxt->input->free != deallocblankswrapper) {
1793 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1794 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 } else {
1796 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1797 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001798 xmlChar start[4];
1799 xmlCharEncoding enc;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 /*
1802 * handle the extra spaces added before and after
1803 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 */
1806 input = xmlNewEntityInputStream(ctxt, entity);
1807 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001808
1809 /*
1810 * Get the 4 first bytes and decode the charset
1811 * if enc != XML_CHAR_ENCODING_NONE
1812 * plug some encoding conversion routines.
1813 */
1814 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001815 if (entity->length >= 4) {
1816 start[0] = RAW;
1817 start[1] = NXT(1);
1818 start[2] = NXT(2);
1819 start[3] = NXT(3);
1820 enc = xmlDetectCharEncoding(start, 4);
1821 if (enc != XML_CHAR_ENCODING_NONE) {
1822 xmlSwitchEncoding(ctxt, enc);
1823 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001824 }
1825
Owen Taylor3473f882001-02-23 17:55:21 +00001826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001827 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1828 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlParseTextDecl(ctxt);
1830 }
Owen Taylor3473f882001-02-23 17:55:21 +00001831 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001832 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1833 "PEReference: %s is not a parameter entity\n",
1834 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001835 }
1836 }
1837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001838 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 }
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841}
1842
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly. The result
 * of xmlRealloc() goes through a temporary so the original block is not
 * lost when the reallocation fails: in that case the old block is
 * released, <buffer> is set to NULL and control jumps to the caller's
 * mem_error label.
 *
 * Requirements on the expansion site: an int variable named
 * <buffer>_size and a label named mem_error must be in scope.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *)                                        \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (growBuffer_tmp == NULL) {                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        goto mem_error;                                                 \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
1852
1853/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001854 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001855 * @ctxt: the parser context
1856 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001857 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001858 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1859 * @end: an end marker xmlChar, 0 if none
1860 * @end2: an end marker xmlChar, 0 if none
1861 * @end3: an end marker xmlChar, 0 if none
1862 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001863 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001864 *
1865 * [67] Reference ::= EntityRef | CharRef
1866 *
1867 * [69] PEReference ::= '%' Name ';'
1868 *
1869 * Returns A newly allocated string with the substitution done. The caller
1870 * must deallocate it !
1871 */
1872xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1874 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001875 xmlChar *buffer = NULL;
1876 int buffer_size = 0;
1877
1878 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 xmlEntityPtr ent;
1881 int c,l;
1882 int nbchars = 0;
1883
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001885 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001886 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001887
1888 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001889 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 return(NULL);
1891 }
1892
1893 /*
1894 * allocate a translation buffer.
1895 */
1896 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001897 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001898 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001899
1900 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001901 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001902 * we are operating on already parsed values.
1903 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001904 if (str < last)
1905 c = CUR_SCHAR(str, l);
1906 else
1907 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001908 while ((c != 0) && (c != end) && /* non input consuming loop */
1909 (c != end2) && (c != end3)) {
1910
1911 if (c == 0) break;
1912 if ((c == '&') && (str[1] == '#')) {
1913 int val = xmlParseStringCharRef(ctxt, &str);
1914 if (val != 0) {
1915 COPY_BUF(0,buffer,nbchars,val);
1916 }
1917 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1918 if (xmlParserDebugEntities)
1919 xmlGenericError(xmlGenericErrorContext,
1920 "String decoding Entity Reference: %.30s\n",
1921 str);
1922 ent = xmlParseStringEntityRef(ctxt, &str);
1923 if ((ent != NULL) &&
1924 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1925 if (ent->content != NULL) {
1926 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1927 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001928 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1929 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001930 }
1931 } else if ((ent != NULL) && (ent->content != NULL)) {
1932 xmlChar *rep;
1933
1934 ctxt->depth++;
1935 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1936 0, 0, 0);
1937 ctxt->depth--;
1938 if (rep != NULL) {
1939 current = rep;
1940 while (*current != 0) { /* non input consuming loop */
1941 buffer[nbchars++] = *current++;
1942 if (nbchars >
1943 buffer_size - XML_PARSER_BUFFER_SIZE) {
1944 growBuffer(buffer);
1945 }
1946 }
1947 xmlFree(rep);
1948 }
1949 } else if (ent != NULL) {
1950 int i = xmlStrlen(ent->name);
1951 const xmlChar *cur = ent->name;
1952
1953 buffer[nbchars++] = '&';
1954 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1955 growBuffer(buffer);
1956 }
1957 for (;i > 0;i--)
1958 buffer[nbchars++] = *cur++;
1959 buffer[nbchars++] = ';';
1960 }
1961 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "String decoding PE Reference: %.30s\n", str);
1965 ent = xmlParseStringPEReference(ctxt, &str);
1966 if (ent != NULL) {
1967 xmlChar *rep;
1968
1969 ctxt->depth++;
1970 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1971 0, 0, 0);
1972 ctxt->depth--;
1973 if (rep != NULL) {
1974 current = rep;
1975 while (*current != 0) { /* non input consuming loop */
1976 buffer[nbchars++] = *current++;
1977 if (nbchars >
1978 buffer_size - XML_PARSER_BUFFER_SIZE) {
1979 growBuffer(buffer);
1980 }
1981 }
1982 xmlFree(rep);
1983 }
1984 }
1985 } else {
1986 COPY_BUF(l,buffer,nbchars,c);
1987 str += l;
1988 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1989 growBuffer(buffer);
1990 }
1991 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001992 if (str < last)
1993 c = CUR_SCHAR(str, l);
1994 else
1995 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001996 }
1997 buffer[nbchars++] = 0;
1998 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001999
2000mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002001 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002002 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003}
2004
Daniel Veillarde57ec792003-09-10 10:50:59 +00002005/**
2006 * xmlStringDecodeEntities:
2007 * @ctxt: the parser context
2008 * @str: the input string
2009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2010 * @end: an end marker xmlChar, 0 if none
2011 * @end2: an end marker xmlChar, 0 if none
2012 * @end3: an end marker xmlChar, 0 if none
2013 *
2014 * Takes a entity string content and process to do the adequate substitutions.
2015 *
2016 * [67] Reference ::= EntityRef | CharRef
2017 *
2018 * [69] PEReference ::= '%' Name ';'
2019 *
2020 * Returns A newly allocated string with the substitution done. The caller
2021 * must deallocate it !
2022 */
2023xmlChar *
2024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2025 xmlChar end, xmlChar end2, xmlChar end3) {
2026 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2027 end, end2, end3));
2028}
Owen Taylor3473f882001-02-23 17:55:21 +00002029
2030/************************************************************************
2031 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002032 * Commodity functions, cleanup needed ? *
2033 * *
2034 ************************************************************************/
2035
2036/**
2037 * areBlanks:
2038 * @ctxt: an XML parser context
2039 * @str: a xmlChar *
2040 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002041 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002042 *
2043 * Is this a sequence of blank chars that one can ignore ?
2044 *
2045 * Returns 1 if ignorable 0 otherwise.
2046 */
2047
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002048static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2049 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002050 int i, ret;
2051 xmlNodePtr lastChild;
2052
Daniel Veillard05c13a22001-09-09 08:38:09 +00002053 /*
2054 * Don't spend time trying to differentiate them, the same callback is
2055 * used !
2056 */
2057 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002058 return(0);
2059
Owen Taylor3473f882001-02-23 17:55:21 +00002060 /*
2061 * Check for xml:space value.
2062 */
2063 if (*(ctxt->space) == 1)
2064 return(0);
2065
2066 /*
2067 * Check that the string is made of blanks
2068 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002069 if (blank_chars == 0) {
2070 for (i = 0;i < len;i++)
2071 if (!(IS_BLANK_CH(str[i]))) return(0);
2072 }
Owen Taylor3473f882001-02-23 17:55:21 +00002073
2074 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002075 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002076 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002077 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002078 if (ctxt->myDoc != NULL) {
2079 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2080 if (ret == 0) return(1);
2081 if (ret == 1) return(0);
2082 }
2083
2084 /*
2085 * Otherwise, heuristic :-\
2086 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002087 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002088 if ((ctxt->node->children == NULL) &&
2089 (RAW == '<') && (NXT(1) == '/')) return(0);
2090
2091 lastChild = xmlGetLastChild(ctxt->node);
2092 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002093 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2094 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 } else if (xmlNodeIsText(lastChild))
2096 return(0);
2097 else if ((ctxt->node->children != NULL) &&
2098 (xmlNodeIsText(ctxt->node->children)))
2099 return(0);
2100 return(1);
2101}
2102
Owen Taylor3473f882001-02-23 17:55:21 +00002103/************************************************************************
2104 * *
2105 * Extra stuff for namespace support *
2106 * Relates to http://www.w3.org/TR/WD-xml-names *
2107 * *
2108 ************************************************************************/
2109
2110/**
2111 * xmlSplitQName:
2112 * @ctxt: an XML parser context
2113 * @name: an XML parser context
2114 * @prefix: a xmlChar **
2115 *
2116 * parse an UTF8 encoded XML qualified name string
2117 *
2118 * [NS 5] QName ::= (Prefix ':')? LocalPart
2119 *
2120 * [NS 6] Prefix ::= NCName
2121 *
2122 * [NS 7] LocalPart ::= NCName
2123 *
2124 * Returns the local part, and prefix is updated
2125 * to get the Prefix if any.
2126 */
2127
2128xmlChar *
2129xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2130 xmlChar buf[XML_MAX_NAMELEN + 5];
2131 xmlChar *buffer = NULL;
2132 int len = 0;
2133 int max = XML_MAX_NAMELEN;
2134 xmlChar *ret = NULL;
2135 const xmlChar *cur = name;
2136 int c;
2137
2138 *prefix = NULL;
2139
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002140 if (cur == NULL) return(NULL);
2141
Owen Taylor3473f882001-02-23 17:55:21 +00002142#ifndef XML_XML_NAMESPACE
2143 /* xml: prefix is not really a namespace */
2144 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2145 (cur[2] == 'l') && (cur[3] == ':'))
2146 return(xmlStrdup(name));
2147#endif
2148
Daniel Veillard597bc482003-07-24 16:08:28 +00002149 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002150 if (cur[0] == ':')
2151 return(xmlStrdup(name));
2152
2153 c = *cur++;
2154 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2155 buf[len++] = c;
2156 c = *cur++;
2157 }
2158 if (len >= max) {
2159 /*
2160 * Okay someone managed to make a huge name, so he's ready to pay
2161 * for the processing speed.
2162 */
2163 max = len * 2;
2164
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002165 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002166 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002167 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002168 return(NULL);
2169 }
2170 memcpy(buffer, buf, len);
2171 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2172 if (len + 10 > max) {
2173 max *= 2;
2174 buffer = (xmlChar *) xmlRealloc(buffer,
2175 max * sizeof(xmlChar));
2176 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002177 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002178 return(NULL);
2179 }
2180 }
2181 buffer[len++] = c;
2182 c = *cur++;
2183 }
2184 buffer[len] = 0;
2185 }
2186
Daniel Veillard597bc482003-07-24 16:08:28 +00002187 /* nasty but well=formed
2188 if ((c == ':') && (*cur == 0)) {
2189 return(xmlStrdup(name));
2190 } */
2191
Owen Taylor3473f882001-02-23 17:55:21 +00002192 if (buffer == NULL)
2193 ret = xmlStrndup(buf, len);
2194 else {
2195 ret = buffer;
2196 buffer = NULL;
2197 max = XML_MAX_NAMELEN;
2198 }
2199
2200
2201 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002202 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002204 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002205 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002206 }
Owen Taylor3473f882001-02-23 17:55:21 +00002207 len = 0;
2208
Daniel Veillardbb284f42002-10-16 18:02:47 +00002209 /*
2210 * Check that the first character is proper to start
2211 * a new name
2212 */
2213 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2214 ((c >= 0x41) && (c <= 0x5A)) ||
2215 (c == '_') || (c == ':'))) {
2216 int l;
2217 int first = CUR_SCHAR(cur, l);
2218
2219 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002220 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002221 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002222 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002223 }
2224 }
2225 cur++;
2226
Owen Taylor3473f882001-02-23 17:55:21 +00002227 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2228 buf[len++] = c;
2229 c = *cur++;
2230 }
2231 if (len >= max) {
2232 /*
2233 * Okay someone managed to make a huge name, so he's ready to pay
2234 * for the processing speed.
2235 */
2236 max = len * 2;
2237
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002238 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002239 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002241 return(NULL);
2242 }
2243 memcpy(buffer, buf, len);
2244 while (c != 0) { /* tested bigname2.xml */
2245 if (len + 10 > max) {
2246 max *= 2;
2247 buffer = (xmlChar *) xmlRealloc(buffer,
2248 max * sizeof(xmlChar));
2249 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002250 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002251 return(NULL);
2252 }
2253 }
2254 buffer[len++] = c;
2255 c = *cur++;
2256 }
2257 buffer[len] = 0;
2258 }
2259
2260 if (buffer == NULL)
2261 ret = xmlStrndup(buf, len);
2262 else {
2263 ret = buffer;
2264 }
2265 }
2266
2267 return(ret);
2268}
2269
2270/************************************************************************
2271 * *
2272 * The parser itself *
2273 * Relates to http://www.w3.org/TR/REC-xml *
2274 * *
2275 ************************************************************************/
2276
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002277static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002278static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002279 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002280
Owen Taylor3473f882001-02-23 17:55:21 +00002281/**
2282 * xmlParseName:
2283 * @ctxt: an XML parser context
2284 *
2285 * parse an XML name.
2286 *
2287 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2288 * CombiningChar | Extender
2289 *
2290 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2291 *
2292 * [6] Names ::= Name (S Name)*
2293 *
2294 * Returns the Name parsed or NULL
2295 */
2296
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002297const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002298xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002299 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002300 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002301 int count = 0;
2302
2303 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002304
2305 /*
2306 * Accelerator for simple ASCII names
2307 */
2308 in = ctxt->input->cur;
2309 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2310 ((*in >= 0x41) && (*in <= 0x5A)) ||
2311 (*in == '_') || (*in == ':')) {
2312 in++;
2313 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2314 ((*in >= 0x41) && (*in <= 0x5A)) ||
2315 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002316 (*in == '_') || (*in == '-') ||
2317 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002318 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002319 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002320 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002321 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002322 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002323 ctxt->nbChars += count;
2324 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002325 if (ret == NULL)
2326 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002327 return(ret);
2328 }
2329 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002330 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002331}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002332
Daniel Veillard46de64e2002-05-29 08:21:33 +00002333/**
2334 * xmlParseNameAndCompare:
2335 * @ctxt: an XML parser context
2336 *
2337 * parse an XML name and compares for match
2338 * (specialized for endtag parsing)
2339 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002340 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2341 * and the name for mismatch
2342 */
2343
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002345xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002346 register const xmlChar *cmp = other;
2347 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002348 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002349
2350 GROW;
2351
2352 in = ctxt->input->cur;
2353 while (*in != 0 && *in == *cmp) {
2354 ++in;
2355 ++cmp;
2356 }
William M. Brack76e95df2003-10-18 16:20:14 +00002357 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002358 /* success */
2359 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002360 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002361 }
2362 /* failure (or end of input buffer), check with full function */
2363 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002364 /* strings coming from the dictionnary direct compare possible */
2365 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002366 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002367 }
2368 return ret;
2369}
2370
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002371static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002372xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002373 int len = 0, l;
2374 int c;
2375 int count = 0;
2376
2377 /*
2378 * Handler for more complex cases
2379 */
2380 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002381 c = CUR_CHAR(l);
2382 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2383 (!IS_LETTER(c) && (c != '_') &&
2384 (c != ':'))) {
2385 return(NULL);
2386 }
2387
2388 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002389 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002390 (c == '.') || (c == '-') ||
2391 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002392 (IS_COMBINING(c)) ||
2393 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002394 if (count++ > 100) {
2395 count = 0;
2396 GROW;
2397 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002398 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002399 NEXTL(l);
2400 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002401 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002402 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002403}
2404
2405/**
2406 * xmlParseStringName:
2407 * @ctxt: an XML parser context
2408 * @str: a pointer to the string pointer (IN/OUT)
2409 *
2410 * parse an XML name.
2411 *
2412 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2413 * CombiningChar | Extender
2414 *
2415 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2416 *
2417 * [6] Names ::= Name (S Name)*
2418 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002419 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002420 * is updated to the current location in the string.
2421 */
2422
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002423static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002424xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2425 xmlChar buf[XML_MAX_NAMELEN + 5];
2426 const xmlChar *cur = *str;
2427 int len = 0, l;
2428 int c;
2429
2430 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002431 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002432 (c != ':')) {
2433 return(NULL);
2434 }
2435
William M. Brack871611b2003-10-18 04:53:14 +00002436 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002437 (c == '.') || (c == '-') ||
2438 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002439 (IS_COMBINING(c)) ||
2440 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002441 COPY_BUF(l,buf,len,c);
2442 cur += l;
2443 c = CUR_SCHAR(cur, l);
2444 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2445 /*
2446 * Okay someone managed to make a huge name, so he's ready to pay
2447 * for the processing speed.
2448 */
2449 xmlChar *buffer;
2450 int max = len * 2;
2451
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002452 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002454 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002455 return(NULL);
2456 }
2457 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002458 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002459 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002460 (c == '.') || (c == '-') ||
2461 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002462 (IS_COMBINING(c)) ||
2463 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002464 if (len + 10 > max) {
2465 max *= 2;
2466 buffer = (xmlChar *) xmlRealloc(buffer,
2467 max * sizeof(xmlChar));
2468 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002469 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002470 return(NULL);
2471 }
2472 }
2473 COPY_BUF(l,buffer,len,c);
2474 cur += l;
2475 c = CUR_SCHAR(cur, l);
2476 }
2477 buffer[len] = 0;
2478 *str = cur;
2479 return(buffer);
2480 }
2481 }
2482 *str = cur;
2483 return(xmlStrndup(buf, len));
2484}
2485
2486/**
2487 * xmlParseNmtoken:
2488 * @ctxt: an XML parser context
2489 *
2490 * parse an XML Nmtoken.
2491 *
2492 * [7] Nmtoken ::= (NameChar)+
2493 *
2494 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2495 *
2496 * Returns the Nmtoken parsed or NULL
2497 */
2498
2499xmlChar *
2500xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2501 xmlChar buf[XML_MAX_NAMELEN + 5];
2502 int len = 0, l;
2503 int c;
2504 int count = 0;
2505
2506 GROW;
2507 c = CUR_CHAR(l);
2508
William M. Brack871611b2003-10-18 04:53:14 +00002509 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002510 (c == '.') || (c == '-') ||
2511 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002512 (IS_COMBINING(c)) ||
2513 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (count++ > 100) {
2515 count = 0;
2516 GROW;
2517 }
2518 COPY_BUF(l,buf,len,c);
2519 NEXTL(l);
2520 c = CUR_CHAR(l);
2521 if (len >= XML_MAX_NAMELEN) {
2522 /*
2523 * Okay someone managed to make a huge token, so he's ready to pay
2524 * for the processing speed.
2525 */
2526 xmlChar *buffer;
2527 int max = len * 2;
2528
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002529 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002530 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002531 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002532 return(NULL);
2533 }
2534 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002535 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002536 (c == '.') || (c == '-') ||
2537 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002538 (IS_COMBINING(c)) ||
2539 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002540 if (count++ > 100) {
2541 count = 0;
2542 GROW;
2543 }
2544 if (len + 10 > max) {
2545 max *= 2;
2546 buffer = (xmlChar *) xmlRealloc(buffer,
2547 max * sizeof(xmlChar));
2548 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002549 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002550 return(NULL);
2551 }
2552 }
2553 COPY_BUF(l,buffer,len,c);
2554 NEXTL(l);
2555 c = CUR_CHAR(l);
2556 }
2557 buffer[len] = 0;
2558 return(buffer);
2559 }
2560 }
2561 if (len == 0)
2562 return(NULL);
2563 return(xmlStrndup(buf, len));
2564}
2565
2566/**
2567 * xmlParseEntityValue:
2568 * @ctxt: an XML parser context
2569 * @orig: if non-NULL store a copy of the original entity value
2570 *
2571 * parse a value for ENTITY declarations
2572 *
2573 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2574 * "'" ([^%&'] | PEReference | Reference)* "'"
2575 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002576 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002577 */
2578
2579xmlChar *
2580xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2581 xmlChar *buf = NULL;
2582 int len = 0;
2583 int size = XML_PARSER_BUFFER_SIZE;
2584 int c, l;
2585 xmlChar stop;
2586 xmlChar *ret = NULL;
2587 const xmlChar *cur = NULL;
2588 xmlParserInputPtr input;
2589
2590 if (RAW == '"') stop = '"';
2591 else if (RAW == '\'') stop = '\'';
2592 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002593 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002594 return(NULL);
2595 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002596 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002597 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002598 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002599 return(NULL);
2600 }
2601
2602 /*
2603 * The content of the entity definition is copied in a buffer.
2604 */
2605
2606 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2607 input = ctxt->input;
2608 GROW;
2609 NEXT;
2610 c = CUR_CHAR(l);
2611 /*
2612 * NOTE: 4.4.5 Included in Literal
2613 * When a parameter entity reference appears in a literal entity
2614 * value, ... a single or double quote character in the replacement
2615 * text is always treated as a normal data character and will not
2616 * terminate the literal.
2617 * In practice it means we stop the loop only when back at parsing
2618 * the initial entity and the quote is found
2619 */
William M. Brack871611b2003-10-18 04:53:14 +00002620 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002621 (ctxt->input != input))) {
2622 if (len + 5 >= size) {
2623 size *= 2;
2624 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2625 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002626 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002627 return(NULL);
2628 }
2629 }
2630 COPY_BUF(l,buf,len,c);
2631 NEXTL(l);
2632 /*
2633 * Pop-up of finished entities.
2634 */
2635 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2636 xmlPopInput(ctxt);
2637
2638 GROW;
2639 c = CUR_CHAR(l);
2640 if (c == 0) {
2641 GROW;
2642 c = CUR_CHAR(l);
2643 }
2644 }
2645 buf[len] = 0;
2646
2647 /*
2648 * Raise problem w.r.t. '&' and '%' being used in non-entities
2649 * reference constructs. Note Charref will be handled in
2650 * xmlStringDecodeEntities()
2651 */
2652 cur = buf;
2653 while (*cur != 0) { /* non input consuming */
2654 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2655 xmlChar *name;
2656 xmlChar tmp = *cur;
2657
2658 cur++;
2659 name = xmlParseStringName(ctxt, &cur);
2660 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002661 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002662 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002663 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002664 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002665 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2666 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002667 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002668 }
2669 if (name != NULL)
2670 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002671 if (*cur == 0)
2672 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002673 }
2674 cur++;
2675 }
2676
2677 /*
2678 * Then PEReference entities are substituted.
2679 */
2680 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002681 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002682 xmlFree(buf);
2683 } else {
2684 NEXT;
2685 /*
2686 * NOTE: 4.4.7 Bypassed
2687 * When a general entity reference appears in the EntityValue in
2688 * an entity declaration, it is bypassed and left as is.
2689 * so XML_SUBSTITUTE_REF is not set here.
2690 */
2691 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2692 0, 0, 0);
2693 if (orig != NULL)
2694 *orig = buf;
2695 else
2696 xmlFree(buf);
2697 }
2698
2699 return(ret);
2700}
2701
2702/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002703 * xmlParseAttValueComplex:
2704 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002705 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002706 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002707 *
2708 * parse a value for an attribute, this is the fallback function
2709 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002710 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002711 *
2712 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2713 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002714static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002715xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002716 xmlChar limit = 0;
2717 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002718 int len = 0;
2719 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002720 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002721 xmlChar *current = NULL;
2722 xmlEntityPtr ent;
2723
Owen Taylor3473f882001-02-23 17:55:21 +00002724 if (NXT(0) == '"') {
2725 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2726 limit = '"';
2727 NEXT;
2728 } else if (NXT(0) == '\'') {
2729 limit = '\'';
2730 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2731 NEXT;
2732 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002733 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002734 return(NULL);
2735 }
2736
2737 /*
2738 * allocate a translation buffer.
2739 */
2740 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002741 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002742 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002743
2744 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002745 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002746 */
2747 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002748 while ((NXT(0) != limit) && /* checked */
2749 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002750 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002751 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002752 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002753 if (NXT(1) == '#') {
2754 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002755
Owen Taylor3473f882001-02-23 17:55:21 +00002756 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002757 if (ctxt->replaceEntities) {
2758 if (len > buf_size - 10) {
2759 growBuffer(buf);
2760 }
2761 buf[len++] = '&';
2762 } else {
2763 /*
2764 * The reparsing will be done in xmlStringGetNodeList()
2765 * called by the attribute() function in SAX.c
2766 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002767 if (len > buf_size - 10) {
2768 growBuffer(buf);
2769 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002770 buf[len++] = '&';
2771 buf[len++] = '#';
2772 buf[len++] = '3';
2773 buf[len++] = '8';
2774 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002775 }
2776 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002777 if (len > buf_size - 10) {
2778 growBuffer(buf);
2779 }
Owen Taylor3473f882001-02-23 17:55:21 +00002780 len += xmlCopyChar(0, &buf[len], val);
2781 }
2782 } else {
2783 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002784 if ((ent != NULL) &&
2785 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2786 if (len > buf_size - 10) {
2787 growBuffer(buf);
2788 }
2789 if ((ctxt->replaceEntities == 0) &&
2790 (ent->content[0] == '&')) {
2791 buf[len++] = '&';
2792 buf[len++] = '#';
2793 buf[len++] = '3';
2794 buf[len++] = '8';
2795 buf[len++] = ';';
2796 } else {
2797 buf[len++] = ent->content[0];
2798 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002799 } else if ((ent != NULL) &&
2800 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002801 xmlChar *rep;
2802
2803 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2804 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002805 XML_SUBSTITUTE_REF,
2806 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002807 if (rep != NULL) {
2808 current = rep;
2809 while (*current != 0) { /* non input consuming */
2810 buf[len++] = *current++;
2811 if (len > buf_size - 10) {
2812 growBuffer(buf);
2813 }
2814 }
2815 xmlFree(rep);
2816 }
2817 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002818 if (len > buf_size - 10) {
2819 growBuffer(buf);
2820 }
Owen Taylor3473f882001-02-23 17:55:21 +00002821 if (ent->content != NULL)
2822 buf[len++] = ent->content[0];
2823 }
2824 } else if (ent != NULL) {
2825 int i = xmlStrlen(ent->name);
2826 const xmlChar *cur = ent->name;
2827
2828 /*
2829 * This may look absurd but is needed to detect
2830 * entities problems
2831 */
2832 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2833 (ent->content != NULL)) {
2834 xmlChar *rep;
2835 rep = xmlStringDecodeEntities(ctxt, ent->content,
2836 XML_SUBSTITUTE_REF, 0, 0, 0);
2837 if (rep != NULL)
2838 xmlFree(rep);
2839 }
2840
2841 /*
2842 * Just output the reference
2843 */
2844 buf[len++] = '&';
2845 if (len > buf_size - i - 10) {
2846 growBuffer(buf);
2847 }
2848 for (;i > 0;i--)
2849 buf[len++] = *cur++;
2850 buf[len++] = ';';
2851 }
2852 }
2853 } else {
2854 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002855 if ((len != 0) || (!normalize)) {
2856 if ((!normalize) || (!in_space)) {
2857 COPY_BUF(l,buf,len,0x20);
2858 if (len > buf_size - 10) {
2859 growBuffer(buf);
2860 }
2861 }
2862 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002863 }
2864 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002865 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002866 COPY_BUF(l,buf,len,c);
2867 if (len > buf_size - 10) {
2868 growBuffer(buf);
2869 }
2870 }
2871 NEXTL(l);
2872 }
2873 GROW;
2874 c = CUR_CHAR(l);
2875 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002876 if ((in_space) && (normalize)) {
2877 while (buf[len - 1] == 0x20) len--;
2878 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002879 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002880 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002881 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002882 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002883 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2884 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002885 } else
2886 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002887 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002889
2890mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002891 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002892 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002893}
2894
2895/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002896 * xmlParseAttValue:
2897 * @ctxt: an XML parser context
2898 *
2899 * parse a value for an attribute
2900 * Note: the parser won't do substitution of entities here, this
2901 * will be handled later in xmlStringGetNodeList
2902 *
2903 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2904 * "'" ([^<&'] | Reference)* "'"
2905 *
2906 * 3.3.3 Attribute-Value Normalization:
2907 * Before the value of an attribute is passed to the application or
2908 * checked for validity, the XML processor must normalize it as follows:
2909 * - a character reference is processed by appending the referenced
2910 * character to the attribute value
2911 * - an entity reference is processed by recursively processing the
2912 * replacement text of the entity
2913 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2914 * appending #x20 to the normalized value, except that only a single
2915 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2916 * parsed entity or the literal entity value of an internal parsed entity
2917 * - other characters are processed by appending them to the normalized value
2918 * If the declared value is not CDATA, then the XML processor must further
2919 * process the normalized attribute value by discarding any leading and
2920 * trailing space (#x20) characters, and by replacing sequences of space
2921 * (#x20) characters by a single space (#x20) character.
2922 * All attributes for which no declaration has been read should be treated
2923 * by a non-validating parser as if declared CDATA.
2924 *
2925 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2926 */
2927
2928
2929xmlChar *
2930xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002931 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002932}
2933
2934/**
Owen Taylor3473f882001-02-23 17:55:21 +00002935 * xmlParseSystemLiteral:
2936 * @ctxt: an XML parser context
2937 *
2938 * parse an XML Literal
2939 *
2940 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2941 *
2942 * Returns the SystemLiteral parsed or NULL
2943 */
2944
2945xmlChar *
2946xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2947 xmlChar *buf = NULL;
2948 int len = 0;
2949 int size = XML_PARSER_BUFFER_SIZE;
2950 int cur, l;
2951 xmlChar stop;
2952 int state = ctxt->instate;
2953 int count = 0;
2954
2955 SHRINK;
2956 if (RAW == '"') {
2957 NEXT;
2958 stop = '"';
2959 } else if (RAW == '\'') {
2960 NEXT;
2961 stop = '\'';
2962 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002963 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002964 return(NULL);
2965 }
2966
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002967 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002968 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002969 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002970 return(NULL);
2971 }
2972 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2973 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00002974 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002975 if (len + 5 >= size) {
2976 size *= 2;
2977 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2978 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002979 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002980 ctxt->instate = (xmlParserInputState) state;
2981 return(NULL);
2982 }
2983 }
2984 count++;
2985 if (count > 50) {
2986 GROW;
2987 count = 0;
2988 }
2989 COPY_BUF(l,buf,len,cur);
2990 NEXTL(l);
2991 cur = CUR_CHAR(l);
2992 if (cur == 0) {
2993 GROW;
2994 SHRINK;
2995 cur = CUR_CHAR(l);
2996 }
2997 }
2998 buf[len] = 0;
2999 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003000 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003001 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003002 } else {
3003 NEXT;
3004 }
3005 return(buf);
3006}
3007
3008/**
3009 * xmlParsePubidLiteral:
3010 * @ctxt: an XML parser context
3011 *
3012 * parse an XML public literal
3013 *
3014 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3015 *
3016 * Returns the PubidLiteral parsed or NULL.
3017 */
3018
3019xmlChar *
3020xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3021 xmlChar *buf = NULL;
3022 int len = 0;
3023 int size = XML_PARSER_BUFFER_SIZE;
3024 xmlChar cur;
3025 xmlChar stop;
3026 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003027 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003028
3029 SHRINK;
3030 if (RAW == '"') {
3031 NEXT;
3032 stop = '"';
3033 } else if (RAW == '\'') {
3034 NEXT;
3035 stop = '\'';
3036 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 return(NULL);
3039 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003040 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003041 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003042 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003043 return(NULL);
3044 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003045 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003047 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (len + 1 >= size) {
3049 size *= 2;
3050 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3051 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003052 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003053 return(NULL);
3054 }
3055 }
3056 buf[len++] = cur;
3057 count++;
3058 if (count > 50) {
3059 GROW;
3060 count = 0;
3061 }
3062 NEXT;
3063 cur = CUR;
3064 if (cur == 0) {
3065 GROW;
3066 SHRINK;
3067 cur = CUR;
3068 }
3069 }
3070 buf[len] = 0;
3071 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003073 } else {
3074 NEXT;
3075 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003076 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003077 return(buf);
3078}
3079
Daniel Veillard48b2f892001-02-25 16:11:03 +00003080void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003081/**
3082 * xmlParseCharData:
3083 * @ctxt: an XML parser context
3084 * @cdata: int indicating whether we are within a CDATA section
3085 *
3086 * parse a CharData section.
3087 * if we are within a CDATA section ']]>' marks an end of section.
3088 *
3089 * The right angle bracket (>) may be represented using the string "&gt;",
3090 * and must, for compatibility, be escaped using "&gt;" or a character
3091 * reference when it appears in the string "]]>" in content, when that
3092 * string is not marking the end of a CDATA section.
3093 *
3094 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3095 */
3096
3097void
3098xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003099 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003100 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003101 int line = ctxt->input->line;
3102 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003103
3104 SHRINK;
3105 GROW;
3106 /*
3107 * Accelerated common case where input don't need to be
3108 * modified before passing it to the handler.
3109 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003110 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003111 in = ctxt->input->cur;
3112 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003113get_more_space:
3114 while (*in == 0x20) in++;
3115 if (*in == 0xA) {
3116 ctxt->input->line++;
3117 in++;
3118 while (*in == 0xA) {
3119 ctxt->input->line++;
3120 in++;
3121 }
3122 goto get_more_space;
3123 }
3124 if (*in == '<') {
3125 nbchar = in - ctxt->input->cur;
3126 if (nbchar > 0) {
3127 const xmlChar *tmp = ctxt->input->cur;
3128 ctxt->input->cur = in;
3129
3130 if (ctxt->sax->ignorableWhitespace !=
3131 ctxt->sax->characters) {
3132 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3133 ctxt->sax->ignorableWhitespace(ctxt->userData,
3134 tmp, nbchar);
3135 } else if (ctxt->sax->characters != NULL)
3136 ctxt->sax->characters(ctxt->userData,
3137 tmp, nbchar);
3138 } else if (ctxt->sax->characters != NULL) {
3139 ctxt->sax->characters(ctxt->userData,
3140 tmp, nbchar);
3141 }
3142 }
3143 return;
3144 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003145get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003146 while (((*in > ']') && (*in <= 0x7F)) ||
3147 ((*in > '&') && (*in < '<')) ||
3148 ((*in > '<') && (*in < ']')) ||
3149 ((*in >= 0x20) && (*in < '&')) ||
3150 (*in == 0x09))
3151 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003152 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003153 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003154 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003155 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003156 ctxt->input->line++;
3157 in++;
3158 }
3159 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003160 }
3161 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003162 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003163 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003164 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003165 return;
3166 }
3167 in++;
3168 goto get_more;
3169 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003171 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003172 if ((ctxt->sax->ignorableWhitespace !=
3173 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003174 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003175 const xmlChar *tmp = ctxt->input->cur;
3176 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003177
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003178 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003179 ctxt->sax->ignorableWhitespace(ctxt->userData,
3180 tmp, nbchar);
3181 } else if (ctxt->sax->characters != NULL)
3182 ctxt->sax->characters(ctxt->userData,
3183 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003184 line = ctxt->input->line;
3185 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003186 } else {
3187 if (ctxt->sax->characters != NULL)
3188 ctxt->sax->characters(ctxt->userData,
3189 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003190 line = ctxt->input->line;
3191 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003192 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003193 }
3194 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003195 if (*in == 0xD) {
3196 in++;
3197 if (*in == 0xA) {
3198 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003199 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003200 ctxt->input->line++;
3201 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003202 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003203 in--;
3204 }
3205 if (*in == '<') {
3206 return;
3207 }
3208 if (*in == '&') {
3209 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003210 }
3211 SHRINK;
3212 GROW;
3213 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003214 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003215 nbchar = 0;
3216 }
Daniel Veillard50582112001-03-26 22:52:16 +00003217 ctxt->input->line = line;
3218 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003219 xmlParseCharDataComplex(ctxt, cdata);
3220}
3221
Daniel Veillard01c13b52002-12-10 15:19:08 +00003222/**
3223 * xmlParseCharDataComplex:
3224 * @ctxt: an XML parser context
3225 * @cdata: int indicating whether we are within a CDATA section
3226 *
3227 * parse a CharData section.this is the fallback function
3228 * of xmlParseCharData() when the parsing requires handling
3229 * of non-ASCII characters.
3230 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003231void
3232xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003233 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3234 int nbchar = 0;
3235 int cur, l;
3236 int count = 0;
3237
3238 SHRINK;
3239 GROW;
3240 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003241 while ((cur != '<') && /* checked */
3242 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003243 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003244 if ((cur == ']') && (NXT(1) == ']') &&
3245 (NXT(2) == '>')) {
3246 if (cdata) break;
3247 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003248 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003249 }
3250 }
3251 COPY_BUF(l,buf,nbchar,cur);
3252 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003253 buf[nbchar] = 0;
3254
Owen Taylor3473f882001-02-23 17:55:21 +00003255 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003256 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003257 */
3258 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003259 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003260 if (ctxt->sax->ignorableWhitespace != NULL)
3261 ctxt->sax->ignorableWhitespace(ctxt->userData,
3262 buf, nbchar);
3263 } else {
3264 if (ctxt->sax->characters != NULL)
3265 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3266 }
3267 }
3268 nbchar = 0;
3269 }
3270 count++;
3271 if (count > 50) {
3272 GROW;
3273 count = 0;
3274 }
3275 NEXTL(l);
3276 cur = CUR_CHAR(l);
3277 }
3278 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003279 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003280 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003281 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003282 */
3283 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003284 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003285 if (ctxt->sax->ignorableWhitespace != NULL)
3286 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3287 } else {
3288 if (ctxt->sax->characters != NULL)
3289 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3290 }
3291 }
3292 }
3293}
3294
3295/**
3296 * xmlParseExternalID:
3297 * @ctxt: an XML parser context
3298 * @publicID: a xmlChar** receiving PubidLiteral
3299 * @strict: indicate whether we should restrict parsing to only
3300 * production [75], see NOTE below
3301 *
3302 * Parse an External ID or a Public ID
3303 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003304 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003305 * 'PUBLIC' S PubidLiteral S SystemLiteral
3306 *
3307 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3308 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3309 *
3310 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3311 *
3312 * Returns the function returns SystemLiteral and in the second
3313 * case publicID receives PubidLiteral, is strict is off
3314 * it is possible to return NULL and have publicID set.
3315 */
3316
3317xmlChar *
3318xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3319 xmlChar *URI = NULL;
3320
3321 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003322
3323 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003324 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003325 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003326 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003327 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3328 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003329 }
3330 SKIP_BLANKS;
3331 URI = xmlParseSystemLiteral(ctxt);
3332 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003333 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003334 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003335 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003336 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003337 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003339 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003340 }
3341 SKIP_BLANKS;
3342 *publicID = xmlParsePubidLiteral(ctxt);
3343 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003344 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003345 }
3346 if (strict) {
3347 /*
3348 * We don't handle [83] so "S SystemLiteral" is required.
3349 */
William M. Brack76e95df2003-10-18 16:20:14 +00003350 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003352 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003353 }
3354 } else {
3355 /*
3356 * We handle [83] so we return immediately, if
3357 * "S SystemLiteral" is not detected. From a purely parsing
3358 * point of view that's a nice mess.
3359 */
3360 const xmlChar *ptr;
3361 GROW;
3362
3363 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003364 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003365
William M. Brack76e95df2003-10-18 16:20:14 +00003366 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003367 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3368 }
3369 SKIP_BLANKS;
3370 URI = xmlParseSystemLiteral(ctxt);
3371 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003372 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003373 }
3374 }
3375 return(URI);
3376}
3377
3378/**
3379 * xmlParseComment:
3380 * @ctxt: an XML parser context
3381 *
3382 * Skip an XML (SGML) comment <!-- .... -->
3383 * The spec says that "For compatibility, the string "--" (double-hyphen)
3384 * must not occur within comments. "
3385 *
3386 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3387 */
3388void
3389xmlParseComment(xmlParserCtxtPtr ctxt) {
3390 xmlChar *buf = NULL;
3391 int len;
3392 int size = XML_PARSER_BUFFER_SIZE;
3393 int q, ql;
3394 int r, rl;
3395 int cur, l;
3396 xmlParserInputState state;
3397 xmlParserInputPtr input = ctxt->input;
3398 int count = 0;
3399
3400 /*
3401 * Check that there is a comment right here.
3402 */
3403 if ((RAW != '<') || (NXT(1) != '!') ||
3404 (NXT(2) != '-') || (NXT(3) != '-')) return;
3405
3406 state = ctxt->instate;
3407 ctxt->instate = XML_PARSER_COMMENT;
3408 SHRINK;
3409 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003411 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003412 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003413 ctxt->instate = state;
3414 return;
3415 }
3416 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003417 if (q == 0)
3418 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003419 NEXTL(ql);
3420 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003421 if (r == 0)
3422 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003423 NEXTL(rl);
3424 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003425 if (cur == 0)
3426 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003427 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003428 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003429 ((cur != '>') ||
3430 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003431 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003432 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003433 }
3434 if (len + 5 >= size) {
3435 size *= 2;
3436 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3437 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003439 ctxt->instate = state;
3440 return;
3441 }
3442 }
3443 COPY_BUF(ql,buf,len,q);
3444 q = r;
3445 ql = rl;
3446 r = cur;
3447 rl = l;
3448
3449 count++;
3450 if (count > 50) {
3451 GROW;
3452 count = 0;
3453 }
3454 NEXTL(l);
3455 cur = CUR_CHAR(l);
3456 if (cur == 0) {
3457 SHRINK;
3458 GROW;
3459 cur = CUR_CHAR(l);
3460 }
3461 }
3462 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003463 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003464 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003465 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003466 xmlFree(buf);
3467 } else {
3468 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003469 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3470 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003471 }
3472 NEXT;
3473 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3474 (!ctxt->disableSAX))
3475 ctxt->sax->comment(ctxt->userData, buf);
3476 xmlFree(buf);
3477 }
3478 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003479 return;
3480not_terminated:
3481 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3482 "Comment not terminated\n", NULL);
3483 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003484}
3485
3486/**
3487 * xmlParsePITarget:
3488 * @ctxt: an XML parser context
3489 *
3490 * parse the name of a PI
3491 *
3492 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3493 *
3494 * Returns the PITarget name or NULL
3495 */
3496
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003497const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003498xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003499 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003500
3501 name = xmlParseName(ctxt);
3502 if ((name != NULL) &&
3503 ((name[0] == 'x') || (name[0] == 'X')) &&
3504 ((name[1] == 'm') || (name[1] == 'M')) &&
3505 ((name[2] == 'l') || (name[2] == 'L'))) {
3506 int i;
3507 if ((name[0] == 'x') && (name[1] == 'm') &&
3508 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003509 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003510 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003511 return(name);
3512 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003513 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003514 return(name);
3515 }
3516 for (i = 0;;i++) {
3517 if (xmlW3CPIs[i] == NULL) break;
3518 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3519 return(name);
3520 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003521 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3522 "xmlParsePITarget: invalid name prefix 'xml'\n",
3523 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003524 }
3525 return(name);
3526}
3527
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form  catalog = "URL"  (single or double
 * quotes) with optional blanks around each token and nothing after
 * the closing quote.  Any other content produces a warning, except a
 * missing '=' which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (p = catalog; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /* the '=' sign; its absence is not reported */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;

    /* the opening quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the value runs up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3589
Owen Taylor3473f882001-02-23 17:55:21 +00003590/**
3591 * xmlParsePI:
3592 * @ctxt: an XML parser context
3593 *
3594 * parse an XML Processing Instruction.
3595 *
3596 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3597 *
3598 * The processing is transfered to SAX once parsed.
3599 */
3600
3601void
3602xmlParsePI(xmlParserCtxtPtr ctxt) {
3603 xmlChar *buf = NULL;
3604 int len = 0;
3605 int size = XML_PARSER_BUFFER_SIZE;
3606 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003607 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003608 xmlParserInputState state;
3609 int count = 0;
3610
3611 if ((RAW == '<') && (NXT(1) == '?')) {
3612 xmlParserInputPtr input = ctxt->input;
3613 state = ctxt->instate;
3614 ctxt->instate = XML_PARSER_PI;
3615 /*
3616 * this is a Processing Instruction.
3617 */
3618 SKIP(2);
3619 SHRINK;
3620
3621 /*
3622 * Parse the target name and check for special support like
3623 * namespace.
3624 */
3625 target = xmlParsePITarget(ctxt);
3626 if (target != NULL) {
3627 if ((RAW == '?') && (NXT(1) == '>')) {
3628 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3630 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003631 }
3632 SKIP(2);
3633
3634 /*
3635 * SAX: PI detected.
3636 */
3637 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3638 (ctxt->sax->processingInstruction != NULL))
3639 ctxt->sax->processingInstruction(ctxt->userData,
3640 target, NULL);
3641 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003642 return;
3643 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003644 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003645 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003646 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003647 ctxt->instate = state;
3648 return;
3649 }
3650 cur = CUR;
3651 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003652 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3653 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003654 }
3655 SKIP_BLANKS;
3656 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003657 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003658 ((cur != '?') || (NXT(1) != '>'))) {
3659 if (len + 5 >= size) {
3660 size *= 2;
3661 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3662 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003663 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003664 ctxt->instate = state;
3665 return;
3666 }
3667 }
3668 count++;
3669 if (count > 50) {
3670 GROW;
3671 count = 0;
3672 }
3673 COPY_BUF(l,buf,len,cur);
3674 NEXTL(l);
3675 cur = CUR_CHAR(l);
3676 if (cur == 0) {
3677 SHRINK;
3678 GROW;
3679 cur = CUR_CHAR(l);
3680 }
3681 }
3682 buf[len] = 0;
3683 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003684 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3685 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003686 } else {
3687 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3689 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003690 }
3691 SKIP(2);
3692
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003693#ifdef LIBXML_CATALOG_ENABLED
3694 if (((state == XML_PARSER_MISC) ||
3695 (state == XML_PARSER_START)) &&
3696 (xmlStrEqual(target, XML_CATALOG_PI))) {
3697 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3698 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3699 (allow == XML_CATA_ALLOW_ALL))
3700 xmlParseCatalogPI(ctxt, buf);
3701 }
3702#endif
3703
3704
Owen Taylor3473f882001-02-23 17:55:21 +00003705 /*
3706 * SAX: PI detected.
3707 */
3708 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3709 (ctxt->sax->processingInstruction != NULL))
3710 ctxt->sax->processingInstruction(ctxt->userData,
3711 target, buf);
3712 }
3713 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003714 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003715 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
3717 ctxt->instate = state;
3718 }
3719}
3720
3721/**
3722 * xmlParseNotationDecl:
3723 * @ctxt: an XML parser context
3724 *
3725 * parse a notation declaration
3726 *
3727 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3728 *
3729 * Hence there is actually 3 choices:
3730 * 'PUBLIC' S PubidLiteral
3731 * 'PUBLIC' S PubidLiteral S SystemLiteral
3732 * and 'SYSTEM' S SystemLiteral
3733 *
3734 * See the NOTE on xmlParseExternalID().
3735 */
3736
3737void
3738xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003739 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003740 xmlChar *Pubid;
3741 xmlChar *Systemid;
3742
Daniel Veillarda07050d2003-10-19 14:46:32 +00003743 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003744 xmlParserInputPtr input = ctxt->input;
3745 SHRINK;
3746 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003747 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3749 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003750 return;
3751 }
3752 SKIP_BLANKS;
3753
Daniel Veillard76d66f42001-05-16 21:05:17 +00003754 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003756 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003757 return;
3758 }
William M. Brack76e95df2003-10-18 16:20:14 +00003759 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003760 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003761 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003762 return;
3763 }
3764 SKIP_BLANKS;
3765
3766 /*
3767 * Parse the IDs.
3768 */
3769 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3770 SKIP_BLANKS;
3771
3772 if (RAW == '>') {
3773 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003774 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3775 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003776 }
3777 NEXT;
3778 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3779 (ctxt->sax->notationDecl != NULL))
3780 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3781 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
Owen Taylor3473f882001-02-23 17:55:21 +00003784 if (Systemid != NULL) xmlFree(Systemid);
3785 if (Pubid != NULL) xmlFree(Pubid);
3786 }
3787}
3788
3789/**
3790 * xmlParseEntityDecl:
3791 * @ctxt: an XML parser context
3792 *
3793 * parse <!ENTITY declarations
3794 *
3795 * [70] EntityDecl ::= GEDecl | PEDecl
3796 *
3797 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3798 *
3799 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3800 *
3801 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3802 *
3803 * [74] PEDef ::= EntityValue | ExternalID
3804 *
3805 * [76] NDataDecl ::= S 'NDATA' S Name
3806 *
3807 * [ VC: Notation Declared ]
3808 * The Name must match the declared name of a notation.
3809 */
3810
3811void
3812xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003813 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003814 xmlChar *value = NULL;
3815 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003816 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003817 int isParameter = 0;
3818 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003819 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003820
3821 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003822 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003823 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003824 SHRINK;
3825 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003826 skipped = SKIP_BLANKS;
3827 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003828 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3829 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003830 }
Owen Taylor3473f882001-02-23 17:55:21 +00003831
3832 if (RAW == '%') {
3833 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003834 skipped = SKIP_BLANKS;
3835 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003836 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3837 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003838 }
Owen Taylor3473f882001-02-23 17:55:21 +00003839 isParameter = 1;
3840 }
3841
Daniel Veillard76d66f42001-05-16 21:05:17 +00003842 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003843 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003844 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3845 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003846 return;
3847 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003848 skipped = SKIP_BLANKS;
3849 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003850 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3851 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003852 }
Owen Taylor3473f882001-02-23 17:55:21 +00003853
Daniel Veillardf5582f12002-06-11 10:08:16 +00003854 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003855 /*
3856 * handle the various case of definitions...
3857 */
3858 if (isParameter) {
3859 if ((RAW == '"') || (RAW == '\'')) {
3860 value = xmlParseEntityValue(ctxt, &orig);
3861 if (value) {
3862 if ((ctxt->sax != NULL) &&
3863 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3864 ctxt->sax->entityDecl(ctxt->userData, name,
3865 XML_INTERNAL_PARAMETER_ENTITY,
3866 NULL, NULL, value);
3867 }
3868 } else {
3869 URI = xmlParseExternalID(ctxt, &literal, 1);
3870 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003871 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003872 }
3873 if (URI) {
3874 xmlURIPtr uri;
3875
3876 uri = xmlParseURI((const char *) URI);
3877 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003878 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3879 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003880 /*
3881 * This really ought to be a well formedness error
3882 * but the XML Core WG decided otherwise c.f. issue
3883 * E26 of the XML erratas.
3884 */
Owen Taylor3473f882001-02-23 17:55:21 +00003885 } else {
3886 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003887 /*
3888 * Okay this is foolish to block those but not
3889 * invalid URIs.
3890 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003891 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003892 } else {
3893 if ((ctxt->sax != NULL) &&
3894 (!ctxt->disableSAX) &&
3895 (ctxt->sax->entityDecl != NULL))
3896 ctxt->sax->entityDecl(ctxt->userData, name,
3897 XML_EXTERNAL_PARAMETER_ENTITY,
3898 literal, URI, NULL);
3899 }
3900 xmlFreeURI(uri);
3901 }
3902 }
3903 }
3904 } else {
3905 if ((RAW == '"') || (RAW == '\'')) {
3906 value = xmlParseEntityValue(ctxt, &orig);
3907 if ((ctxt->sax != NULL) &&
3908 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3909 ctxt->sax->entityDecl(ctxt->userData, name,
3910 XML_INTERNAL_GENERAL_ENTITY,
3911 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003912 /*
3913 * For expat compatibility in SAX mode.
3914 */
3915 if ((ctxt->myDoc == NULL) ||
3916 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3917 if (ctxt->myDoc == NULL) {
3918 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3919 }
3920 if (ctxt->myDoc->intSubset == NULL)
3921 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3922 BAD_CAST "fake", NULL, NULL);
3923
Daniel Veillard1af9a412003-08-20 22:54:39 +00003924 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3925 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003926 }
Owen Taylor3473f882001-02-23 17:55:21 +00003927 } else {
3928 URI = xmlParseExternalID(ctxt, &literal, 1);
3929 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003930 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003931 }
3932 if (URI) {
3933 xmlURIPtr uri;
3934
3935 uri = xmlParseURI((const char *)URI);
3936 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003937 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3938 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003939 /*
3940 * This really ought to be a well formedness error
3941 * but the XML Core WG decided otherwise c.f. issue
3942 * E26 of the XML erratas.
3943 */
Owen Taylor3473f882001-02-23 17:55:21 +00003944 } else {
3945 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003946 /*
3947 * Okay this is foolish to block those but not
3948 * invalid URIs.
3949 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003950 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003951 }
3952 xmlFreeURI(uri);
3953 }
3954 }
William M. Brack76e95df2003-10-18 16:20:14 +00003955 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003956 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3957 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003958 }
3959 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003960 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003961 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00003962 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003963 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3964 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003965 }
3966 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003967 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003968 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3969 (ctxt->sax->unparsedEntityDecl != NULL))
3970 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3971 literal, URI, ndata);
3972 } else {
3973 if ((ctxt->sax != NULL) &&
3974 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3975 ctxt->sax->entityDecl(ctxt->userData, name,
3976 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3977 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003978 /*
3979 * For expat compatibility in SAX mode.
3980 * assuming the entity repalcement was asked for
3981 */
3982 if ((ctxt->replaceEntities != 0) &&
3983 ((ctxt->myDoc == NULL) ||
3984 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3985 if (ctxt->myDoc == NULL) {
3986 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3987 }
3988
3989 if (ctxt->myDoc->intSubset == NULL)
3990 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3991 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00003992 xmlSAX2EntityDecl(ctxt, name,
3993 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3994 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003995 }
Owen Taylor3473f882001-02-23 17:55:21 +00003996 }
3997 }
3998 }
3999 SKIP_BLANKS;
4000 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004001 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004002 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004003 } else {
4004 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004005 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4006 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004007 }
4008 NEXT;
4009 }
4010 if (orig != NULL) {
4011 /*
4012 * Ugly mechanism to save the raw entity value.
4013 */
4014 xmlEntityPtr cur = NULL;
4015
4016 if (isParameter) {
4017 if ((ctxt->sax != NULL) &&
4018 (ctxt->sax->getParameterEntity != NULL))
4019 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4020 } else {
4021 if ((ctxt->sax != NULL) &&
4022 (ctxt->sax->getEntity != NULL))
4023 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004024 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004025 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004026 }
Owen Taylor3473f882001-02-23 17:55:21 +00004027 }
4028 if (cur != NULL) {
4029 if (cur->orig != NULL)
4030 xmlFree(orig);
4031 else
4032 cur->orig = orig;
4033 } else
4034 xmlFree(orig);
4035 }
Owen Taylor3473f882001-02-23 17:55:21 +00004036 if (value != NULL) xmlFree(value);
4037 if (URI != NULL) xmlFree(URI);
4038 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004039 }
4040}
4041
4042/**
4043 * xmlParseDefaultDecl:
4044 * @ctxt: an XML parser context
4045 * @value: Receive a possible fixed default value for the attribute
4046 *
4047 * Parse an attribute default declaration
4048 *
4049 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4050 *
4051 * [ VC: Required Attribute ]
4052 * if the default declaration is the keyword #REQUIRED, then the
4053 * attribute must be specified for all elements of the type in the
4054 * attribute-list declaration.
4055 *
4056 * [ VC: Attribute Default Legal ]
4057 * The declared default value must meet the lexical constraints of
4058 * the declared attribute type c.f. xmlValidateAttributeDecl()
4059 *
4060 * [ VC: Fixed Attribute Default ]
4061 * if an attribute has a default value declared with the #FIXED
4062 * keyword, instances of that attribute must match the default value.
4063 *
4064 * [ WFC: No < in Attribute Values ]
4065 * handled in xmlParseAttValue()
4066 *
4067 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4068 * or XML_ATTRIBUTE_FIXED.
4069 */
4070
4071int
4072xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4073 int val;
4074 xmlChar *ret;
4075
4076 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004077 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004078 SKIP(9);
4079 return(XML_ATTRIBUTE_REQUIRED);
4080 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004081 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004082 SKIP(8);
4083 return(XML_ATTRIBUTE_IMPLIED);
4084 }
4085 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004086 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004087 SKIP(6);
4088 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004089 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004090 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4091 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004092 }
4093 SKIP_BLANKS;
4094 }
4095 ret = xmlParseAttValue(ctxt);
4096 ctxt->instate = XML_PARSER_DTD;
4097 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004098 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004099 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004100 } else
4101 *value = ret;
4102 return(val);
4103}
4104
4105/**
4106 * xmlParseNotationType:
4107 * @ctxt: an XML parser context
4108 *
4109 * parse an Notation attribute type.
4110 *
4111 * Note: the leading 'NOTATION' S part has already being parsed...
4112 *
4113 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4114 *
4115 * [ VC: Notation Attributes ]
4116 * Values of this type must match one of the notation names included
4117 * in the declaration; all notation names in the declaration must be declared.
4118 *
4119 * Returns: the notation attribute tree built while parsing
4120 */
4121
4122xmlEnumerationPtr
4123xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004124 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004125 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4126
4127 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004128 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004129 return(NULL);
4130 }
4131 SHRINK;
4132 do {
4133 NEXT;
4134 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004135 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004136 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004137 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4138 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004139 return(ret);
4140 }
4141 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 if (cur == NULL) return(ret);
4143 if (last == NULL) ret = last = cur;
4144 else {
4145 last->next = cur;
4146 last = cur;
4147 }
4148 SKIP_BLANKS;
4149 } while (RAW == '|');
4150 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004151 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004152 if ((last != NULL) && (last != ret))
4153 xmlFreeEnumeration(last);
4154 return(ret);
4155 }
4156 NEXT;
4157 return(ret);
4158}
4159
4160/**
4161 * xmlParseEnumerationType:
4162 * @ctxt: an XML parser context
4163 *
4164 * parse an Enumeration attribute type.
4165 *
4166 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4167 *
4168 * [ VC: Enumeration ]
4169 * Values of this type must match one of the Nmtoken tokens in
4170 * the declaration
4171 *
4172 * Returns: the enumeration attribute tree built while parsing
4173 */
4174
4175xmlEnumerationPtr
4176xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4177 xmlChar *name;
4178 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4179
4180 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004181 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004182 return(NULL);
4183 }
4184 SHRINK;
4185 do {
4186 NEXT;
4187 SKIP_BLANKS;
4188 name = xmlParseNmtoken(ctxt);
4189 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004190 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004191 return(ret);
4192 }
4193 cur = xmlCreateEnumeration(name);
4194 xmlFree(name);
4195 if (cur == NULL) return(ret);
4196 if (last == NULL) ret = last = cur;
4197 else {
4198 last->next = cur;
4199 last = cur;
4200 }
4201 SKIP_BLANKS;
4202 } while (RAW == '|');
4203 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004204 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004205 return(ret);
4206 }
4207 NEXT;
4208 return(ret);
4209}
4210
4211/**
4212 * xmlParseEnumeratedType:
4213 * @ctxt: an XML parser context
4214 * @tree: the enumeration tree built while parsing
4215 *
4216 * parse an Enumerated attribute type.
4217 *
4218 * [57] EnumeratedType ::= NotationType | Enumeration
4219 *
4220 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4221 *
4222 *
4223 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4224 */
4225
4226int
4227xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004228 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004229 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004230 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004231 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4232 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return(0);
4234 }
4235 SKIP_BLANKS;
4236 *tree = xmlParseNotationType(ctxt);
4237 if (*tree == NULL) return(0);
4238 return(XML_ATTRIBUTE_NOTATION);
4239 }
4240 *tree = xmlParseEnumerationType(ctxt);
4241 if (*tree == NULL) return(0);
4242 return(XML_ATTRIBUTE_ENUMERATION);
4243}
4244
4245/**
4246 * xmlParseAttributeType:
4247 * @ctxt: an XML parser context
4248 * @tree: the enumeration tree built while parsing
4249 *
4250 * parse the Attribute list def for an element
4251 *
4252 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4253 *
4254 * [55] StringType ::= 'CDATA'
4255 *
4256 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4257 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4258 *
4259 * Validity constraints for attribute values syntax are checked in
4260 * xmlValidateAttributeValue()
4261 *
4262 * [ VC: ID ]
4263 * Values of type ID must match the Name production. A name must not
4264 * appear more than once in an XML document as a value of this type;
4265 * i.e., ID values must uniquely identify the elements which bear them.
4266 *
4267 * [ VC: One ID per Element Type ]
4268 * No element type may have more than one ID attribute specified.
4269 *
4270 * [ VC: ID Attribute Default ]
4271 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4272 *
4273 * [ VC: IDREF ]
4274 * Values of type IDREF must match the Name production, and values
4275 * of type IDREFS must match Names; each IDREF Name must match the value
4276 * of an ID attribute on some element in the XML document; i.e. IDREF
4277 * values must match the value of some ID attribute.
4278 *
4279 * [ VC: Entity Name ]
4280 * Values of type ENTITY must match the Name production, values
4281 * of type ENTITIES must match Names; each Entity Name must match the
4282 * name of an unparsed entity declared in the DTD.
4283 *
4284 * [ VC: Name Token ]
4285 * Values of type NMTOKEN must match the Nmtoken production; values
4286 * of type NMTOKENS must match Nmtokens.
4287 *
4288 * Returns the attribute type
4289 */
4290int
4291xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4292 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004293 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004294 SKIP(5);
4295 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004296 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004297 SKIP(6);
4298 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004299 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004300 SKIP(5);
4301 return(XML_ATTRIBUTE_IDREF);
4302 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4303 SKIP(2);
4304 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004305 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004306 SKIP(6);
4307 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004308 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004309 SKIP(8);
4310 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004311 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004312 SKIP(8);
4313 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004314 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004315 SKIP(7);
4316 return(XML_ATTRIBUTE_NMTOKEN);
4317 }
4318 return(xmlParseEnumeratedType(ctxt, tree));
4319}
4320
4321/**
4322 * xmlParseAttributeListDecl:
4323 * @ctxt: an XML parser context
4324 *
4325 * : parse the Attribute list def for an element
4326 *
4327 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4328 *
4329 * [53] AttDef ::= S Name S AttType S DefaultDecl
4330 *
4331 */
4332void
4333xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004334 const xmlChar *elemName;
4335 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004336 xmlEnumerationPtr tree;
4337
Daniel Veillarda07050d2003-10-19 14:46:32 +00004338 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004339 xmlParserInputPtr input = ctxt->input;
4340
4341 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004342 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004343 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004344 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004345 }
4346 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004347 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004348 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004349 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4350 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004351 return;
4352 }
4353 SKIP_BLANKS;
4354 GROW;
4355 while (RAW != '>') {
4356 const xmlChar *check = CUR_PTR;
4357 int type;
4358 int def;
4359 xmlChar *defaultValue = NULL;
4360
4361 GROW;
4362 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004363 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004364 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004365 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4366 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004367 break;
4368 }
4369 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004370 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004372 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004373 if (defaultValue != NULL)
4374 xmlFree(defaultValue);
4375 break;
4376 }
4377 SKIP_BLANKS;
4378
4379 type = xmlParseAttributeType(ctxt, &tree);
4380 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004381 if (defaultValue != NULL)
4382 xmlFree(defaultValue);
4383 break;
4384 }
4385
4386 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004387 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004388 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4389 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (defaultValue != NULL)
4391 xmlFree(defaultValue);
4392 if (tree != NULL)
4393 xmlFreeEnumeration(tree);
4394 break;
4395 }
4396 SKIP_BLANKS;
4397
4398 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4399 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004400 if (defaultValue != NULL)
4401 xmlFree(defaultValue);
4402 if (tree != NULL)
4403 xmlFreeEnumeration(tree);
4404 break;
4405 }
4406
4407 GROW;
4408 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004409 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004410 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004411 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004412 if (defaultValue != NULL)
4413 xmlFree(defaultValue);
4414 if (tree != NULL)
4415 xmlFreeEnumeration(tree);
4416 break;
4417 }
4418 SKIP_BLANKS;
4419 }
4420 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004421 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4422 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004423 if (defaultValue != NULL)
4424 xmlFree(defaultValue);
4425 if (tree != NULL)
4426 xmlFreeEnumeration(tree);
4427 break;
4428 }
4429 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4430 (ctxt->sax->attributeDecl != NULL))
4431 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4432 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004433 else if (tree != NULL)
4434 xmlFreeEnumeration(tree);
4435
4436 if ((ctxt->sax2) && (defaultValue != NULL) &&
4437 (def != XML_ATTRIBUTE_IMPLIED) &&
4438 (def != XML_ATTRIBUTE_REQUIRED)) {
4439 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4440 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004441 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4442 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4443 }
Owen Taylor3473f882001-02-23 17:55:21 +00004444 if (defaultValue != NULL)
4445 xmlFree(defaultValue);
4446 GROW;
4447 }
4448 if (RAW == '>') {
4449 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004450 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4451 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004452 }
4453 NEXT;
4454 }
Owen Taylor3473f882001-02-23 17:55:21 +00004455 }
4456}
4457
4458/**
4459 * xmlParseElementMixedContentDecl:
4460 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004461 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004462 *
4463 * parse the declaration for a Mixed Element content
4464 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4465 *
4466 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4467 * '(' S? '#PCDATA' S? ')'
4468 *
4469 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4470 *
4471 * [ VC: No Duplicate Types ]
4472 * The same name must not appear more than once in a single
4473 * mixed-content declaration.
4474 *
4475 * returns: the list of the xmlElementContentPtr describing the element choices
4476 */
4477xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004478xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004479 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004480 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004481
4482 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004483 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004484 SKIP(7);
4485 SKIP_BLANKS;
4486 SHRINK;
4487 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004489 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4490"Element content declaration doesn't start and stop in the same entity\n",
4491 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004492 }
Owen Taylor3473f882001-02-23 17:55:21 +00004493 NEXT;
4494 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4495 if (RAW == '*') {
4496 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4497 NEXT;
4498 }
4499 return(ret);
4500 }
4501 if ((RAW == '(') || (RAW == '|')) {
4502 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4503 if (ret == NULL) return(NULL);
4504 }
4505 while (RAW == '|') {
4506 NEXT;
4507 if (elem == NULL) {
4508 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4509 if (ret == NULL) return(NULL);
4510 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004511 if (cur != NULL)
4512 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004513 cur = ret;
4514 } else {
4515 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4516 if (n == NULL) return(NULL);
4517 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004518 if (n->c1 != NULL)
4519 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004520 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004521 if (n != NULL)
4522 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004523 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004524 }
4525 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004526 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004527 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004528 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004529 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004530 xmlFreeElementContent(cur);
4531 return(NULL);
4532 }
4533 SKIP_BLANKS;
4534 GROW;
4535 }
4536 if ((RAW == ')') && (NXT(1) == '*')) {
4537 if (elem != NULL) {
4538 cur->c2 = xmlNewElementContent(elem,
4539 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004540 if (cur->c2 != NULL)
4541 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004542 }
4543 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004544 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004545 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4546"Element content declaration doesn't start and stop in the same entity\n",
4547 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004548 }
Owen Taylor3473f882001-02-23 17:55:21 +00004549 SKIP(2);
4550 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004551 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004552 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004553 return(NULL);
4554 }
4555
4556 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004557 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004558 }
4559 return(ret);
4560}
4561
4562/**
4563 * xmlParseElementChildrenContentDecl:
4564 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004565 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004566 *
4567 * parse the declaration for a Mixed Element content
4568 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4569 *
4570 *
4571 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4572 *
4573 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4574 *
4575 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4576 *
4577 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4578 *
4579 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4580 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004581 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004582 * opening or closing parentheses in a choice, seq, or Mixed
4583 * construct is contained in the replacement text for a parameter
4584 * entity, both must be contained in the same replacement text. For
4585 * interoperability, if a parameter-entity reference appears in a
4586 * choice, seq, or Mixed construct, its replacement text should not
4587 * be empty, and neither the first nor last non-blank character of
4588 * the replacement text should be a connector (| or ,).
4589 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004590 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004591 * hierarchy.
4592 */
4593xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004594xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004595 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004596 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004597 xmlChar type = 0;
4598
4599 SKIP_BLANKS;
4600 GROW;
4601 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004602 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004603
Owen Taylor3473f882001-02-23 17:55:21 +00004604 /* Recurse on first child */
4605 NEXT;
4606 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004607 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004608 SKIP_BLANKS;
4609 GROW;
4610 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004611 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004612 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004613 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004614 return(NULL);
4615 }
4616 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004617 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004618 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004619 return(NULL);
4620 }
Owen Taylor3473f882001-02-23 17:55:21 +00004621 GROW;
4622 if (RAW == '?') {
4623 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4624 NEXT;
4625 } else if (RAW == '*') {
4626 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4627 NEXT;
4628 } else if (RAW == '+') {
4629 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4630 NEXT;
4631 } else {
4632 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4633 }
Owen Taylor3473f882001-02-23 17:55:21 +00004634 GROW;
4635 }
4636 SKIP_BLANKS;
4637 SHRINK;
4638 while (RAW != ')') {
4639 /*
4640 * Each loop we parse one separator and one element.
4641 */
4642 if (RAW == ',') {
4643 if (type == 0) type = CUR;
4644
4645 /*
4646 * Detect "Name | Name , Name" error
4647 */
4648 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004649 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004650 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004651 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004652 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004653 xmlFreeElementContent(last);
4654 if (ret != NULL)
4655 xmlFreeElementContent(ret);
4656 return(NULL);
4657 }
4658 NEXT;
4659
4660 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4661 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004662 if ((last != NULL) && (last != ret))
4663 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004664 xmlFreeElementContent(ret);
4665 return(NULL);
4666 }
4667 if (last == NULL) {
4668 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004669 if (ret != NULL)
4670 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004671 ret = cur = op;
4672 } else {
4673 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004674 if (op != NULL)
4675 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004676 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004677 if (last != NULL)
4678 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004679 cur =op;
4680 last = NULL;
4681 }
4682 } else if (RAW == '|') {
4683 if (type == 0) type = CUR;
4684
4685 /*
4686 * Detect "Name , Name | Name" error
4687 */
4688 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004689 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004690 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004691 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004692 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004693 xmlFreeElementContent(last);
4694 if (ret != NULL)
4695 xmlFreeElementContent(ret);
4696 return(NULL);
4697 }
4698 NEXT;
4699
4700 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4701 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004702 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004703 xmlFreeElementContent(last);
4704 if (ret != NULL)
4705 xmlFreeElementContent(ret);
4706 return(NULL);
4707 }
4708 if (last == NULL) {
4709 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004710 if (ret != NULL)
4711 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004712 ret = cur = op;
4713 } else {
4714 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004715 if (op != NULL)
4716 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004717 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004718 if (last != NULL)
4719 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004720 cur =op;
4721 last = NULL;
4722 }
4723 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004724 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004725 if (ret != NULL)
4726 xmlFreeElementContent(ret);
4727 return(NULL);
4728 }
4729 GROW;
4730 SKIP_BLANKS;
4731 GROW;
4732 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004733 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004734 /* Recurse on second child */
4735 NEXT;
4736 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004737 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004738 SKIP_BLANKS;
4739 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004740 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004741 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004742 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 if (ret != NULL)
4744 xmlFreeElementContent(ret);
4745 return(NULL);
4746 }
4747 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004748 if (RAW == '?') {
4749 last->ocur = XML_ELEMENT_CONTENT_OPT;
4750 NEXT;
4751 } else if (RAW == '*') {
4752 last->ocur = XML_ELEMENT_CONTENT_MULT;
4753 NEXT;
4754 } else if (RAW == '+') {
4755 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4756 NEXT;
4757 } else {
4758 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4759 }
4760 }
4761 SKIP_BLANKS;
4762 GROW;
4763 }
4764 if ((cur != NULL) && (last != NULL)) {
4765 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004766 if (last != NULL)
4767 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004768 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004769 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004770 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4771"Element content declaration doesn't start and stop in the same entity\n",
4772 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004773 }
Owen Taylor3473f882001-02-23 17:55:21 +00004774 NEXT;
4775 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004776 if (ret != NULL)
4777 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004778 NEXT;
4779 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004780 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004781 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004782 cur = ret;
4783 /*
4784 * Some normalization:
4785 * (a | b* | c?)* == (a | b | c)*
4786 */
4787 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4788 if ((cur->c1 != NULL) &&
4789 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4790 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4791 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4792 if ((cur->c2 != NULL) &&
4793 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4794 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4795 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4796 cur = cur->c2;
4797 }
4798 }
Owen Taylor3473f882001-02-23 17:55:21 +00004799 NEXT;
4800 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004801 if (ret != NULL) {
4802 int found = 0;
4803
Daniel Veillarde470df72001-04-18 21:41:07 +00004804 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004805 /*
4806 * Some normalization:
4807 * (a | b*)+ == (a | b)*
4808 * (a | b?)+ == (a | b)*
4809 */
4810 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4811 if ((cur->c1 != NULL) &&
4812 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4813 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4814 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4815 found = 1;
4816 }
4817 if ((cur->c2 != NULL) &&
4818 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4819 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4820 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4821 found = 1;
4822 }
4823 cur = cur->c2;
4824 }
4825 if (found)
4826 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4827 }
Owen Taylor3473f882001-02-23 17:55:21 +00004828 NEXT;
4829 }
4830 return(ret);
4831}
4832
4833/**
4834 * xmlParseElementContentDecl:
4835 * @ctxt: an XML parser context
4836 * @name: the name of the element being defined.
4837 * @result: the Element Content pointer will be stored here if any
4838 *
4839 * parse the declaration for an Element content either Mixed or Children,
4840 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4841 *
4842 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4843 *
4844 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4845 */
4846
4847int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004848xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004849 xmlElementContentPtr *result) {
4850
4851 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004852 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004853 int res;
4854
4855 *result = NULL;
4856
4857 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004858 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004859 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 return(-1);
4861 }
4862 NEXT;
4863 GROW;
4864 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004865 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004866 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004867 res = XML_ELEMENT_TYPE_MIXED;
4868 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004869 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004870 res = XML_ELEMENT_TYPE_ELEMENT;
4871 }
Owen Taylor3473f882001-02-23 17:55:21 +00004872 SKIP_BLANKS;
4873 *result = tree;
4874 return(res);
4875}
4876
4877/**
4878 * xmlParseElementDecl:
4879 * @ctxt: an XML parser context
4880 *
4881 * parse an Element declaration.
4882 *
4883 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4884 *
4885 * [ VC: Unique Element Type Declaration ]
4886 * No element type may be declared more than once
4887 *
4888 * Returns the type of the element, or -1 in case of error
4889 */
4890int
4891xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004892 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004893 int ret = -1;
4894 xmlElementContentPtr content = NULL;
4895
4896 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004897 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004898 xmlParserInputPtr input = ctxt->input;
4899
4900 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004901 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004902 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4903 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004904 }
4905 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004906 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004907 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004908 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4909 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004910 return(-1);
4911 }
4912 while ((RAW == 0) && (ctxt->inputNr > 1))
4913 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004914 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4916 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 }
4918 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004919 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004920 SKIP(5);
4921 /*
4922 * Element must always be empty.
4923 */
4924 ret = XML_ELEMENT_TYPE_EMPTY;
4925 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4926 (NXT(2) == 'Y')) {
4927 SKIP(3);
4928 /*
4929 * Element is a generic container.
4930 */
4931 ret = XML_ELEMENT_TYPE_ANY;
4932 } else if (RAW == '(') {
4933 ret = xmlParseElementContentDecl(ctxt, name, &content);
4934 } else {
4935 /*
4936 * [ WFC: PEs in Internal Subset ] error handling.
4937 */
4938 if ((RAW == '%') && (ctxt->external == 0) &&
4939 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004940 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004941 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004942 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004943 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004944 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4945 }
Owen Taylor3473f882001-02-23 17:55:21 +00004946 return(-1);
4947 }
4948
4949 SKIP_BLANKS;
4950 /*
4951 * Pop-up of finished entities.
4952 */
4953 while ((RAW == 0) && (ctxt->inputNr > 1))
4954 xmlPopInput(ctxt);
4955 SKIP_BLANKS;
4956
4957 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004958 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004959 } else {
4960 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004961 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4962 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004963 }
4964
4965 NEXT;
4966 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4967 (ctxt->sax->elementDecl != NULL))
4968 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4969 content);
4970 }
4971 if (content != NULL) {
4972 xmlFreeElementContent(content);
4973 }
Owen Taylor3473f882001-02-23 17:55:21 +00004974 }
4975 return(ret);
4976}
4977
4978/**
Owen Taylor3473f882001-02-23 17:55:21 +00004979 * xmlParseConditionalSections
4980 * @ctxt: an XML parser context
4981 *
4982 * [61] conditionalSect ::= includeSect | ignoreSect
4983 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4984 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4985 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4986 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4987 */
4988
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004989static void
Owen Taylor3473f882001-02-23 17:55:21 +00004990xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4991 SKIP(3);
4992 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004993 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004994 SKIP(7);
4995 SKIP_BLANKS;
4996 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004997 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004998 } else {
4999 NEXT;
5000 }
5001 if (xmlParserDebugEntities) {
5002 if ((ctxt->input != NULL) && (ctxt->input->filename))
5003 xmlGenericError(xmlGenericErrorContext,
5004 "%s(%d): ", ctxt->input->filename,
5005 ctxt->input->line);
5006 xmlGenericError(xmlGenericErrorContext,
5007 "Entering INCLUDE Conditional Section\n");
5008 }
5009
5010 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5011 (NXT(2) != '>'))) {
5012 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005013 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005014
5015 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5016 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005017 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005018 NEXT;
5019 } else if (RAW == '%') {
5020 xmlParsePEReference(ctxt);
5021 } else
5022 xmlParseMarkupDecl(ctxt);
5023
5024 /*
5025 * Pop-up of finished entities.
5026 */
5027 while ((RAW == 0) && (ctxt->inputNr > 1))
5028 xmlPopInput(ctxt);
5029
Daniel Veillardfdc91562002-07-01 21:52:03 +00005030 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005031 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005032 break;
5033 }
5034 }
5035 if (xmlParserDebugEntities) {
5036 if ((ctxt->input != NULL) && (ctxt->input->filename))
5037 xmlGenericError(xmlGenericErrorContext,
5038 "%s(%d): ", ctxt->input->filename,
5039 ctxt->input->line);
5040 xmlGenericError(xmlGenericErrorContext,
5041 "Leaving INCLUDE Conditional Section\n");
5042 }
5043
Daniel Veillarda07050d2003-10-19 14:46:32 +00005044 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005045 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005046 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005047 int depth = 0;
5048
5049 SKIP(6);
5050 SKIP_BLANKS;
5051 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005052 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005053 } else {
5054 NEXT;
5055 }
5056 if (xmlParserDebugEntities) {
5057 if ((ctxt->input != NULL) && (ctxt->input->filename))
5058 xmlGenericError(xmlGenericErrorContext,
5059 "%s(%d): ", ctxt->input->filename,
5060 ctxt->input->line);
5061 xmlGenericError(xmlGenericErrorContext,
5062 "Entering IGNORE Conditional Section\n");
5063 }
5064
5065 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005066 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005067 * But disable SAX event generating DTD building in the meantime
5068 */
5069 state = ctxt->disableSAX;
5070 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005071 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 ctxt->instate = XML_PARSER_IGNORE;
5073
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005074 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005075 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5076 depth++;
5077 SKIP(3);
5078 continue;
5079 }
5080 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5081 if (--depth >= 0) SKIP(3);
5082 continue;
5083 }
5084 NEXT;
5085 continue;
5086 }
5087
5088 ctxt->disableSAX = state;
5089 ctxt->instate = instate;
5090
5091 if (xmlParserDebugEntities) {
5092 if ((ctxt->input != NULL) && (ctxt->input->filename))
5093 xmlGenericError(xmlGenericErrorContext,
5094 "%s(%d): ", ctxt->input->filename,
5095 ctxt->input->line);
5096 xmlGenericError(xmlGenericErrorContext,
5097 "Leaving IGNORE Conditional Section\n");
5098 }
5099
5100 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005101 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 }
5103
5104 if (RAW == 0)
5105 SHRINK;
5106
5107 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005108 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005109 } else {
5110 SKIP(3);
5111 }
5112}
5113
5114/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005115 * xmlParseMarkupDecl:
5116 * @ctxt: an XML parser context
5117 *
5118 * parse Markup declarations
5119 *
5120 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5121 * NotationDecl | PI | Comment
5122 *
5123 * [ VC: Proper Declaration/PE Nesting ]
5124 * Parameter-entity replacement text must be properly nested with
5125 * markup declarations. That is to say, if either the first character
5126 * or the last character of a markup declaration (markupdecl above) is
5127 * contained in the replacement text for a parameter-entity reference,
5128 * both must be contained in the same replacement text.
5129 *
5130 * [ WFC: PEs in Internal Subset ]
5131 * In the internal DTD subset, parameter-entity references can occur
5132 * only where markup declarations can occur, not within markup declarations.
5133 * (This does not apply to references that occur in external parameter
5134 * entities or to the external subset.)
5135 */
5136void
5137xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5138 GROW;
5139 xmlParseElementDecl(ctxt);
5140 xmlParseAttributeListDecl(ctxt);
5141 xmlParseEntityDecl(ctxt);
5142 xmlParseNotationDecl(ctxt);
5143 xmlParsePI(ctxt);
5144 xmlParseComment(ctxt);
5145 /*
5146 * This is only for internal subset. On external entities,
5147 * the replacement is done before parsing stage
5148 */
5149 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5150 xmlParsePEReference(ctxt);
5151
5152 /*
5153 * Conditional sections are allowed from entities included
5154 * by PE References in the internal subset.
5155 */
5156 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5157 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5158 xmlParseConditionalSections(ctxt);
5159 }
5160 }
5161
5162 ctxt->instate = XML_PARSER_DTD;
5163}
5164
5165/**
5166 * xmlParseTextDecl:
5167 * @ctxt: an XML parser context
5168 *
5169 * parse an XML declaration header for external entities
5170 *
5171 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5172 *
5173 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5174 */
5175
5176void
5177xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5178 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005179 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005180
5181 /*
5182 * We know that '<?xml' is here.
5183 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005184 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005185 SKIP(5);
5186 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005187 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005188 return;
5189 }
5190
William M. Brack76e95df2003-10-18 16:20:14 +00005191 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5193 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005194 }
5195 SKIP_BLANKS;
5196
5197 /*
5198 * We may have the VersionInfo here.
5199 */
5200 version = xmlParseVersionInfo(ctxt);
5201 if (version == NULL)
5202 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005203 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005204 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005205 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5206 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005207 }
5208 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005209 ctxt->input->version = version;
5210
5211 /*
5212 * We must have the encoding declaration
5213 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005214 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005215 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5216 /*
5217 * The XML REC instructs us to stop parsing right here
5218 */
5219 return;
5220 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005221 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5222 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5223 "Missing encoding in text declaration\n");
5224 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005225
5226 SKIP_BLANKS;
5227 if ((RAW == '?') && (NXT(1) == '>')) {
5228 SKIP(2);
5229 } else if (RAW == '>') {
5230 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005231 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005232 NEXT;
5233 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005234 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005235 MOVETO_ENDTAG(CUR_PTR);
5236 NEXT;
5237 }
5238}
5239
5240/**
Owen Taylor3473f882001-02-23 17:55:21 +00005241 * xmlParseExternalSubset:
5242 * @ctxt: an XML parser context
5243 * @ExternalID: the external identifier
5244 * @SystemID: the system identifier (or URL)
5245 *
5246 * parse Markup declarations from an external subset
5247 *
5248 * [30] extSubset ::= textDecl? extSubsetDecl
5249 *
5250 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5251 */
5252void
5253xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5254 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005255 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005256 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005257 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005258 xmlParseTextDecl(ctxt);
5259 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5260 /*
5261 * The XML REC instructs us to stop parsing right here
5262 */
5263 ctxt->instate = XML_PARSER_EOF;
5264 return;
5265 }
5266 }
5267 if (ctxt->myDoc == NULL) {
5268 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5269 }
5270 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5271 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5272
5273 ctxt->instate = XML_PARSER_DTD;
5274 ctxt->external = 1;
5275 while (((RAW == '<') && (NXT(1) == '?')) ||
5276 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005277 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005278 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005279 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005280
5281 GROW;
5282 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5283 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005284 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005285 NEXT;
5286 } else if (RAW == '%') {
5287 xmlParsePEReference(ctxt);
5288 } else
5289 xmlParseMarkupDecl(ctxt);
5290
5291 /*
5292 * Pop-up of finished entities.
5293 */
5294 while ((RAW == 0) && (ctxt->inputNr > 1))
5295 xmlPopInput(ctxt);
5296
Daniel Veillardfdc91562002-07-01 21:52:03 +00005297 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005298 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005299 break;
5300 }
5301 }
5302
5303 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005304 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005305 }
5306
5307}
5308
5309/**
5310 * xmlParseReference:
5311 * @ctxt: an XML parser context
5312 *
5313 * parse and handle entity references in content, depending on the SAX
5314 * interface, this may end-up in a call to character() if this is a
5315 * CharRef, a predefined entity, if there is no reference() callback.
5316 * or if the parser was asked to switch to that mode.
5317 *
5318 * [67] Reference ::= EntityRef | CharRef
5319 */
5320void
5321xmlParseReference(xmlParserCtxtPtr ctxt) {
5322 xmlEntityPtr ent;
5323 xmlChar *val;
5324 if (RAW != '&') return;
5325
5326 if (NXT(1) == '#') {
5327 int i = 0;
5328 xmlChar out[10];
5329 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005330 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005331
5332 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5333 /*
5334 * So we are using non-UTF-8 buffers
5335 * Check that the char fit on 8bits, if not
5336 * generate a CharRef.
5337 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005338 if (value <= 0xFF) {
5339 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005340 out[1] = 0;
5341 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5342 (!ctxt->disableSAX))
5343 ctxt->sax->characters(ctxt->userData, out, 1);
5344 } else {
5345 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005346 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005347 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005348 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005349 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5350 (!ctxt->disableSAX))
5351 ctxt->sax->reference(ctxt->userData, out);
5352 }
5353 } else {
5354 /*
5355 * Just encode the value in UTF-8
5356 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005357 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005358 out[i] = 0;
5359 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5360 (!ctxt->disableSAX))
5361 ctxt->sax->characters(ctxt->userData, out, i);
5362 }
5363 } else {
5364 ent = xmlParseEntityRef(ctxt);
5365 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005366 if (!ctxt->wellFormed)
5367 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005368 if ((ent->name != NULL) &&
5369 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5370 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005371 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005372
5373
5374 /*
5375 * The first reference to the entity trigger a parsing phase
5376 * where the ent->children is filled with the result from
5377 * the parsing.
5378 */
5379 if (ent->children == NULL) {
5380 xmlChar *value;
5381 value = ent->content;
5382
5383 /*
5384 * Check that this entity is well formed
5385 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005386 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005387 (value[1] == 0) && (value[0] == '<') &&
5388 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5389 /*
5390 * DONE: get definite answer on this !!!
5391 * Lots of entity decls are used to declare a single
5392 * char
5393 * <!ENTITY lt "<">
5394 * Which seems to be valid since
5395 * 2.4: The ampersand character (&) and the left angle
5396 * bracket (<) may appear in their literal form only
5397 * when used ... They are also legal within the literal
5398 * entity value of an internal entity declaration;i
5399 * see "4.3.2 Well-Formed Parsed Entities".
5400 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5401 * Looking at the OASIS test suite and James Clark
5402 * tests, this is broken. However the XML REC uses
5403 * it. Is the XML REC not well-formed ????
5404 * This is a hack to avoid this problem
5405 *
5406 * ANSWER: since lt gt amp .. are already defined,
5407 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005408 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005409 * is lousy but acceptable.
5410 */
5411 list = xmlNewDocText(ctxt->myDoc, value);
5412 if (list != NULL) {
5413 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5414 (ent->children == NULL)) {
5415 ent->children = list;
5416 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005417 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005418 list->parent = (xmlNodePtr) ent;
5419 } else {
5420 xmlFreeNodeList(list);
5421 }
5422 } else if (list != NULL) {
5423 xmlFreeNodeList(list);
5424 }
5425 } else {
5426 /*
5427 * 4.3.2: An internal general parsed entity is well-formed
5428 * if its replacement text matches the production labeled
5429 * content.
5430 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005431
5432 void *user_data;
5433 /*
5434 * This is a bit hackish but this seems the best
5435 * way to make sure both SAX and DOM entity support
5436 * behaves okay.
5437 */
5438 if (ctxt->userData == ctxt)
5439 user_data = NULL;
5440 else
5441 user_data = ctxt->userData;
5442
Owen Taylor3473f882001-02-23 17:55:21 +00005443 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5444 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005445 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5446 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005447 ctxt->depth--;
5448 } else if (ent->etype ==
5449 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5450 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005451 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005452 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005453 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005454 ctxt->depth--;
5455 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005456 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005457 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5458 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 }
5460 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005461 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005462 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005463 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005464 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5465 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005466 (ent->children == NULL)) {
5467 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005468 if (ctxt->replaceEntities) {
5469 /*
5470 * Prune it directly in the generated document
5471 * except for single text nodes.
5472 */
5473 if ((list->type == XML_TEXT_NODE) &&
5474 (list->next == NULL)) {
5475 list->parent = (xmlNodePtr) ent;
5476 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005477 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005478 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005479 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005480 while (list != NULL) {
5481 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005482 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005483 if (list->next == NULL)
5484 ent->last = list;
5485 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005486 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005487 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005488#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005489 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5490 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005491#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005492 }
5493 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005494 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005495 while (list != NULL) {
5496 list->parent = (xmlNodePtr) ent;
5497 if (list->next == NULL)
5498 ent->last = list;
5499 list = list->next;
5500 }
Owen Taylor3473f882001-02-23 17:55:21 +00005501 }
5502 } else {
5503 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005504 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005505 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005506 } else if ((ret != XML_ERR_OK) &&
5507 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005508 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005509 } else if (list != NULL) {
5510 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005511 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005512 }
5513 }
5514 }
5515 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5516 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5517 /*
5518 * Create a node.
5519 */
5520 ctxt->sax->reference(ctxt->userData, ent->name);
5521 return;
5522 } else if (ctxt->replaceEntities) {
5523 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5524 /*
5525 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005526 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005527 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005528 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005529 if ((list == NULL) && (ent->owner == 0)) {
5530 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005531 cur = ent->children;
5532 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005533 nw = xmlCopyNode(cur, 1);
5534 if (nw != NULL) {
5535 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005536 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005537 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005538 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005539 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005540 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005541 if (cur == ent->last)
5542 break;
5543 cur = cur->next;
5544 }
Daniel Veillard81273902003-09-30 00:43:48 +00005545#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005546 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005547 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005548#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005549 } else if (list == NULL) {
5550 xmlNodePtr nw = NULL, cur, next, last,
5551 firstChild = NULL;
5552 /*
5553 * Copy the entity child list and make it the new
5554 * entity child list. The goal is to make sure any
5555 * ID or REF referenced will be the one from the
5556 * document content and not the entity copy.
5557 */
5558 cur = ent->children;
5559 ent->children = NULL;
5560 last = ent->last;
5561 ent->last = NULL;
5562 while (cur != NULL) {
5563 next = cur->next;
5564 cur->next = NULL;
5565 cur->parent = NULL;
5566 nw = xmlCopyNode(cur, 1);
5567 if (nw != NULL) {
5568 nw->_private = cur->_private;
5569 if (firstChild == NULL){
5570 firstChild = cur;
5571 }
5572 xmlAddChild((xmlNodePtr) ent, nw);
5573 xmlAddChild(ctxt->node, cur);
5574 }
5575 if (cur == last)
5576 break;
5577 cur = next;
5578 }
5579 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005580#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005581 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5582 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005583#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005584 } else {
5585 /*
5586 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005587 * node with a possible previous text one which
5588 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005589 */
5590 if (ent->children->type == XML_TEXT_NODE)
5591 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5592 if ((ent->last != ent->children) &&
5593 (ent->last->type == XML_TEXT_NODE))
5594 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5595 xmlAddChildList(ctxt->node, ent->children);
5596 }
5597
Owen Taylor3473f882001-02-23 17:55:21 +00005598 /*
5599 * This is to avoid a nasty side effect, see
5600 * characters() in SAX.c
5601 */
5602 ctxt->nodemem = 0;
5603 ctxt->nodelen = 0;
5604 return;
5605 } else {
5606 /*
5607 * Probably running in SAX mode
5608 */
5609 xmlParserInputPtr input;
5610
5611 input = xmlNewEntityInputStream(ctxt, ent);
5612 xmlPushInput(ctxt, input);
5613 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005614 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5615 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005616 xmlParseTextDecl(ctxt);
5617 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5618 /*
5619 * The XML REC instructs us to stop parsing right here
5620 */
5621 ctxt->instate = XML_PARSER_EOF;
5622 return;
5623 }
5624 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005625 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5626 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005627 }
5628 }
5629 return;
5630 }
5631 }
5632 } else {
5633 val = ent->content;
5634 if (val == NULL) return;
5635 /*
5636 * inline the entity.
5637 */
5638 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5639 (!ctxt->disableSAX))
5640 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5641 }
5642 }
5643}
5644
5645/**
5646 * xmlParseEntityRef:
5647 * @ctxt: an XML parser context
5648 *
5649 * parse ENTITY references declarations
5650 *
5651 * [68] EntityRef ::= '&' Name ';'
5652 *
5653 * [ WFC: Entity Declared ]
5654 * In a document without any DTD, a document with only an internal DTD
5655 * subset which contains no parameter entity references, or a document
5656 * with "standalone='yes'", the Name given in the entity reference
5657 * must match that in an entity declaration, except that well-formed
5658 * documents need not declare any of the following entities: amp, lt,
5659 * gt, apos, quot. The declaration of a parameter entity must precede
5660 * any reference to it. Similarly, the declaration of a general entity
5661 * must precede any reference to it which appears in a default value in an
5662 * attribute-list declaration. Note that if entities are declared in the
5663 * external subset or in external parameter entities, a non-validating
5664 * processor is not obligated to read and process their declarations;
5665 * for such documents, the rule that an entity must be declared is a
5666 * well-formedness constraint only if standalone='yes'.
5667 *
5668 * [ WFC: Parsed Entity ]
5669 * An entity reference must not contain the name of an unparsed entity
5670 *
5671 * Returns the xmlEntityPtr if found, or NULL otherwise.
5672 */
5673xmlEntityPtr
5674xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005675 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005676 xmlEntityPtr ent = NULL;
5677
5678 GROW;
5679
5680 if (RAW == '&') {
5681 NEXT;
5682 name = xmlParseName(ctxt);
5683 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005684 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5685 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005686 } else {
5687 if (RAW == ';') {
5688 NEXT;
5689 /*
5690 * Ask first SAX for entity resolution, otherwise try the
5691 * predefined set.
5692 */
5693 if (ctxt->sax != NULL) {
5694 if (ctxt->sax->getEntity != NULL)
5695 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005696 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005697 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005698 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5699 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005700 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005701 }
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
5703 /*
5704 * [ WFC: Entity Declared ]
5705 * In a document without any DTD, a document with only an
5706 * internal DTD subset which contains no parameter entity
5707 * references, or a document with "standalone='yes'", the
5708 * Name given in the entity reference must match that in an
5709 * entity declaration, except that well-formed documents
5710 * need not declare any of the following entities: amp, lt,
5711 * gt, apos, quot.
5712 * The declaration of a parameter entity must precede any
5713 * reference to it.
5714 * Similarly, the declaration of a general entity must
5715 * precede any reference to it which appears in a default
5716 * value in an attribute-list declaration. Note that if
5717 * entities are declared in the external subset or in
5718 * external parameter entities, a non-validating processor
5719 * is not obligated to read and process their declarations;
5720 * for such documents, the rule that an entity must be
5721 * declared is a well-formedness constraint only if
5722 * standalone='yes'.
5723 */
5724 if (ent == NULL) {
5725 if ((ctxt->standalone == 1) ||
5726 ((ctxt->hasExternalSubset == 0) &&
5727 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005729 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005730 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005731 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005732 "Entity '%s' not defined\n", name);
5733 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005734 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005735 }
5736
5737 /*
5738 * [ WFC: Parsed Entity ]
5739 * An entity reference must not contain the name of an
5740 * unparsed entity
5741 */
5742 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005743 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005744 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 }
5746
5747 /*
5748 * [ WFC: No External Entity References ]
5749 * Attribute values cannot contain direct or indirect
5750 * entity references to external entities.
5751 */
5752 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5753 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005754 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5755 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005756 }
5757 /*
5758 * [ WFC: No < in Attribute Values ]
5759 * The replacement text of any entity referred to directly or
5760 * indirectly in an attribute value (other than "&lt;") must
5761 * not contain a <.
5762 */
5763 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5764 (ent != NULL) &&
5765 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5766 (ent->content != NULL) &&
5767 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005768 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005769 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 }
5771
5772 /*
5773 * Internal check, no parameter entities here ...
5774 */
5775 else {
5776 switch (ent->etype) {
5777 case XML_INTERNAL_PARAMETER_ENTITY:
5778 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005779 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5780 "Attempt to reference the parameter entity '%s'\n",
5781 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005782 break;
5783 default:
5784 break;
5785 }
5786 }
5787
5788 /*
5789 * [ WFC: No Recursion ]
5790 * A parsed entity must not contain a recursive reference
5791 * to itself, either directly or indirectly.
5792 * Done somewhere else
5793 */
5794
5795 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005796 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005797 }
Owen Taylor3473f882001-02-23 17:55:21 +00005798 }
5799 }
5800 return(ent);
5801}
5802
5803/**
5804 * xmlParseStringEntityRef:
5805 * @ctxt: an XML parser context
5806 * @str: a pointer to an index in the string
5807 *
5808 * parse ENTITY references declarations, but this version parses it from
5809 * a string value.
5810 *
5811 * [68] EntityRef ::= '&' Name ';'
5812 *
5813 * [ WFC: Entity Declared ]
5814 * In a document without any DTD, a document with only an internal DTD
5815 * subset which contains no parameter entity references, or a document
5816 * with "standalone='yes'", the Name given in the entity reference
5817 * must match that in an entity declaration, except that well-formed
5818 * documents need not declare any of the following entities: amp, lt,
5819 * gt, apos, quot. The declaration of a parameter entity must precede
5820 * any reference to it. Similarly, the declaration of a general entity
5821 * must precede any reference to it which appears in a default value in an
5822 * attribute-list declaration. Note that if entities are declared in the
5823 * external subset or in external parameter entities, a non-validating
5824 * processor is not obligated to read and process their declarations;
5825 * for such documents, the rule that an entity must be declared is a
5826 * well-formedness constraint only if standalone='yes'.
5827 *
5828 * [ WFC: Parsed Entity ]
5829 * An entity reference must not contain the name of an unparsed entity
5830 *
5831 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5832 * is updated to the current location in the string.
5833 */
5834xmlEntityPtr
5835xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5836 xmlChar *name;
5837 const xmlChar *ptr;
5838 xmlChar cur;
5839 xmlEntityPtr ent = NULL;
5840
5841 if ((str == NULL) || (*str == NULL))
5842 return(NULL);
5843 ptr = *str;
5844 cur = *ptr;
5845 if (cur == '&') {
5846 ptr++;
5847 cur = *ptr;
5848 name = xmlParseStringName(ctxt, &ptr);
5849 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005850 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5851 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005852 } else {
5853 if (*ptr == ';') {
5854 ptr++;
5855 /*
5856 * Ask first SAX for entity resolution, otherwise try the
5857 * predefined set.
5858 */
5859 if (ctxt->sax != NULL) {
5860 if (ctxt->sax->getEntity != NULL)
5861 ent = ctxt->sax->getEntity(ctxt->userData, name);
5862 if (ent == NULL)
5863 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005864 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005865 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005866 }
Owen Taylor3473f882001-02-23 17:55:21 +00005867 }
5868 /*
5869 * [ WFC: Entity Declared ]
5870 * In a document without any DTD, a document with only an
5871 * internal DTD subset which contains no parameter entity
5872 * references, or a document with "standalone='yes'", the
5873 * Name given in the entity reference must match that in an
5874 * entity declaration, except that well-formed documents
5875 * need not declare any of the following entities: amp, lt,
5876 * gt, apos, quot.
5877 * The declaration of a parameter entity must precede any
5878 * reference to it.
5879 * Similarly, the declaration of a general entity must
5880 * precede any reference to it which appears in a default
5881 * value in an attribute-list declaration. Note that if
5882 * entities are declared in the external subset or in
5883 * external parameter entities, a non-validating processor
5884 * is not obligated to read and process their declarations;
5885 * for such documents, the rule that an entity must be
5886 * declared is a well-formedness constraint only if
5887 * standalone='yes'.
5888 */
5889 if (ent == NULL) {
5890 if ((ctxt->standalone == 1) ||
5891 ((ctxt->hasExternalSubset == 0) &&
5892 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005893 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005894 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005895 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005896 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005897 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005898 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005900 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005901 }
5902
5903 /*
5904 * [ WFC: Parsed Entity ]
5905 * An entity reference must not contain the name of an
5906 * unparsed entity
5907 */
5908 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005909 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005910 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 }
5912
5913 /*
5914 * [ WFC: No External Entity References ]
5915 * Attribute values cannot contain direct or indirect
5916 * entity references to external entities.
5917 */
5918 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5919 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005920 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005921 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005922 }
5923 /*
5924 * [ WFC: No < in Attribute Values ]
5925 * The replacement text of any entity referred to directly or
5926 * indirectly in an attribute value (other than "&lt;") must
5927 * not contain a <.
5928 */
5929 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5930 (ent != NULL) &&
5931 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5932 (ent->content != NULL) &&
5933 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005934 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
5935 "'<' in entity '%s' is not allowed in attributes values\n",
5936 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005937 }
5938
5939 /*
5940 * Internal check, no parameter entities here ...
5941 */
5942 else {
5943 switch (ent->etype) {
5944 case XML_INTERNAL_PARAMETER_ENTITY:
5945 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00005946 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5947 "Attempt to reference the parameter entity '%s'\n",
5948 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005949 break;
5950 default:
5951 break;
5952 }
5953 }
5954
5955 /*
5956 * [ WFC: No Recursion ]
5957 * A parsed entity must not contain a recursive reference
5958 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005959 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005960 */
5961
5962 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005963 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005964 }
5965 xmlFree(name);
5966 }
5967 }
5968 *str = ptr;
5969 return(ent);
5970}
5971
5972/**
5973 * xmlParsePEReference:
5974 * @ctxt: an XML parser context
5975 *
5976 * parse PEReference declarations
5977 * The entity content is handled directly by pushing it's content as
5978 * a new input stream.
5979 *
5980 * [69] PEReference ::= '%' Name ';'
5981 *
5982 * [ WFC: No Recursion ]
5983 * A parsed entity must not contain a recursive
5984 * reference to itself, either directly or indirectly.
5985 *
5986 * [ WFC: Entity Declared ]
5987 * In a document without any DTD, a document with only an internal DTD
5988 * subset which contains no parameter entity references, or a document
5989 * with "standalone='yes'", ... ... The declaration of a parameter
5990 * entity must precede any reference to it...
5991 *
5992 * [ VC: Entity Declared ]
5993 * In a document with an external subset or external parameter entities
5994 * with "standalone='no'", ... ... The declaration of a parameter entity
5995 * must precede any reference to it...
5996 *
5997 * [ WFC: In DTD ]
5998 * Parameter-entity references may only appear in the DTD.
5999 * NOTE: misleading but this is handled.
6000 */
6001void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006002xmlParsePEReference(xmlParserCtxtPtr ctxt)
6003{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006004 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006005 xmlEntityPtr entity = NULL;
6006 xmlParserInputPtr input;
6007
6008 if (RAW == '%') {
6009 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006010 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006011 if (name == NULL) {
6012 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6013 "xmlParsePEReference: no name\n");
6014 } else {
6015 if (RAW == ';') {
6016 NEXT;
6017 if ((ctxt->sax != NULL) &&
6018 (ctxt->sax->getParameterEntity != NULL))
6019 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6020 name);
6021 if (entity == NULL) {
6022 /*
6023 * [ WFC: Entity Declared ]
6024 * In a document without any DTD, a document with only an
6025 * internal DTD subset which contains no parameter entity
6026 * references, or a document with "standalone='yes'", ...
6027 * ... The declaration of a parameter entity must precede
6028 * any reference to it...
6029 */
6030 if ((ctxt->standalone == 1) ||
6031 ((ctxt->hasExternalSubset == 0) &&
6032 (ctxt->hasPErefs == 0))) {
6033 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6034 "PEReference: %%%s; not found\n",
6035 name);
6036 } else {
6037 /*
6038 * [ VC: Entity Declared ]
6039 * In a document with an external subset or external
6040 * parameter entities with "standalone='no'", ...
6041 * ... The declaration of a parameter entity must
6042 * precede any reference to it...
6043 */
6044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6045 "PEReference: %%%s; not found\n",
6046 name, NULL);
6047 ctxt->valid = 0;
6048 }
6049 } else {
6050 /*
6051 * Internal checking in case the entity quest barfed
6052 */
6053 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6054 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6055 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6056 "Internal: %%%s; is not a parameter entity\n",
6057 name, NULL);
6058 } else if (ctxt->input->free != deallocblankswrapper) {
6059 input =
6060 xmlNewBlanksWrapperInputStream(ctxt, entity);
6061 xmlPushInput(ctxt, input);
6062 } else {
6063 /*
6064 * TODO !!!
6065 * handle the extra spaces added before and after
6066 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6067 */
6068 input = xmlNewEntityInputStream(ctxt, entity);
6069 xmlPushInput(ctxt, input);
6070 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006071 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006072 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006073 xmlParseTextDecl(ctxt);
6074 if (ctxt->errNo ==
6075 XML_ERR_UNSUPPORTED_ENCODING) {
6076 /*
6077 * The XML REC instructs us to stop parsing
6078 * right here
6079 */
6080 ctxt->instate = XML_PARSER_EOF;
6081 return;
6082 }
6083 }
6084 }
6085 }
6086 ctxt->hasPErefs = 1;
6087 } else {
6088 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6089 }
6090 }
Owen Taylor3473f882001-02-23 17:55:21 +00006091 }
6092}
6093
6094/**
6095 * xmlParseStringPEReference:
6096 * @ctxt: an XML parser context
6097 * @str: a pointer to an index in the string
6098 *
6099 * parse PEReference declarations
6100 *
6101 * [69] PEReference ::= '%' Name ';'
6102 *
6103 * [ WFC: No Recursion ]
6104 * A parsed entity must not contain a recursive
6105 * reference to itself, either directly or indirectly.
6106 *
6107 * [ WFC: Entity Declared ]
6108 * In a document without any DTD, a document with only an internal DTD
6109 * subset which contains no parameter entity references, or a document
6110 * with "standalone='yes'", ... ... The declaration of a parameter
6111 * entity must precede any reference to it...
6112 *
6113 * [ VC: Entity Declared ]
6114 * In a document with an external subset or external parameter entities
6115 * with "standalone='no'", ... ... The declaration of a parameter entity
6116 * must precede any reference to it...
6117 *
6118 * [ WFC: In DTD ]
6119 * Parameter-entity references may only appear in the DTD.
6120 * NOTE: misleading but this is handled.
6121 *
6122 * Returns the string of the entity content.
6123 * str is updated to the current value of the index
6124 */
6125xmlEntityPtr
6126xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6127 const xmlChar *ptr;
6128 xmlChar cur;
6129 xmlChar *name;
6130 xmlEntityPtr entity = NULL;
6131
6132 if ((str == NULL) || (*str == NULL)) return(NULL);
6133 ptr = *str;
6134 cur = *ptr;
6135 if (cur == '%') {
6136 ptr++;
6137 cur = *ptr;
6138 name = xmlParseStringName(ctxt, &ptr);
6139 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006140 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6141 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006142 } else {
6143 cur = *ptr;
6144 if (cur == ';') {
6145 ptr++;
6146 cur = *ptr;
6147 if ((ctxt->sax != NULL) &&
6148 (ctxt->sax->getParameterEntity != NULL))
6149 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6150 name);
6151 if (entity == NULL) {
6152 /*
6153 * [ WFC: Entity Declared ]
6154 * In a document without any DTD, a document with only an
6155 * internal DTD subset which contains no parameter entity
6156 * references, or a document with "standalone='yes'", ...
6157 * ... The declaration of a parameter entity must precede
6158 * any reference to it...
6159 */
6160 if ((ctxt->standalone == 1) ||
6161 ((ctxt->hasExternalSubset == 0) &&
6162 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006163 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006164 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 } else {
6166 /*
6167 * [ VC: Entity Declared ]
6168 * In a document with an external subset or external
6169 * parameter entities with "standalone='no'", ...
6170 * ... The declaration of a parameter entity must
6171 * precede any reference to it...
6172 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006173 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6174 "PEReference: %%%s; not found\n",
6175 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006176 ctxt->valid = 0;
6177 }
6178 } else {
6179 /*
6180 * Internal checking in case the entity quest barfed
6181 */
6182 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6183 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006184 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6185 "%%%s; is not a parameter entity\n",
6186 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006187 }
6188 }
6189 ctxt->hasPErefs = 1;
6190 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006191 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006192 }
6193 xmlFree(name);
6194 }
6195 }
6196 *str = ptr;
6197 return(entity);
6198}
6199
6200/**
6201 * xmlParseDocTypeDecl:
6202 * @ctxt: an XML parser context
6203 *
6204 * parse a DOCTYPE declaration
6205 *
6206 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6207 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6208 *
6209 * [ VC: Root Element Type ]
6210 * The Name in the document type declaration must match the element
6211 * type of the root element.
6212 */
6213
6214void
6215xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006216 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006217 xmlChar *ExternalID = NULL;
6218 xmlChar *URI = NULL;
6219
6220 /*
6221 * We know that '<!DOCTYPE' has been detected.
6222 */
6223 SKIP(9);
6224
6225 SKIP_BLANKS;
6226
6227 /*
6228 * Parse the DOCTYPE name.
6229 */
6230 name = xmlParseName(ctxt);
6231 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006232 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6233 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006234 }
6235 ctxt->intSubName = name;
6236
6237 SKIP_BLANKS;
6238
6239 /*
6240 * Check for SystemID and ExternalID
6241 */
6242 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6243
6244 if ((URI != NULL) || (ExternalID != NULL)) {
6245 ctxt->hasExternalSubset = 1;
6246 }
6247 ctxt->extSubURI = URI;
6248 ctxt->extSubSystem = ExternalID;
6249
6250 SKIP_BLANKS;
6251
6252 /*
6253 * Create and update the internal subset.
6254 */
6255 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6256 (!ctxt->disableSAX))
6257 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6258
6259 /*
6260 * Is there any internal subset declarations ?
6261 * they are handled separately in xmlParseInternalSubset()
6262 */
6263 if (RAW == '[')
6264 return;
6265
6266 /*
6267 * We should be at the end of the DOCTYPE declaration.
6268 */
6269 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006270 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006271 }
6272 NEXT;
6273}
6274
6275/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006276 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006277 * @ctxt: an XML parser context
6278 *
6279 * parse the internal subset declaration
6280 *
6281 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6282 */
6283
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006284static void
Owen Taylor3473f882001-02-23 17:55:21 +00006285xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6286 /*
6287 * Is there any DTD definition ?
6288 */
6289 if (RAW == '[') {
6290 ctxt->instate = XML_PARSER_DTD;
6291 NEXT;
6292 /*
6293 * Parse the succession of Markup declarations and
6294 * PEReferences.
6295 * Subsequence (markupdecl | PEReference | S)*
6296 */
6297 while (RAW != ']') {
6298 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006299 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006300
6301 SKIP_BLANKS;
6302 xmlParseMarkupDecl(ctxt);
6303 xmlParsePEReference(ctxt);
6304
6305 /*
6306 * Pop-up of finished entities.
6307 */
6308 while ((RAW == 0) && (ctxt->inputNr > 1))
6309 xmlPopInput(ctxt);
6310
6311 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006312 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006313 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006314 break;
6315 }
6316 }
6317 if (RAW == ']') {
6318 NEXT;
6319 SKIP_BLANKS;
6320 }
6321 }
6322
6323 /*
6324 * We should be at the end of the DOCTYPE declaration.
6325 */
6326 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006327 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006328 }
6329 NEXT;
6330}
6331
Daniel Veillard81273902003-09-30 00:43:48 +00006332#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006333/**
6334 * xmlParseAttribute:
6335 * @ctxt: an XML parser context
6336 * @value: a xmlChar ** used to store the value of the attribute
6337 *
6338 * parse an attribute
6339 *
6340 * [41] Attribute ::= Name Eq AttValue
6341 *
6342 * [ WFC: No External Entity References ]
6343 * Attribute values cannot contain direct or indirect entity references
6344 * to external entities.
6345 *
6346 * [ WFC: No < in Attribute Values ]
6347 * The replacement text of any entity referred to directly or indirectly in
6348 * an attribute value (other than "&lt;") must not contain a <.
6349 *
6350 * [ VC: Attribute Value Type ]
6351 * The attribute must have been declared; the value must be of the type
6352 * declared for it.
6353 *
6354 * [25] Eq ::= S? '=' S?
6355 *
6356 * With namespace:
6357 *
6358 * [NS 11] Attribute ::= QName Eq AttValue
6359 *
6360 * Also the case QName == xmlns:??? is handled independently as a namespace
6361 * definition.
6362 *
6363 * Returns the attribute name, and the value in *value.
6364 */
6365
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006366const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006367xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006368 const xmlChar *name;
6369 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006370
6371 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006372 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 name = xmlParseName(ctxt);
6374 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006375 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006376 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006377 return(NULL);
6378 }
6379
6380 /*
6381 * read the value
6382 */
6383 SKIP_BLANKS;
6384 if (RAW == '=') {
6385 NEXT;
6386 SKIP_BLANKS;
6387 val = xmlParseAttValue(ctxt);
6388 ctxt->instate = XML_PARSER_CONTENT;
6389 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006390 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006391 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006392 return(NULL);
6393 }
6394
6395 /*
6396 * Check that xml:lang conforms to the specification
6397 * No more registered as an error, just generate a warning now
6398 * since this was deprecated in XML second edition
6399 */
6400 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6401 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006402 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6403 "Malformed value for xml:lang : %s\n",
6404 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006405 }
6406 }
6407
6408 /*
6409 * Check that xml:space conforms to the specification
6410 */
6411 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6412 if (xmlStrEqual(val, BAD_CAST "default"))
6413 *(ctxt->space) = 0;
6414 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6415 *(ctxt->space) = 1;
6416 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006417 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006418"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006419 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 }
6421 }
6422
6423 *value = val;
6424 return(name);
6425}
6426
6427/**
6428 * xmlParseStartTag:
6429 * @ctxt: an XML parser context
6430 *
6431 * parse a start of tag either for rule element or
6432 * EmptyElement. In both case we don't parse the tag closing chars.
6433 *
6434 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6435 *
6436 * [ WFC: Unique Att Spec ]
6437 * No attribute name may appear more than once in the same start-tag or
6438 * empty-element tag.
6439 *
6440 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6441 *
6442 * [ WFC: Unique Att Spec ]
6443 * No attribute name may appear more than once in the same start-tag or
6444 * empty-element tag.
6445 *
6446 * With namespace:
6447 *
6448 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6449 *
6450 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6451 *
6452 * Returns the element name parsed
6453 */
6454
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006455const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006456xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006457 const xmlChar *name;
6458 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006459 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006460 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006461 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006462 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006463 int i;
6464
6465 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006466 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006467
6468 name = xmlParseName(ctxt);
6469 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006470 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006471 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006472 return(NULL);
6473 }
6474
6475 /*
6476 * Now parse the attributes, it ends up with the ending
6477 *
6478 * (S Attribute)* S?
6479 */
6480 SKIP_BLANKS;
6481 GROW;
6482
Daniel Veillard21a0f912001-02-25 19:54:14 +00006483 while ((RAW != '>') &&
6484 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006485 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006486 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006487 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006488
6489 attname = xmlParseAttribute(ctxt, &attvalue);
6490 if ((attname != NULL) && (attvalue != NULL)) {
6491 /*
6492 * [ WFC: Unique Att Spec ]
6493 * No attribute name may appear more than once in the same
6494 * start-tag or empty-element tag.
6495 */
6496 for (i = 0; i < nbatts;i += 2) {
6497 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006498 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006499 xmlFree(attvalue);
6500 goto failed;
6501 }
6502 }
Owen Taylor3473f882001-02-23 17:55:21 +00006503 /*
6504 * Add the pair to atts
6505 */
6506 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006507 maxatts = 22; /* allow for 10 attrs by default */
6508 atts = (const xmlChar **)
6509 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006510 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006511 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006512 if (attvalue != NULL)
6513 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006514 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006515 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006516 ctxt->atts = atts;
6517 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006518 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006519 const xmlChar **n;
6520
Owen Taylor3473f882001-02-23 17:55:21 +00006521 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006522 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006523 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006524 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006525 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006526 if (attvalue != NULL)
6527 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006528 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006529 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006530 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006531 ctxt->atts = atts;
6532 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006533 }
6534 atts[nbatts++] = attname;
6535 atts[nbatts++] = attvalue;
6536 atts[nbatts] = NULL;
6537 atts[nbatts + 1] = NULL;
6538 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006539 if (attvalue != NULL)
6540 xmlFree(attvalue);
6541 }
6542
6543failed:
6544
Daniel Veillard3772de32002-12-17 10:31:45 +00006545 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006546 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6547 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006548 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006549 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6550 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006551 }
6552 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006553 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6554 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006555 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6556 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006557 break;
6558 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006559 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006560 GROW;
6561 }
6562
6563 /*
6564 * SAX: Start of Element !
6565 */
6566 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006567 (!ctxt->disableSAX)) {
6568 if (nbatts > 0)
6569 ctxt->sax->startElement(ctxt->userData, name, atts);
6570 else
6571 ctxt->sax->startElement(ctxt->userData, name, NULL);
6572 }
Owen Taylor3473f882001-02-23 17:55:21 +00006573
6574 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006575 /* Free only the content strings */
6576 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006577 if (atts[i] != NULL)
6578 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006579 }
6580 return(name);
6581}
6582
6583/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006584 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006585 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006586 * @line: line of the start tag
6587 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006588 *
6589 * parse an end of tag
6590 *
6591 * [42] ETag ::= '</' Name S? '>'
6592 *
6593 * With namespace
6594 *
6595 * [NS 9] ETag ::= '</' QName S? '>'
6596 */
6597
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006598static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006599xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006600 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006601
6602 GROW;
6603 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006604 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006605 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006606 return;
6607 }
6608 SKIP(2);
6609
Daniel Veillard46de64e2002-05-29 08:21:33 +00006610 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006611
6612 /*
6613 * We should definitely be at the ending "S? '>'" part
6614 */
6615 GROW;
6616 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006617 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006618 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006619 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006620 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006621
6622 /*
6623 * [ WFC: Element Type Match ]
6624 * The Name in an element's end-tag must match the element type in the
6625 * start-tag.
6626 *
6627 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006628 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006629 if (name == NULL) name = BAD_CAST "unparseable";
6630 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006631 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006632 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006633 }
6634
6635 /*
6636 * SAX: End of Tag
6637 */
6638 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6639 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006640 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006641
Daniel Veillarde57ec792003-09-10 10:50:59 +00006642 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006643 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006644 return;
6645}
6646
6647/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006648 * xmlParseEndTag:
6649 * @ctxt: an XML parser context
6650 *
6651 * parse an end of tag
6652 *
6653 * [42] ETag ::= '</' Name S? '>'
6654 *
6655 * With namespace
6656 *
6657 * [NS 9] ETag ::= '</' QName S? '>'
6658 */
6659
6660void
6661xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006662 xmlParseEndTag1(ctxt, 0);
6663}
Daniel Veillard81273902003-09-30 00:43:48 +00006664#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006665
6666/************************************************************************
6667 * *
6668 * SAX 2 specific operations *
6669 * *
6670 ************************************************************************/
6671
6672static const xmlChar *
6673xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6674 int len = 0, l;
6675 int c;
6676 int count = 0;
6677
6678 /*
6679 * Handler for more complex cases
6680 */
6681 GROW;
6682 c = CUR_CHAR(l);
6683 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006684 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006685 return(NULL);
6686 }
6687
6688 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006689 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006690 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006691 (IS_COMBINING(c)) ||
6692 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006693 if (count++ > 100) {
6694 count = 0;
6695 GROW;
6696 }
6697 len += l;
6698 NEXTL(l);
6699 c = CUR_CHAR(l);
6700 }
6701 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6702}
6703
6704/*
6705 * xmlGetNamespace:
6706 * @ctxt: an XML parser context
6707 * @prefix: the prefix to lookup
6708 *
6709 * Lookup the namespace name for the @prefix (which ca be NULL)
6710 * The prefix must come from the @ctxt->dict dictionnary
6711 *
6712 * Returns the namespace name or NULL if not bound
6713 */
6714static const xmlChar *
6715xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6716 int i;
6717
Daniel Veillarde57ec792003-09-10 10:50:59 +00006718 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006719 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006720 if (ctxt->nsTab[i] == prefix) {
6721 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6722 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006723 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006724 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006725 return(NULL);
6726}
6727
6728/**
6729 * xmlParseNCName:
6730 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006731 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006732 *
6733 * parse an XML name.
6734 *
6735 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6736 * CombiningChar | Extender
6737 *
6738 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6739 *
6740 * Returns the Name parsed or NULL
6741 */
6742
6743static const xmlChar *
6744xmlParseNCName(xmlParserCtxtPtr ctxt) {
6745 const xmlChar *in;
6746 const xmlChar *ret;
6747 int count = 0;
6748
6749 /*
6750 * Accelerator for simple ASCII names
6751 */
6752 in = ctxt->input->cur;
6753 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6754 ((*in >= 0x41) && (*in <= 0x5A)) ||
6755 (*in == '_')) {
6756 in++;
6757 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6758 ((*in >= 0x41) && (*in <= 0x5A)) ||
6759 ((*in >= 0x30) && (*in <= 0x39)) ||
6760 (*in == '_') || (*in == '-') ||
6761 (*in == '.'))
6762 in++;
6763 if ((*in > 0) && (*in < 0x80)) {
6764 count = in - ctxt->input->cur;
6765 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6766 ctxt->input->cur = in;
6767 ctxt->nbChars += count;
6768 ctxt->input->col += count;
6769 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006770 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006771 }
6772 return(ret);
6773 }
6774 }
6775 return(xmlParseNCNameComplex(ctxt));
6776}
6777
6778/**
6779 * xmlParseQName:
6780 * @ctxt: an XML parser context
6781 * @prefix: pointer to store the prefix part
6782 *
6783 * parse an XML Namespace QName
6784 *
6785 * [6] QName ::= (Prefix ':')? LocalPart
6786 * [7] Prefix ::= NCName
6787 * [8] LocalPart ::= NCName
6788 *
6789 * Returns the Name parsed or NULL
6790 */
6791
6792static const xmlChar *
6793xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6794 const xmlChar *l, *p;
6795
6796 GROW;
6797
6798 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006799 if (l == NULL) {
6800 if (CUR == ':') {
6801 l = xmlParseName(ctxt);
6802 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006803 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6804 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006805 *prefix = NULL;
6806 return(l);
6807 }
6808 }
6809 return(NULL);
6810 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006811 if (CUR == ':') {
6812 NEXT;
6813 p = l;
6814 l = xmlParseNCName(ctxt);
6815 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006816 xmlChar *tmp;
6817
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006818 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6819 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006820 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6821 p = xmlDictLookup(ctxt->dict, tmp, -1);
6822 if (tmp != NULL) xmlFree(tmp);
6823 *prefix = NULL;
6824 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006825 }
6826 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006827 xmlChar *tmp;
6828
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006829 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6830 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006831 NEXT;
6832 tmp = (xmlChar *) xmlParseName(ctxt);
6833 if (tmp != NULL) {
6834 tmp = xmlBuildQName(tmp, l, NULL, 0);
6835 l = xmlDictLookup(ctxt->dict, tmp, -1);
6836 if (tmp != NULL) xmlFree(tmp);
6837 *prefix = p;
6838 return(l);
6839 }
6840 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6841 l = xmlDictLookup(ctxt->dict, tmp, -1);
6842 if (tmp != NULL) xmlFree(tmp);
6843 *prefix = p;
6844 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006845 }
6846 *prefix = p;
6847 } else
6848 *prefix = NULL;
6849 return(l);
6850}
6851
6852/**
6853 * xmlParseQNameAndCompare:
6854 * @ctxt: an XML parser context
6855 * @name: the localname
6856 * @prefix: the prefix, if any.
6857 *
6858 * parse an XML name and compares for match
6859 * (specialized for endtag parsing)
6860 *
6861 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6862 * and the name for mismatch
6863 */
6864
6865static const xmlChar *
6866xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6867 xmlChar const *prefix) {
6868 const xmlChar *cmp = name;
6869 const xmlChar *in;
6870 const xmlChar *ret;
6871 const xmlChar *prefix2;
6872
6873 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6874
6875 GROW;
6876 in = ctxt->input->cur;
6877
6878 cmp = prefix;
6879 while (*in != 0 && *in == *cmp) {
6880 ++in;
6881 ++cmp;
6882 }
6883 if ((*cmp == 0) && (*in == ':')) {
6884 in++;
6885 cmp = name;
6886 while (*in != 0 && *in == *cmp) {
6887 ++in;
6888 ++cmp;
6889 }
William M. Brack76e95df2003-10-18 16:20:14 +00006890 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006891 /* success */
6892 ctxt->input->cur = in;
6893 return((const xmlChar*) 1);
6894 }
6895 }
6896 /*
6897 * all strings coms from the dictionary, equality can be done directly
6898 */
6899 ret = xmlParseQName (ctxt, &prefix2);
6900 if ((ret == name) && (prefix == prefix2))
6901 return((const xmlChar*) 1);
6902 return ret;
6903}
6904
6905/**
6906 * xmlParseAttValueInternal:
6907 * @ctxt: an XML parser context
6908 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006909 * @alloc: whether the attribute was reallocated as a new string
6910 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006911 *
6912 * parse a value for an attribute.
6913 * NOTE: if no normalization is needed, the routine will return pointers
6914 * directly from the data buffer.
6915 *
6916 * 3.3.3 Attribute-Value Normalization:
6917 * Before the value of an attribute is passed to the application or
6918 * checked for validity, the XML processor must normalize it as follows:
6919 * - a character reference is processed by appending the referenced
6920 * character to the attribute value
6921 * - an entity reference is processed by recursively processing the
6922 * replacement text of the entity
6923 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6924 * appending #x20 to the normalized value, except that only a single
6925 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6926 * parsed entity or the literal entity value of an internal parsed entity
6927 * - other characters are processed by appending them to the normalized value
6928 * If the declared value is not CDATA, then the XML processor must further
6929 * process the normalized attribute value by discarding any leading and
6930 * trailing space (#x20) characters, and by replacing sequences of space
6931 * (#x20) characters by a single space (#x20) character.
6932 * All attributes for which no declaration has been read should be treated
6933 * by a non-validating parser as if declared CDATA.
6934 *
6935 * Returns the AttValue parsed or NULL. The value has to be freed by the
6936 * caller if it was copied, this can be detected by val[*len] == 0.
6937 */
6938
6939static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006940xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
6941 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006942{
Daniel Veillard0fb18932003-09-07 09:14:37 +00006943 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006944 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006945 xmlChar *ret = NULL;
6946
6947 GROW;
6948 in = (xmlChar *) CUR_PTR;
6949 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006950 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006951 return (NULL);
6952 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006953 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00006954
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006955 /*
6956 * try to handle in this routine the most common case where no
6957 * allocation of a new string is required and where content is
6958 * pure ASCII.
6959 */
6960 limit = *in++;
6961 end = ctxt->input->end;
6962 start = in;
6963 if (in >= end) {
6964 const xmlChar *oldbase = ctxt->input->base;
6965 GROW;
6966 if (oldbase != ctxt->input->base) {
6967 long delta = ctxt->input->base - oldbase;
6968 start = start + delta;
6969 in = in + delta;
6970 }
6971 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006972 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006973 if (normalize) {
6974 /*
6975 * Skip any leading spaces
6976 */
6977 while ((in < end) && (*in != limit) &&
6978 ((*in == 0x20) || (*in == 0x9) ||
6979 (*in == 0xA) || (*in == 0xD))) {
6980 in++;
6981 start = in;
6982 if (in >= end) {
6983 const xmlChar *oldbase = ctxt->input->base;
6984 GROW;
6985 if (oldbase != ctxt->input->base) {
6986 long delta = ctxt->input->base - oldbase;
6987 start = start + delta;
6988 in = in + delta;
6989 }
6990 end = ctxt->input->end;
6991 }
6992 }
6993 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
6994 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
6995 if ((*in++ == 0x20) && (*in == 0x20)) break;
6996 if (in >= end) {
6997 const xmlChar *oldbase = ctxt->input->base;
6998 GROW;
6999 if (oldbase != ctxt->input->base) {
7000 long delta = ctxt->input->base - oldbase;
7001 start = start + delta;
7002 in = in + delta;
7003 }
7004 end = ctxt->input->end;
7005 }
7006 }
7007 last = in;
7008 /*
7009 * skip the trailing blanks
7010 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007011 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007012 while ((in < end) && (*in != limit) &&
7013 ((*in == 0x20) || (*in == 0x9) ||
7014 (*in == 0xA) || (*in == 0xD))) {
7015 in++;
7016 if (in >= end) {
7017 const xmlChar *oldbase = ctxt->input->base;
7018 GROW;
7019 if (oldbase != ctxt->input->base) {
7020 long delta = ctxt->input->base - oldbase;
7021 start = start + delta;
7022 in = in + delta;
7023 last = last + delta;
7024 }
7025 end = ctxt->input->end;
7026 }
7027 }
7028 if (*in != limit) goto need_complex;
7029 } else {
7030 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7031 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7032 in++;
7033 if (in >= end) {
7034 const xmlChar *oldbase = ctxt->input->base;
7035 GROW;
7036 if (oldbase != ctxt->input->base) {
7037 long delta = ctxt->input->base - oldbase;
7038 start = start + delta;
7039 in = in + delta;
7040 }
7041 end = ctxt->input->end;
7042 }
7043 }
7044 last = in;
7045 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007046 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007047 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007048 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007049 *len = last - start;
7050 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007051 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007052 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007053 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007054 }
7055 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007056 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007057 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007058need_complex:
7059 if (alloc) *alloc = 1;
7060 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007061}
7062
7063/**
7064 * xmlParseAttribute2:
7065 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007066 * @pref: the element prefix
7067 * @elem: the element name
7068 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007069 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007070 * @len: an int * to save the length of the attribute
7071 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007072 *
7073 * parse an attribute in the new SAX2 framework.
7074 *
7075 * Returns the attribute name, and the value in *value, .
7076 */
7077
7078static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007079xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7080 const xmlChar *pref, const xmlChar *elem,
7081 const xmlChar **prefix, xmlChar **value,
7082 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007083 const xmlChar *name;
7084 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007085 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007086
7087 *value = NULL;
7088 GROW;
7089 name = xmlParseQName(ctxt, prefix);
7090 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007091 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7092 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007093 return(NULL);
7094 }
7095
7096 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007097 * get the type if needed
7098 */
7099 if (ctxt->attsSpecial != NULL) {
7100 int type;
7101
7102 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7103 pref, elem, *prefix, name);
7104 if (type != 0) normalize = 1;
7105 }
7106
7107 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007108 * read the value
7109 */
7110 SKIP_BLANKS;
7111 if (RAW == '=') {
7112 NEXT;
7113 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007114 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007115 ctxt->instate = XML_PARSER_CONTENT;
7116 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007117 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007118 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007119 return(NULL);
7120 }
7121
7122 /*
7123 * Check that xml:lang conforms to the specification
7124 * No more registered as an error, just generate a warning now
7125 * since this was deprecated in XML second edition
7126 */
7127 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7128 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007129 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7130 "Malformed value for xml:lang : %s\n",
7131 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007132 }
7133 }
7134
7135 /*
7136 * Check that xml:space conforms to the specification
7137 */
7138 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7139 if (xmlStrEqual(val, BAD_CAST "default"))
7140 *(ctxt->space) = 0;
7141 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7142 *(ctxt->space) = 1;
7143 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007144 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007145"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7146 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007147 }
7148 }
7149
7150 *value = val;
7151 return(name);
7152}
7153
7154/**
7155 * xmlParseStartTag2:
7156 * @ctxt: an XML parser context
7157 *
7158 * parse a start of tag either for rule element or
7159 * EmptyElement. In both case we don't parse the tag closing chars.
7160 * This routine is called when running SAX2 parsing
7161 *
7162 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7163 *
7164 * [ WFC: Unique Att Spec ]
7165 * No attribute name may appear more than once in the same start-tag or
7166 * empty-element tag.
7167 *
7168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7169 *
7170 * [ WFC: Unique Att Spec ]
7171 * No attribute name may appear more than once in the same start-tag or
7172 * empty-element tag.
7173 *
7174 * With namespace:
7175 *
7176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7177 *
7178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7179 *
7180 * Returns the element name parsed
7181 */
7182
7183static const xmlChar *
7184xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007185 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007186 const xmlChar *localname;
7187 const xmlChar *prefix;
7188 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007189 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007190 const xmlChar *nsname;
7191 xmlChar *attvalue;
7192 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007193 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007194 int nratts, nbatts, nbdef;
7195 int i, j, nbNs, attval;
7196 const xmlChar *base;
7197 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007198
7199 if (RAW != '<') return(NULL);
7200 NEXT1;
7201
7202 /*
7203 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7204 * point since the attribute values may be stored as pointers to
7205 * the buffer and calling SHRINK would destroy them !
7206 * The Shrinking is only possible once the full set of attribute
7207 * callbacks have been done.
7208 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007209reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007210 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007211 base = ctxt->input->base;
7212 cur = ctxt->input->cur - ctxt->input->base;
7213 nbatts = 0;
7214 nratts = 0;
7215 nbdef = 0;
7216 nbNs = 0;
7217 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007218
7219 localname = xmlParseQName(ctxt, &prefix);
7220 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007221 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7222 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007223 return(NULL);
7224 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007225 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007226
7227 /*
7228 * Now parse the attributes, it ends up with the ending
7229 *
7230 * (S Attribute)* S?
7231 */
7232 SKIP_BLANKS;
7233 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007234 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007235
7236 while ((RAW != '>') &&
7237 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007238 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007239 const xmlChar *q = CUR_PTR;
7240 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007241 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007242
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007243 attname = xmlParseAttribute2(ctxt, prefix, localname,
7244 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007245 if ((attname != NULL) && (attvalue != NULL)) {
7246 if (len < 0) len = xmlStrlen(attvalue);
7247 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007248 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7249 xmlURIPtr uri;
7250
7251 if (*URL != 0) {
7252 uri = xmlParseURI((const char *) URL);
7253 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007254 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7255 "xmlns: %s not a valid URI\n",
7256 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007257 } else {
7258 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007259 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7260 "xmlns: URI %s is not absolute\n",
7261 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007262 }
7263 xmlFreeURI(uri);
7264 }
7265 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007266 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007267 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007268 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007269 for (j = 1;j <= nbNs;j++)
7270 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7271 break;
7272 if (j <= nbNs)
7273 xmlErrAttributeDup(ctxt, NULL, attname);
7274 else
7275 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007276 if (alloc != 0) xmlFree(attvalue);
7277 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007278 continue;
7279 }
7280 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007281 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7282 xmlURIPtr uri;
7283
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007284 if (attname == ctxt->str_xml) {
7285 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007286 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7287 "xml namespace prefix mapped to wrong URI\n",
7288 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007289 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007290 /*
7291 * Do not keep a namespace definition node
7292 */
7293 if (alloc != 0) xmlFree(attvalue);
7294 SKIP_BLANKS;
7295 continue;
7296 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007297 uri = xmlParseURI((const char *) URL);
7298 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007299 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7300 "xmlns:%s: '%s' is not a valid URI\n",
7301 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007302 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007303 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007304 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7305 "xmlns:%s: URI %s is not absolute\n",
7306 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007307 }
7308 xmlFreeURI(uri);
7309 }
7310
Daniel Veillard0fb18932003-09-07 09:14:37 +00007311 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007312 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007313 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007314 for (j = 1;j <= nbNs;j++)
7315 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7316 break;
7317 if (j <= nbNs)
7318 xmlErrAttributeDup(ctxt, aprefix, attname);
7319 else
7320 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007321 if (alloc != 0) xmlFree(attvalue);
7322 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007323 continue;
7324 }
7325
7326 /*
7327 * Add the pair to atts
7328 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007329 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7330 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007331 if (attvalue[len] == 0)
7332 xmlFree(attvalue);
7333 goto failed;
7334 }
7335 maxatts = ctxt->maxatts;
7336 atts = ctxt->atts;
7337 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007338 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007339 atts[nbatts++] = attname;
7340 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007341 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007342 atts[nbatts++] = attvalue;
7343 attvalue += len;
7344 atts[nbatts++] = attvalue;
7345 /*
7346 * tag if some deallocation is needed
7347 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007348 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007349 } else {
7350 if ((attvalue != NULL) && (attvalue[len] == 0))
7351 xmlFree(attvalue);
7352 }
7353
7354failed:
7355
7356 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007357 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007358 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7359 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007360 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007361 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7362 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007363 }
7364 SKIP_BLANKS;
7365 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7366 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007367 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007368 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007369 break;
7370 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007371 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007372 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007373 }
7374
Daniel Veillard0fb18932003-09-07 09:14:37 +00007375 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007376 * The attributes defaulting
7377 */
7378 if (ctxt->attsDefault != NULL) {
7379 xmlDefAttrsPtr defaults;
7380
7381 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7382 if (defaults != NULL) {
7383 for (i = 0;i < defaults->nbAttrs;i++) {
7384 attname = defaults->values[4 * i];
7385 aprefix = defaults->values[4 * i + 1];
7386
7387 /*
7388 * special work for namespaces defaulted defs
7389 */
7390 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7391 /*
7392 * check that it's not a defined namespace
7393 */
7394 for (j = 1;j <= nbNs;j++)
7395 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7396 break;
7397 if (j <= nbNs) continue;
7398
7399 nsname = xmlGetNamespace(ctxt, NULL);
7400 if (nsname != defaults->values[4 * i + 2]) {
7401 if (nsPush(ctxt, NULL,
7402 defaults->values[4 * i + 2]) > 0)
7403 nbNs++;
7404 }
7405 } else if (aprefix == ctxt->str_xmlns) {
7406 /*
7407 * check that it's not a defined namespace
7408 */
7409 for (j = 1;j <= nbNs;j++)
7410 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7411 break;
7412 if (j <= nbNs) continue;
7413
7414 nsname = xmlGetNamespace(ctxt, attname);
7415 if (nsname != defaults->values[2]) {
7416 if (nsPush(ctxt, attname,
7417 defaults->values[4 * i + 2]) > 0)
7418 nbNs++;
7419 }
7420 } else {
7421 /*
7422 * check that it's not a defined attribute
7423 */
7424 for (j = 0;j < nbatts;j+=5) {
7425 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7426 break;
7427 }
7428 if (j < nbatts) continue;
7429
7430 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7431 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007432 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007433 }
7434 maxatts = ctxt->maxatts;
7435 atts = ctxt->atts;
7436 }
7437 atts[nbatts++] = attname;
7438 atts[nbatts++] = aprefix;
7439 if (aprefix == NULL)
7440 atts[nbatts++] = NULL;
7441 else
7442 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7443 atts[nbatts++] = defaults->values[4 * i + 2];
7444 atts[nbatts++] = defaults->values[4 * i + 3];
7445 nbdef++;
7446 }
7447 }
7448 }
7449 }
7450
Daniel Veillarde70c8772003-11-25 07:21:18 +00007451 /*
7452 * The attributes checkings
7453 */
7454 for (i = 0; i < nbatts;i += 5) {
7455 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7456 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7457 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7458 "Namespace prefix %s for %s on %s is not defined\n",
7459 atts[i + 1], atts[i], localname);
7460 }
7461 atts[i + 2] = nsname;
7462 /*
7463 * [ WFC: Unique Att Spec ]
7464 * No attribute name may appear more than once in the same
7465 * start-tag or empty-element tag.
7466 * As extended by the Namespace in XML REC.
7467 */
7468 for (j = 0; j < i;j += 5) {
7469 if (atts[i] == atts[j]) {
7470 if (atts[i+1] == atts[j+1]) {
7471 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7472 break;
7473 }
7474 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7475 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7476 "Namespaced Attribute %s in '%s' redefined\n",
7477 atts[i], nsname, NULL);
7478 break;
7479 }
7480 }
7481 }
7482 }
7483
Daniel Veillarde57ec792003-09-10 10:50:59 +00007484 nsname = xmlGetNamespace(ctxt, prefix);
7485 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007486 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7487 "Namespace prefix %s on %s is not defined\n",
7488 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007489 }
7490 *pref = prefix;
7491 *URI = nsname;
7492
7493 /*
7494 * SAX: Start of Element !
7495 */
7496 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7497 (!ctxt->disableSAX)) {
7498 if (nbNs > 0)
7499 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7500 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7501 nbatts / 5, nbdef, atts);
7502 else
7503 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7504 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7505 }
7506
7507 /*
7508 * Free up attribute allocated strings if needed
7509 */
7510 if (attval != 0) {
7511 for (i = 3,j = 0; j < nratts;i += 5,j++)
7512 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7513 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007514 }
7515
7516 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007517
7518base_changed:
7519 /*
7520 * the attribute strings are valid iif the base didn't changed
7521 */
7522 if (attval != 0) {
7523 for (i = 3,j = 0; j < nratts;i += 5,j++)
7524 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7525 xmlFree((xmlChar *) atts[i]);
7526 }
7527 ctxt->input->cur = ctxt->input->base + cur;
7528 if (ctxt->wellFormed == 1) {
7529 goto reparse;
7530 }
7531 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007532}
7533
7534/**
7535 * xmlParseEndTag2:
7536 * @ctxt: an XML parser context
7537 * @line: line of the start tag
7538 * @nsNr: number of namespaces on the start tag
7539 *
7540 * parse an end of tag
7541 *
7542 * [42] ETag ::= '</' Name S? '>'
7543 *
7544 * With namespace
7545 *
7546 * [NS 9] ETag ::= '</' QName S? '>'
7547 */
7548
7549static void
7550xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007551 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007552 const xmlChar *name;
7553
7554 GROW;
7555 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007556 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 return;
7558 }
7559 SKIP(2);
7560
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007561 if ((tlen > 0) && (memcmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
7562 if (ctxt->input->cur[tlen] == '>') {
7563 ctxt->input->cur += tlen + 1;
7564 goto done;
7565 }
7566 ctxt->input->cur += tlen;
7567 name = (xmlChar*)1;
7568 } else {
7569 if (prefix == NULL)
7570 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7571 else
7572 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7573 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007574
7575 /*
7576 * We should definitely be at the ending "S? '>'" part
7577 */
7578 GROW;
7579 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007580 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007581 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007582 } else
7583 NEXT1;
7584
7585 /*
7586 * [ WFC: Element Type Match ]
7587 * The Name in an element's end-tag must match the element type in the
7588 * start-tag.
7589 *
7590 */
7591 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007592 if (name == NULL) name = BAD_CAST "unparseable";
7593 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007594 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007595 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007596 }
7597
7598 /*
7599 * SAX: End of Tag
7600 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007601done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007602 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7603 (!ctxt->disableSAX))
7604 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7605
Daniel Veillard0fb18932003-09-07 09:14:37 +00007606 spacePop(ctxt);
7607 if (nsNr != 0)
7608 nsPop(ctxt, nsNr);
7609 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007610}
7611
7612/**
Owen Taylor3473f882001-02-23 17:55:21 +00007613 * xmlParseCDSect:
7614 * @ctxt: an XML parser context
7615 *
7616 * Parse escaped pure raw content.
7617 *
7618 * [18] CDSect ::= CDStart CData CDEnd
7619 *
7620 * [19] CDStart ::= '<![CDATA['
7621 *
7622 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7623 *
7624 * [21] CDEnd ::= ']]>'
7625 */
7626void
7627xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7628 xmlChar *buf = NULL;
7629 int len = 0;
7630 int size = XML_PARSER_BUFFER_SIZE;
7631 int r, rl;
7632 int s, sl;
7633 int cur, l;
7634 int count = 0;
7635
Daniel Veillard8f597c32003-10-06 08:19:27 +00007636 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007637 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007638 SKIP(9);
7639 } else
7640 return;
7641
7642 ctxt->instate = XML_PARSER_CDATA_SECTION;
7643 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007644 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007645 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007646 ctxt->instate = XML_PARSER_CONTENT;
7647 return;
7648 }
7649 NEXTL(rl);
7650 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007651 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007652 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007653 ctxt->instate = XML_PARSER_CONTENT;
7654 return;
7655 }
7656 NEXTL(sl);
7657 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007658 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007659 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007660 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007661 return;
7662 }
William M. Brack871611b2003-10-18 04:53:14 +00007663 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007664 ((r != ']') || (s != ']') || (cur != '>'))) {
7665 if (len + 5 >= size) {
7666 size *= 2;
7667 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7668 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007669 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007670 return;
7671 }
7672 }
7673 COPY_BUF(rl,buf,len,r);
7674 r = s;
7675 rl = sl;
7676 s = cur;
7677 sl = l;
7678 count++;
7679 if (count > 50) {
7680 GROW;
7681 count = 0;
7682 }
7683 NEXTL(l);
7684 cur = CUR_CHAR(l);
7685 }
7686 buf[len] = 0;
7687 ctxt->instate = XML_PARSER_CONTENT;
7688 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007689 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007690 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007691 xmlFree(buf);
7692 return;
7693 }
7694 NEXTL(l);
7695
7696 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007697 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007698 */
7699 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7700 if (ctxt->sax->cdataBlock != NULL)
7701 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007702 else if (ctxt->sax->characters != NULL)
7703 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007704 }
7705 xmlFree(buf);
7706}
7707
7708/**
7709 * xmlParseContent:
7710 * @ctxt: an XML parser context
7711 *
7712 * Parse a content:
7713 *
7714 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7715 */
7716
7717void
7718xmlParseContent(xmlParserCtxtPtr ctxt) {
7719 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007720 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007721 ((RAW != '<') || (NXT(1) != '/'))) {
7722 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007723 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007724 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007725
7726 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007727 * First case : a Processing Instruction.
7728 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007729 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007730 xmlParsePI(ctxt);
7731 }
7732
7733 /*
7734 * Second case : a CDSection
7735 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007736 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007737 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007738 xmlParseCDSect(ctxt);
7739 }
7740
7741 /*
7742 * Third case : a comment
7743 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007744 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007745 (NXT(2) == '-') && (NXT(3) == '-')) {
7746 xmlParseComment(ctxt);
7747 ctxt->instate = XML_PARSER_CONTENT;
7748 }
7749
7750 /*
7751 * Fourth case : a sub-element.
7752 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007753 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007754 xmlParseElement(ctxt);
7755 }
7756
7757 /*
7758 * Fifth case : a reference. If if has not been resolved,
7759 * parsing returns it's Name, create the node
7760 */
7761
Daniel Veillard21a0f912001-02-25 19:54:14 +00007762 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007763 xmlParseReference(ctxt);
7764 }
7765
7766 /*
7767 * Last case, text. Note that References are handled directly.
7768 */
7769 else {
7770 xmlParseCharData(ctxt, 0);
7771 }
7772
7773 GROW;
7774 /*
7775 * Pop-up of finished entities.
7776 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007777 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007778 xmlPopInput(ctxt);
7779 SHRINK;
7780
Daniel Veillardfdc91562002-07-01 21:52:03 +00007781 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007782 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7783 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007784 ctxt->instate = XML_PARSER_EOF;
7785 break;
7786 }
7787 }
7788}
7789
7790/**
7791 * xmlParseElement:
7792 * @ctxt: an XML parser context
7793 *
7794 * parse an XML element, this is highly recursive
7795 *
7796 * [39] element ::= EmptyElemTag | STag content ETag
7797 *
7798 * [ WFC: Element Type Match ]
7799 * The Name in an element's end-tag must match the element type in the
7800 * start-tag.
7801 *
Owen Taylor3473f882001-02-23 17:55:21 +00007802 */
7803
7804void
7805xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007806 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007807 const xmlChar *prefix;
7808 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007809 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007810 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007811 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007813
7814 /* Capture start position */
7815 if (ctxt->record_info) {
7816 node_info.begin_pos = ctxt->input->consumed +
7817 (CUR_PTR - ctxt->input->base);
7818 node_info.begin_line = ctxt->input->line;
7819 }
7820
7821 if (ctxt->spaceNr == 0)
7822 spacePush(ctxt, -1);
7823 else
7824 spacePush(ctxt, *ctxt->space);
7825
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007826 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007827#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007828 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007829#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007830 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007831#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007832 else
7833 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007834#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007835 if (name == NULL) {
7836 spacePop(ctxt);
7837 return;
7838 }
7839 namePush(ctxt, name);
7840 ret = ctxt->node;
7841
Daniel Veillard4432df22003-09-28 18:58:27 +00007842#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007843 /*
7844 * [ VC: Root Element Type ]
7845 * The Name in the document type declaration must match the element
7846 * type of the root element.
7847 */
7848 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7849 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7850 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007851#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007852
7853 /*
7854 * Check for an Empty Element.
7855 */
7856 if ((RAW == '/') && (NXT(1) == '>')) {
7857 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007858 if (ctxt->sax2) {
7859 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7860 (!ctxt->disableSAX))
7861 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007862#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 } else {
7864 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7865 (!ctxt->disableSAX))
7866 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007867#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007868 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007869 namePop(ctxt);
7870 spacePop(ctxt);
7871 if (nsNr != ctxt->nsNr)
7872 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007873 if ( ret != NULL && ctxt->record_info ) {
7874 node_info.end_pos = ctxt->input->consumed +
7875 (CUR_PTR - ctxt->input->base);
7876 node_info.end_line = ctxt->input->line;
7877 node_info.node = ret;
7878 xmlParserAddNodeInfo(ctxt, &node_info);
7879 }
7880 return;
7881 }
7882 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007883 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007884 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007885 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7886 "Couldn't find end of Start Tag %s line %d\n",
7887 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007888
7889 /*
7890 * end of parsing of this node.
7891 */
7892 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007893 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007894 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007895 if (nsNr != ctxt->nsNr)
7896 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007897
7898 /*
7899 * Capture end position and add node
7900 */
7901 if ( ret != NULL && ctxt->record_info ) {
7902 node_info.end_pos = ctxt->input->consumed +
7903 (CUR_PTR - ctxt->input->base);
7904 node_info.end_line = ctxt->input->line;
7905 node_info.node = ret;
7906 xmlParserAddNodeInfo(ctxt, &node_info);
7907 }
7908 return;
7909 }
7910
7911 /*
7912 * Parse the content of the element:
7913 */
7914 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007915 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007916 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007917 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007918 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007919
7920 /*
7921 * end of parsing of this node.
7922 */
7923 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007924 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007925 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007926 if (nsNr != ctxt->nsNr)
7927 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007928 return;
7929 }
7930
7931 /*
7932 * parse the end of tag: '</' should be here.
7933 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007934 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007935 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007936 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007937 }
7938#ifdef LIBXML_SAX1_ENABLED
7939 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00007941#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007942
7943 /*
7944 * Capture end position and add node
7945 */
7946 if ( ret != NULL && ctxt->record_info ) {
7947 node_info.end_pos = ctxt->input->consumed +
7948 (CUR_PTR - ctxt->input->base);
7949 node_info.end_line = ctxt->input->line;
7950 node_info.node = ret;
7951 xmlParserAddNodeInfo(ctxt, &node_info);
7952 }
7953}
7954
7955/**
7956 * xmlParseVersionNum:
7957 * @ctxt: an XML parser context
7958 *
7959 * parse the XML version value.
7960 *
7961 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7962 *
7963 * Returns the string giving the XML version number, or NULL
7964 */
7965xmlChar *
7966xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7967 xmlChar *buf = NULL;
7968 int len = 0;
7969 int size = 10;
7970 xmlChar cur;
7971
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007972 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007973 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007974 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007975 return(NULL);
7976 }
7977 cur = CUR;
7978 while (((cur >= 'a') && (cur <= 'z')) ||
7979 ((cur >= 'A') && (cur <= 'Z')) ||
7980 ((cur >= '0') && (cur <= '9')) ||
7981 (cur == '_') || (cur == '.') ||
7982 (cur == ':') || (cur == '-')) {
7983 if (len + 1 >= size) {
7984 size *= 2;
7985 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7986 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007987 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007988 return(NULL);
7989 }
7990 }
7991 buf[len++] = cur;
7992 NEXT;
7993 cur=CUR;
7994 }
7995 buf[len] = 0;
7996 return(buf);
7997}
7998
7999/**
8000 * xmlParseVersionInfo:
8001 * @ctxt: an XML parser context
8002 *
8003 * parse the XML version.
8004 *
8005 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8006 *
8007 * [25] Eq ::= S? '=' S?
8008 *
8009 * Returns the version string, e.g. "1.0"
8010 */
8011
8012xmlChar *
8013xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8014 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008015
Daniel Veillarda07050d2003-10-19 14:46:32 +00008016 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008017 SKIP(7);
8018 SKIP_BLANKS;
8019 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008020 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008021 return(NULL);
8022 }
8023 NEXT;
8024 SKIP_BLANKS;
8025 if (RAW == '"') {
8026 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008027 version = xmlParseVersionNum(ctxt);
8028 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008029 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008030 } else
8031 NEXT;
8032 } else if (RAW == '\''){
8033 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008034 version = xmlParseVersionNum(ctxt);
8035 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008036 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008037 } else
8038 NEXT;
8039 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008040 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008041 }
8042 }
8043 return(version);
8044}
8045
8046/**
8047 * xmlParseEncName:
8048 * @ctxt: an XML parser context
8049 *
8050 * parse the XML encoding name
8051 *
8052 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8053 *
8054 * Returns the encoding name value or NULL
8055 */
8056xmlChar *
8057xmlParseEncName(xmlParserCtxtPtr ctxt) {
8058 xmlChar *buf = NULL;
8059 int len = 0;
8060 int size = 10;
8061 xmlChar cur;
8062
8063 cur = CUR;
8064 if (((cur >= 'a') && (cur <= 'z')) ||
8065 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008066 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008067 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008068 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008069 return(NULL);
8070 }
8071
8072 buf[len++] = cur;
8073 NEXT;
8074 cur = CUR;
8075 while (((cur >= 'a') && (cur <= 'z')) ||
8076 ((cur >= 'A') && (cur <= 'Z')) ||
8077 ((cur >= '0') && (cur <= '9')) ||
8078 (cur == '.') || (cur == '_') ||
8079 (cur == '-')) {
8080 if (len + 1 >= size) {
8081 size *= 2;
8082 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8083 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008084 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008085 return(NULL);
8086 }
8087 }
8088 buf[len++] = cur;
8089 NEXT;
8090 cur = CUR;
8091 if (cur == 0) {
8092 SHRINK;
8093 GROW;
8094 cur = CUR;
8095 }
8096 }
8097 buf[len] = 0;
8098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008099 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008100 }
8101 return(buf);
8102}
8103
8104/**
8105 * xmlParseEncodingDecl:
8106 * @ctxt: an XML parser context
8107 *
8108 * parse the XML encoding declaration
8109 *
8110 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8111 *
8112 * this setups the conversion filters.
8113 *
8114 * Returns the encoding value or NULL
8115 */
8116
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008117const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008118xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8119 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008120
8121 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008122 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008123 SKIP(8);
8124 SKIP_BLANKS;
8125 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008126 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008127 return(NULL);
8128 }
8129 NEXT;
8130 SKIP_BLANKS;
8131 if (RAW == '"') {
8132 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008133 encoding = xmlParseEncName(ctxt);
8134 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008135 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008136 } else
8137 NEXT;
8138 } else if (RAW == '\''){
8139 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008140 encoding = xmlParseEncName(ctxt);
8141 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008142 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008143 } else
8144 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008145 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008146 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008147 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008148 /*
8149 * UTF-16 encoding stwich has already taken place at this stage,
8150 * more over the little-endian/big-endian selection is already done
8151 */
8152 if ((encoding != NULL) &&
8153 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8154 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008155 if (ctxt->encoding != NULL)
8156 xmlFree((xmlChar *) ctxt->encoding);
8157 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008158 }
8159 /*
8160 * UTF-8 encoding is handled natively
8161 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008162 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008163 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8164 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008165 if (ctxt->encoding != NULL)
8166 xmlFree((xmlChar *) ctxt->encoding);
8167 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008168 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008169 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008170 xmlCharEncodingHandlerPtr handler;
8171
8172 if (ctxt->input->encoding != NULL)
8173 xmlFree((xmlChar *) ctxt->input->encoding);
8174 ctxt->input->encoding = encoding;
8175
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008176 handler = xmlFindCharEncodingHandler((const char *) encoding);
8177 if (handler != NULL) {
8178 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008179 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008180 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008181 "Unsupported encoding %s\n", encoding);
8182 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008183 }
8184 }
8185 }
8186 return(encoding);
8187}
8188
8189/**
8190 * xmlParseSDDecl:
8191 * @ctxt: an XML parser context
8192 *
8193 * parse the XML standalone declaration
8194 *
8195 * [32] SDDecl ::= S 'standalone' Eq
8196 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8197 *
8198 * [ VC: Standalone Document Declaration ]
8199 * TODO The standalone document declaration must have the value "no"
8200 * if any external markup declarations contain declarations of:
8201 * - attributes with default values, if elements to which these
8202 * attributes apply appear in the document without specifications
8203 * of values for these attributes, or
8204 * - entities (other than amp, lt, gt, apos, quot), if references
8205 * to those entities appear in the document, or
8206 * - attributes with values subject to normalization, where the
8207 * attribute appears in the document with a value which will change
8208 * as a result of normalization, or
8209 * - element types with element content, if white space occurs directly
8210 * within any instance of those types.
8211 *
8212 * Returns 1 if standalone, 0 otherwise
8213 */
8214
8215int
8216xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8217 int standalone = -1;
8218
8219 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008220 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008221 SKIP(10);
8222 SKIP_BLANKS;
8223 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008224 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008225 return(standalone);
8226 }
8227 NEXT;
8228 SKIP_BLANKS;
8229 if (RAW == '\''){
8230 NEXT;
8231 if ((RAW == 'n') && (NXT(1) == 'o')) {
8232 standalone = 0;
8233 SKIP(2);
8234 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8235 (NXT(2) == 's')) {
8236 standalone = 1;
8237 SKIP(3);
8238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008239 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008240 }
8241 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008242 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008243 } else
8244 NEXT;
8245 } else if (RAW == '"'){
8246 NEXT;
8247 if ((RAW == 'n') && (NXT(1) == 'o')) {
8248 standalone = 0;
8249 SKIP(2);
8250 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8251 (NXT(2) == 's')) {
8252 standalone = 1;
8253 SKIP(3);
8254 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008255 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008256 }
8257 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008258 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008259 } else
8260 NEXT;
8261 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008262 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008263 }
8264 }
8265 return(standalone);
8266}
8267
8268/**
8269 * xmlParseXMLDecl:
8270 * @ctxt: an XML parser context
8271 *
8272 * parse an XML declaration header
8273 *
8274 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8275 */
8276
8277void
8278xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8279 xmlChar *version;
8280
8281 /*
8282 * We know that '<?xml' is here.
8283 */
8284 SKIP(5);
8285
William M. Brack76e95df2003-10-18 16:20:14 +00008286 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008287 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8288 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008289 }
8290 SKIP_BLANKS;
8291
8292 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008293 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008294 */
8295 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008296 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008297 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008298 } else {
8299 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8300 /*
8301 * TODO: Blueberry should be detected here
8302 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008303 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8304 "Unsupported version '%s'\n",
8305 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008306 }
8307 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008308 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008309 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008310 }
Owen Taylor3473f882001-02-23 17:55:21 +00008311
8312 /*
8313 * We may have the encoding declaration
8314 */
William M. Brack76e95df2003-10-18 16:20:14 +00008315 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008316 if ((RAW == '?') && (NXT(1) == '>')) {
8317 SKIP(2);
8318 return;
8319 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008321 }
8322 xmlParseEncodingDecl(ctxt);
8323 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8324 /*
8325 * The XML REC instructs us to stop parsing right here
8326 */
8327 return;
8328 }
8329
8330 /*
8331 * We may have the standalone status.
8332 */
William M. Brack76e95df2003-10-18 16:20:14 +00008333 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008334 if ((RAW == '?') && (NXT(1) == '>')) {
8335 SKIP(2);
8336 return;
8337 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008339 }
8340 SKIP_BLANKS;
8341 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8342
8343 SKIP_BLANKS;
8344 if ((RAW == '?') && (NXT(1) == '>')) {
8345 SKIP(2);
8346 } else if (RAW == '>') {
8347 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008348 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008349 NEXT;
8350 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008351 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008352 MOVETO_ENDTAG(CUR_PTR);
8353 NEXT;
8354 }
8355}
8356
8357/**
8358 * xmlParseMisc:
8359 * @ctxt: an XML parser context
8360 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008361 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008362 *
8363 * [27] Misc ::= Comment | PI | S
8364 */
8365
8366void
8367xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008368 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008369 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008370 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008371 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008372 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008373 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008374 NEXT;
8375 } else
8376 xmlParseComment(ctxt);
8377 }
8378}
8379
8380/**
8381 * xmlParseDocument:
8382 * @ctxt: an XML parser context
8383 *
8384 * parse an XML document (and build a tree if using the standard SAX
8385 * interface).
8386 *
8387 * [1] document ::= prolog element Misc*
8388 *
8389 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8390 *
8391 * Returns 0, -1 in case of error. the parser context is augmented
8392 * as a result of the parsing.
8393 */
8394
8395int
8396xmlParseDocument(xmlParserCtxtPtr ctxt) {
8397 xmlChar start[4];
8398 xmlCharEncoding enc;
8399
8400 xmlInitParser();
8401
8402 GROW;
8403
8404 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008405 * SAX: detecting the level.
8406 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008407 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008408
8409 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008410 * SAX: beginning of the document processing.
8411 */
8412 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8413 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8414
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008415 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8416 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008417 /*
8418 * Get the 4 first bytes and decode the charset
8419 * if enc != XML_CHAR_ENCODING_NONE
8420 * plug some encoding conversion routines.
8421 */
8422 start[0] = RAW;
8423 start[1] = NXT(1);
8424 start[2] = NXT(2);
8425 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008426 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008427 if (enc != XML_CHAR_ENCODING_NONE) {
8428 xmlSwitchEncoding(ctxt, enc);
8429 }
Owen Taylor3473f882001-02-23 17:55:21 +00008430 }
8431
8432
8433 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008434 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008435 }
8436
8437 /*
8438 * Check for the XMLDecl in the Prolog.
8439 */
8440 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008441 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008442
8443 /*
8444 * Note that we will switch encoding on the fly.
8445 */
8446 xmlParseXMLDecl(ctxt);
8447 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8448 /*
8449 * The XML REC instructs us to stop parsing right here
8450 */
8451 return(-1);
8452 }
8453 ctxt->standalone = ctxt->input->standalone;
8454 SKIP_BLANKS;
8455 } else {
8456 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8457 }
8458 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8459 ctxt->sax->startDocument(ctxt->userData);
8460
8461 /*
8462 * The Misc part of the Prolog
8463 */
8464 GROW;
8465 xmlParseMisc(ctxt);
8466
8467 /*
8468 * Then possibly doc type declaration(s) and more Misc
8469 * (doctypedecl Misc*)?
8470 */
8471 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008472 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008473
8474 ctxt->inSubset = 1;
8475 xmlParseDocTypeDecl(ctxt);
8476 if (RAW == '[') {
8477 ctxt->instate = XML_PARSER_DTD;
8478 xmlParseInternalSubset(ctxt);
8479 }
8480
8481 /*
8482 * Create and update the external subset.
8483 */
8484 ctxt->inSubset = 2;
8485 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8486 (!ctxt->disableSAX))
8487 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8488 ctxt->extSubSystem, ctxt->extSubURI);
8489 ctxt->inSubset = 0;
8490
8491
8492 ctxt->instate = XML_PARSER_PROLOG;
8493 xmlParseMisc(ctxt);
8494 }
8495
8496 /*
8497 * Time to start parsing the tree itself
8498 */
8499 GROW;
8500 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008501 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8502 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008503 } else {
8504 ctxt->instate = XML_PARSER_CONTENT;
8505 xmlParseElement(ctxt);
8506 ctxt->instate = XML_PARSER_EPILOG;
8507
8508
8509 /*
8510 * The Misc part at the end
8511 */
8512 xmlParseMisc(ctxt);
8513
Daniel Veillard561b7f82002-03-20 21:55:57 +00008514 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008515 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008516 }
8517 ctxt->instate = XML_PARSER_EOF;
8518 }
8519
8520 /*
8521 * SAX: end of the document processing.
8522 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008523 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008524 ctxt->sax->endDocument(ctxt->userData);
8525
Daniel Veillard5997aca2002-03-18 18:36:20 +00008526 /*
8527 * Remove locally kept entity definitions if the tree was not built
8528 */
8529 if ((ctxt->myDoc != NULL) &&
8530 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8531 xmlFreeDoc(ctxt->myDoc);
8532 ctxt->myDoc = NULL;
8533 }
8534
Daniel Veillardc7612992002-02-17 22:47:37 +00008535 if (! ctxt->wellFormed) {
8536 ctxt->valid = 0;
8537 return(-1);
8538 }
Owen Taylor3473f882001-02-23 17:55:21 +00008539 return(0);
8540}
8541
8542/**
8543 * xmlParseExtParsedEnt:
8544 * @ctxt: an XML parser context
8545 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008546 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008547 * An external general parsed entity is well-formed if it matches the
8548 * production labeled extParsedEnt.
8549 *
8550 * [78] extParsedEnt ::= TextDecl? content
8551 *
8552 * Returns 0, -1 in case of error. the parser context is augmented
8553 * as a result of the parsing.
8554 */
8555
8556int
8557xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8558 xmlChar start[4];
8559 xmlCharEncoding enc;
8560
8561 xmlDefaultSAXHandlerInit();
8562
Daniel Veillard309f81d2003-09-23 09:02:53 +00008563 xmlDetectSAX2(ctxt);
8564
Owen Taylor3473f882001-02-23 17:55:21 +00008565 GROW;
8566
8567 /*
8568 * SAX: beginning of the document processing.
8569 */
8570 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8571 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8572
8573 /*
8574 * Get the 4 first bytes and decode the charset
8575 * if enc != XML_CHAR_ENCODING_NONE
8576 * plug some encoding conversion routines.
8577 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008578 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8579 start[0] = RAW;
8580 start[1] = NXT(1);
8581 start[2] = NXT(2);
8582 start[3] = NXT(3);
8583 enc = xmlDetectCharEncoding(start, 4);
8584 if (enc != XML_CHAR_ENCODING_NONE) {
8585 xmlSwitchEncoding(ctxt, enc);
8586 }
Owen Taylor3473f882001-02-23 17:55:21 +00008587 }
8588
8589
8590 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008591 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008592 }
8593
8594 /*
8595 * Check for the XMLDecl in the Prolog.
8596 */
8597 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008598 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008599
8600 /*
8601 * Note that we will switch encoding on the fly.
8602 */
8603 xmlParseXMLDecl(ctxt);
8604 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8605 /*
8606 * The XML REC instructs us to stop parsing right here
8607 */
8608 return(-1);
8609 }
8610 SKIP_BLANKS;
8611 } else {
8612 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8613 }
8614 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8615 ctxt->sax->startDocument(ctxt->userData);
8616
8617 /*
8618 * Doing validity checking on chunk doesn't make sense
8619 */
8620 ctxt->instate = XML_PARSER_CONTENT;
8621 ctxt->validate = 0;
8622 ctxt->loadsubset = 0;
8623 ctxt->depth = 0;
8624
8625 xmlParseContent(ctxt);
8626
8627 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008628 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008629 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008630 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008631 }
8632
8633 /*
8634 * SAX: end of the document processing.
8635 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008636 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008637 ctxt->sax->endDocument(ctxt->userData);
8638
8639 if (! ctxt->wellFormed) return(-1);
8640 return(0);
8641}
8642
Daniel Veillard73b013f2003-09-30 12:36:01 +00008643#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008644/************************************************************************
8645 * *
8646 * Progressive parsing interfaces *
8647 * *
8648 ************************************************************************/
8649
8650/**
8651 * xmlParseLookupSequence:
8652 * @ctxt: an XML parser context
8653 * @first: the first char to lookup
8654 * @next: the next char to lookup or zero
8655 * @third: the next char to lookup or zero
8656 *
8657 * Try to find if a sequence (first, next, third) or just (first next) or
8658 * (first) is available in the input stream.
8659 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8660 * to avoid rescanning sequences of bytes, it DOES change the state of the
8661 * parser, do not use liberally.
8662 *
8663 * Returns the index to the current parsing point if the full sequence
8664 * is available, -1 otherwise.
8665 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008666static int
Owen Taylor3473f882001-02-23 17:55:21 +00008667xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8668 xmlChar next, xmlChar third) {
8669 int base, len;
8670 xmlParserInputPtr in;
8671 const xmlChar *buf;
8672
8673 in = ctxt->input;
8674 if (in == NULL) return(-1);
8675 base = in->cur - in->base;
8676 if (base < 0) return(-1);
8677 if (ctxt->checkIndex > base)
8678 base = ctxt->checkIndex;
8679 if (in->buf == NULL) {
8680 buf = in->base;
8681 len = in->length;
8682 } else {
8683 buf = in->buf->buffer->content;
8684 len = in->buf->buffer->use;
8685 }
8686 /* take into account the sequence length */
8687 if (third) len -= 2;
8688 else if (next) len --;
8689 for (;base < len;base++) {
8690 if (buf[base] == first) {
8691 if (third != 0) {
8692 if ((buf[base + 1] != next) ||
8693 (buf[base + 2] != third)) continue;
8694 } else if (next != 0) {
8695 if (buf[base + 1] != next) continue;
8696 }
8697 ctxt->checkIndex = 0;
8698#ifdef DEBUG_PUSH
8699 if (next == 0)
8700 xmlGenericError(xmlGenericErrorContext,
8701 "PP: lookup '%c' found at %d\n",
8702 first, base);
8703 else if (third == 0)
8704 xmlGenericError(xmlGenericErrorContext,
8705 "PP: lookup '%c%c' found at %d\n",
8706 first, next, base);
8707 else
8708 xmlGenericError(xmlGenericErrorContext,
8709 "PP: lookup '%c%c%c' found at %d\n",
8710 first, next, third, base);
8711#endif
8712 return(base - (in->cur - in->base));
8713 }
8714 }
8715 ctxt->checkIndex = base;
8716#ifdef DEBUG_PUSH
8717 if (next == 0)
8718 xmlGenericError(xmlGenericErrorContext,
8719 "PP: lookup '%c' failed\n", first);
8720 else if (third == 0)
8721 xmlGenericError(xmlGenericErrorContext,
8722 "PP: lookup '%c%c' failed\n", first, next);
8723 else
8724 xmlGenericError(xmlGenericErrorContext,
8725 "PP: lookup '%c%c%c' failed\n", first, next, third);
8726#endif
8727 return(-1);
8728}
8729
8730/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008731 * xmlParseGetLasts:
8732 * @ctxt: an XML parser context
8733 * @lastlt: pointer to store the last '<' from the input
8734 * @lastgt: pointer to store the last '>' from the input
8735 *
8736 * Lookup the last < and > in the current chunk
8737 */
8738static void
8739xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8740 const xmlChar **lastgt) {
8741 const xmlChar *tmp;
8742
8743 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8744 xmlGenericError(xmlGenericErrorContext,
8745 "Internal error: xmlParseGetLasts\n");
8746 return;
8747 }
8748 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8749 tmp = ctxt->input->end;
8750 tmp--;
8751 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8752 (*tmp != '>')) tmp--;
8753 if (tmp < ctxt->input->base) {
8754 *lastlt = NULL;
8755 *lastgt = NULL;
8756 } else if (*tmp == '<') {
8757 *lastlt = tmp;
8758 tmp--;
8759 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8760 if (tmp < ctxt->input->base)
8761 *lastgt = NULL;
8762 else
8763 *lastgt = tmp;
8764 } else {
8765 *lastgt = tmp;
8766 tmp--;
8767 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8768 if (tmp < ctxt->input->base)
8769 *lastlt = NULL;
8770 else
8771 *lastlt = tmp;
8772 }
8773
8774 } else {
8775 *lastlt = NULL;
8776 *lastgt = NULL;
8777 }
8778}
8779/**
Owen Taylor3473f882001-02-23 17:55:21 +00008780 * xmlParseTryOrFinish:
8781 * @ctxt: an XML parser context
8782 * @terminate: last chunk indicator
8783 *
8784 * Try to progress on parsing
8785 *
8786 * Returns zero if no parsing was possible
8787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008788static int
Owen Taylor3473f882001-02-23 17:55:21 +00008789xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8790 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008791 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008792 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008793 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008794
8795#ifdef DEBUG_PUSH
8796 switch (ctxt->instate) {
8797 case XML_PARSER_EOF:
8798 xmlGenericError(xmlGenericErrorContext,
8799 "PP: try EOF\n"); break;
8800 case XML_PARSER_START:
8801 xmlGenericError(xmlGenericErrorContext,
8802 "PP: try START\n"); break;
8803 case XML_PARSER_MISC:
8804 xmlGenericError(xmlGenericErrorContext,
8805 "PP: try MISC\n");break;
8806 case XML_PARSER_COMMENT:
8807 xmlGenericError(xmlGenericErrorContext,
8808 "PP: try COMMENT\n");break;
8809 case XML_PARSER_PROLOG:
8810 xmlGenericError(xmlGenericErrorContext,
8811 "PP: try PROLOG\n");break;
8812 case XML_PARSER_START_TAG:
8813 xmlGenericError(xmlGenericErrorContext,
8814 "PP: try START_TAG\n");break;
8815 case XML_PARSER_CONTENT:
8816 xmlGenericError(xmlGenericErrorContext,
8817 "PP: try CONTENT\n");break;
8818 case XML_PARSER_CDATA_SECTION:
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: try CDATA_SECTION\n");break;
8821 case XML_PARSER_END_TAG:
8822 xmlGenericError(xmlGenericErrorContext,
8823 "PP: try END_TAG\n");break;
8824 case XML_PARSER_ENTITY_DECL:
8825 xmlGenericError(xmlGenericErrorContext,
8826 "PP: try ENTITY_DECL\n");break;
8827 case XML_PARSER_ENTITY_VALUE:
8828 xmlGenericError(xmlGenericErrorContext,
8829 "PP: try ENTITY_VALUE\n");break;
8830 case XML_PARSER_ATTRIBUTE_VALUE:
8831 xmlGenericError(xmlGenericErrorContext,
8832 "PP: try ATTRIBUTE_VALUE\n");break;
8833 case XML_PARSER_DTD:
8834 xmlGenericError(xmlGenericErrorContext,
8835 "PP: try DTD\n");break;
8836 case XML_PARSER_EPILOG:
8837 xmlGenericError(xmlGenericErrorContext,
8838 "PP: try EPILOG\n");break;
8839 case XML_PARSER_PI:
8840 xmlGenericError(xmlGenericErrorContext,
8841 "PP: try PI\n");break;
8842 case XML_PARSER_IGNORE:
8843 xmlGenericError(xmlGenericErrorContext,
8844 "PP: try IGNORE\n");break;
8845 }
8846#endif
8847
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008848 if ((ctxt->input != NULL) &&
8849 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008850 xmlSHRINK(ctxt);
8851 ctxt->checkIndex = 0;
8852 }
8853 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008854
Daniel Veillarda880b122003-04-21 21:36:41 +00008855 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008856 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8857 return(0);
8858
8859
Owen Taylor3473f882001-02-23 17:55:21 +00008860 /*
8861 * Pop-up of finished entities.
8862 */
8863 while ((RAW == 0) && (ctxt->inputNr > 1))
8864 xmlPopInput(ctxt);
8865
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008866 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008867 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008868 avail = ctxt->input->length -
8869 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008870 else {
8871 /*
8872 * If we are operating on converted input, try to flush
8873 * remaining chars to avoid them stalling in the non-converted
8874 * buffer.
8875 */
8876 if ((ctxt->input->buf->raw != NULL) &&
8877 (ctxt->input->buf->raw->use > 0)) {
8878 int base = ctxt->input->base -
8879 ctxt->input->buf->buffer->content;
8880 int current = ctxt->input->cur - ctxt->input->base;
8881
8882 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8883 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8884 ctxt->input->cur = ctxt->input->base + current;
8885 ctxt->input->end =
8886 &ctxt->input->buf->buffer->content[
8887 ctxt->input->buf->buffer->use];
8888 }
8889 avail = ctxt->input->buf->buffer->use -
8890 (ctxt->input->cur - ctxt->input->base);
8891 }
Owen Taylor3473f882001-02-23 17:55:21 +00008892 if (avail < 1)
8893 goto done;
8894 switch (ctxt->instate) {
8895 case XML_PARSER_EOF:
8896 /*
8897 * Document parsing is done !
8898 */
8899 goto done;
8900 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008901 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8902 xmlChar start[4];
8903 xmlCharEncoding enc;
8904
8905 /*
8906 * Very first chars read from the document flow.
8907 */
8908 if (avail < 4)
8909 goto done;
8910
8911 /*
8912 * Get the 4 first bytes and decode the charset
8913 * if enc != XML_CHAR_ENCODING_NONE
8914 * plug some encoding conversion routines.
8915 */
8916 start[0] = RAW;
8917 start[1] = NXT(1);
8918 start[2] = NXT(2);
8919 start[3] = NXT(3);
8920 enc = xmlDetectCharEncoding(start, 4);
8921 if (enc != XML_CHAR_ENCODING_NONE) {
8922 xmlSwitchEncoding(ctxt, enc);
8923 }
8924 break;
8925 }
Owen Taylor3473f882001-02-23 17:55:21 +00008926
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00008927 if (avail < 2)
8928 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00008929 cur = ctxt->input->cur[0];
8930 next = ctxt->input->cur[1];
8931 if (cur == 0) {
8932 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8933 ctxt->sax->setDocumentLocator(ctxt->userData,
8934 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008935 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008936 ctxt->instate = XML_PARSER_EOF;
8937#ifdef DEBUG_PUSH
8938 xmlGenericError(xmlGenericErrorContext,
8939 "PP: entering EOF\n");
8940#endif
8941 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8942 ctxt->sax->endDocument(ctxt->userData);
8943 goto done;
8944 }
8945 if ((cur == '<') && (next == '?')) {
8946 /* PI or XML decl */
8947 if (avail < 5) return(ret);
8948 if ((!terminate) &&
8949 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8950 return(ret);
8951 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8952 ctxt->sax->setDocumentLocator(ctxt->userData,
8953 &xmlDefaultSAXLocator);
8954 if ((ctxt->input->cur[2] == 'x') &&
8955 (ctxt->input->cur[3] == 'm') &&
8956 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00008957 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008958 ret += 5;
8959#ifdef DEBUG_PUSH
8960 xmlGenericError(xmlGenericErrorContext,
8961 "PP: Parsing XML Decl\n");
8962#endif
8963 xmlParseXMLDecl(ctxt);
8964 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8965 /*
8966 * The XML REC instructs us to stop parsing right
8967 * here
8968 */
8969 ctxt->instate = XML_PARSER_EOF;
8970 return(0);
8971 }
8972 ctxt->standalone = ctxt->input->standalone;
8973 if ((ctxt->encoding == NULL) &&
8974 (ctxt->input->encoding != NULL))
8975 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8976 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8977 (!ctxt->disableSAX))
8978 ctxt->sax->startDocument(ctxt->userData);
8979 ctxt->instate = XML_PARSER_MISC;
8980#ifdef DEBUG_PUSH
8981 xmlGenericError(xmlGenericErrorContext,
8982 "PP: entering MISC\n");
8983#endif
8984 } else {
8985 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8986 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8987 (!ctxt->disableSAX))
8988 ctxt->sax->startDocument(ctxt->userData);
8989 ctxt->instate = XML_PARSER_MISC;
8990#ifdef DEBUG_PUSH
8991 xmlGenericError(xmlGenericErrorContext,
8992 "PP: entering MISC\n");
8993#endif
8994 }
8995 } else {
8996 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8997 ctxt->sax->setDocumentLocator(ctxt->userData,
8998 &xmlDefaultSAXLocator);
8999 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9000 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9001 (!ctxt->disableSAX))
9002 ctxt->sax->startDocument(ctxt->userData);
9003 ctxt->instate = XML_PARSER_MISC;
9004#ifdef DEBUG_PUSH
9005 xmlGenericError(xmlGenericErrorContext,
9006 "PP: entering MISC\n");
9007#endif
9008 }
9009 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009010 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009011 const xmlChar *name;
9012 const xmlChar *prefix;
9013 const xmlChar *URI;
9014 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009015
9016 if ((avail < 2) && (ctxt->inputNr == 1))
9017 goto done;
9018 cur = ctxt->input->cur[0];
9019 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009020 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009021 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009022 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9023 ctxt->sax->endDocument(ctxt->userData);
9024 goto done;
9025 }
9026 if (!terminate) {
9027 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009028 /* > can be found unescaped in attribute values */
9029 if ((lastlt == NULL) || (ctxt->input->cur >= lastlt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009030 goto done;
9031 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9032 goto done;
9033 }
9034 }
9035 if (ctxt->spaceNr == 0)
9036 spacePush(ctxt, -1);
9037 else
9038 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009039#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009040 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009041#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009042 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009043#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009044 else
9045 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009046#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009047 if (name == NULL) {
9048 spacePop(ctxt);
9049 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009050 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9051 ctxt->sax->endDocument(ctxt->userData);
9052 goto done;
9053 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009054#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009055 /*
9056 * [ VC: Root Element Type ]
9057 * The Name in the document type declaration must match
9058 * the element type of the root element.
9059 */
9060 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9061 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9062 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009063#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009064
9065 /*
9066 * Check for an Empty Element.
9067 */
9068 if ((RAW == '/') && (NXT(1) == '>')) {
9069 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009070
9071 if (ctxt->sax2) {
9072 if ((ctxt->sax != NULL) &&
9073 (ctxt->sax->endElementNs != NULL) &&
9074 (!ctxt->disableSAX))
9075 ctxt->sax->endElementNs(ctxt->userData, name,
9076 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009077#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009078 } else {
9079 if ((ctxt->sax != NULL) &&
9080 (ctxt->sax->endElement != NULL) &&
9081 (!ctxt->disableSAX))
9082 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009083#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009084 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009085 spacePop(ctxt);
9086 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009087 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009088 } else {
9089 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009090 }
9091 break;
9092 }
9093 if (RAW == '>') {
9094 NEXT;
9095 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009096 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009097 "Couldn't find end of Start Tag %s\n",
9098 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009099 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009100 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009101 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009102 if (ctxt->sax2)
9103 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009104#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009105 else
9106 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009107#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009108
Daniel Veillarda880b122003-04-21 21:36:41 +00009109 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009110 break;
9111 }
9112 case XML_PARSER_CONTENT: {
9113 const xmlChar *test;
9114 unsigned int cons;
9115 if ((avail < 2) && (ctxt->inputNr == 1))
9116 goto done;
9117 cur = ctxt->input->cur[0];
9118 next = ctxt->input->cur[1];
9119
9120 test = CUR_PTR;
9121 cons = ctxt->input->consumed;
9122 if ((cur == '<') && (next == '/')) {
9123 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009124 break;
9125 } else if ((cur == '<') && (next == '?')) {
9126 if ((!terminate) &&
9127 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9128 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009129 xmlParsePI(ctxt);
9130 } else if ((cur == '<') && (next != '!')) {
9131 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009132 break;
9133 } else if ((cur == '<') && (next == '!') &&
9134 (ctxt->input->cur[2] == '-') &&
9135 (ctxt->input->cur[3] == '-')) {
9136 if ((!terminate) &&
9137 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9138 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009139 xmlParseComment(ctxt);
9140 ctxt->instate = XML_PARSER_CONTENT;
9141 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9142 (ctxt->input->cur[2] == '[') &&
9143 (ctxt->input->cur[3] == 'C') &&
9144 (ctxt->input->cur[4] == 'D') &&
9145 (ctxt->input->cur[5] == 'A') &&
9146 (ctxt->input->cur[6] == 'T') &&
9147 (ctxt->input->cur[7] == 'A') &&
9148 (ctxt->input->cur[8] == '[')) {
9149 SKIP(9);
9150 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009151 break;
9152 } else if ((cur == '<') && (next == '!') &&
9153 (avail < 9)) {
9154 goto done;
9155 } else if (cur == '&') {
9156 if ((!terminate) &&
9157 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9158 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009159 xmlParseReference(ctxt);
9160 } else {
9161 /* TODO Avoid the extra copy, handle directly !!! */
9162 /*
9163 * Goal of the following test is:
9164 * - minimize calls to the SAX 'character' callback
9165 * when they are mergeable
9166 * - handle an problem for isBlank when we only parse
9167 * a sequence of blank chars and the next one is
9168 * not available to check against '<' presence.
9169 * - tries to homogenize the differences in SAX
9170 * callbacks between the push and pull versions
9171 * of the parser.
9172 */
9173 if ((ctxt->inputNr == 1) &&
9174 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9175 if (!terminate) {
9176 if (ctxt->progressive) {
9177 if ((lastlt == NULL) ||
9178 (ctxt->input->cur > lastlt))
9179 goto done;
9180 } else if (xmlParseLookupSequence(ctxt,
9181 '<', 0, 0) < 0) {
9182 goto done;
9183 }
9184 }
9185 }
9186 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009187 xmlParseCharData(ctxt, 0);
9188 }
9189 /*
9190 * Pop-up of finished entities.
9191 */
9192 while ((RAW == 0) && (ctxt->inputNr > 1))
9193 xmlPopInput(ctxt);
9194 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009195 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9196 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009197 ctxt->instate = XML_PARSER_EOF;
9198 break;
9199 }
9200 break;
9201 }
9202 case XML_PARSER_END_TAG:
9203 if (avail < 2)
9204 goto done;
9205 if (!terminate) {
9206 if (ctxt->progressive) {
9207 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9208 goto done;
9209 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9210 goto done;
9211 }
9212 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009213 if (ctxt->sax2) {
9214 xmlParseEndTag2(ctxt,
9215 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9216 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009217 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009218 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009219 }
9220#ifdef LIBXML_SAX1_ENABLED
9221 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009222 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009223#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009224 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009225 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009226 } else {
9227 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009228 }
9229 break;
9230 case XML_PARSER_CDATA_SECTION: {
9231 /*
9232 * The Push mode need to have the SAX callback for
9233 * cdataBlock merge back contiguous callbacks.
9234 */
9235 int base;
9236
9237 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9238 if (base < 0) {
9239 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9240 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9241 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009242 ctxt->sax->cdataBlock(ctxt->userData,
9243 ctxt->input->cur,
9244 XML_PARSER_BIG_BUFFER_SIZE);
9245 else if (ctxt->sax->characters != NULL)
9246 ctxt->sax->characters(ctxt->userData,
9247 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009248 XML_PARSER_BIG_BUFFER_SIZE);
9249 }
9250 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9251 ctxt->checkIndex = 0;
9252 }
9253 goto done;
9254 } else {
9255 if ((ctxt->sax != NULL) && (base > 0) &&
9256 (!ctxt->disableSAX)) {
9257 if (ctxt->sax->cdataBlock != NULL)
9258 ctxt->sax->cdataBlock(ctxt->userData,
9259 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009260 else if (ctxt->sax->characters != NULL)
9261 ctxt->sax->characters(ctxt->userData,
9262 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009263 }
9264 SKIP(base + 3);
9265 ctxt->checkIndex = 0;
9266 ctxt->instate = XML_PARSER_CONTENT;
9267#ifdef DEBUG_PUSH
9268 xmlGenericError(xmlGenericErrorContext,
9269 "PP: entering CONTENT\n");
9270#endif
9271 }
9272 break;
9273 }
Owen Taylor3473f882001-02-23 17:55:21 +00009274 case XML_PARSER_MISC:
9275 SKIP_BLANKS;
9276 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009277 avail = ctxt->input->length -
9278 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009279 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009280 avail = ctxt->input->buf->buffer->use -
9281 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009282 if (avail < 2)
9283 goto done;
9284 cur = ctxt->input->cur[0];
9285 next = ctxt->input->cur[1];
9286 if ((cur == '<') && (next == '?')) {
9287 if ((!terminate) &&
9288 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9289 goto done;
9290#ifdef DEBUG_PUSH
9291 xmlGenericError(xmlGenericErrorContext,
9292 "PP: Parsing PI\n");
9293#endif
9294 xmlParsePI(ctxt);
9295 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009296 (ctxt->input->cur[2] == '-') &&
9297 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009298 if ((!terminate) &&
9299 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9300 goto done;
9301#ifdef DEBUG_PUSH
9302 xmlGenericError(xmlGenericErrorContext,
9303 "PP: Parsing Comment\n");
9304#endif
9305 xmlParseComment(ctxt);
9306 ctxt->instate = XML_PARSER_MISC;
9307 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009308 (ctxt->input->cur[2] == 'D') &&
9309 (ctxt->input->cur[3] == 'O') &&
9310 (ctxt->input->cur[4] == 'C') &&
9311 (ctxt->input->cur[5] == 'T') &&
9312 (ctxt->input->cur[6] == 'Y') &&
9313 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009314 (ctxt->input->cur[8] == 'E')) {
9315 if ((!terminate) &&
9316 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9317 goto done;
9318#ifdef DEBUG_PUSH
9319 xmlGenericError(xmlGenericErrorContext,
9320 "PP: Parsing internal subset\n");
9321#endif
9322 ctxt->inSubset = 1;
9323 xmlParseDocTypeDecl(ctxt);
9324 if (RAW == '[') {
9325 ctxt->instate = XML_PARSER_DTD;
9326#ifdef DEBUG_PUSH
9327 xmlGenericError(xmlGenericErrorContext,
9328 "PP: entering DTD\n");
9329#endif
9330 } else {
9331 /*
9332 * Create and update the external subset.
9333 */
9334 ctxt->inSubset = 2;
9335 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9336 (ctxt->sax->externalSubset != NULL))
9337 ctxt->sax->externalSubset(ctxt->userData,
9338 ctxt->intSubName, ctxt->extSubSystem,
9339 ctxt->extSubURI);
9340 ctxt->inSubset = 0;
9341 ctxt->instate = XML_PARSER_PROLOG;
9342#ifdef DEBUG_PUSH
9343 xmlGenericError(xmlGenericErrorContext,
9344 "PP: entering PROLOG\n");
9345#endif
9346 }
9347 } else if ((cur == '<') && (next == '!') &&
9348 (avail < 9)) {
9349 goto done;
9350 } else {
9351 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009352 ctxt->progressive = 1;
9353 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009354#ifdef DEBUG_PUSH
9355 xmlGenericError(xmlGenericErrorContext,
9356 "PP: entering START_TAG\n");
9357#endif
9358 }
9359 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009360 case XML_PARSER_PROLOG:
9361 SKIP_BLANKS;
9362 if (ctxt->input->buf == NULL)
9363 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9364 else
9365 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9366 if (avail < 2)
9367 goto done;
9368 cur = ctxt->input->cur[0];
9369 next = ctxt->input->cur[1];
9370 if ((cur == '<') && (next == '?')) {
9371 if ((!terminate) &&
9372 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9373 goto done;
9374#ifdef DEBUG_PUSH
9375 xmlGenericError(xmlGenericErrorContext,
9376 "PP: Parsing PI\n");
9377#endif
9378 xmlParsePI(ctxt);
9379 } else if ((cur == '<') && (next == '!') &&
9380 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9381 if ((!terminate) &&
9382 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9383 goto done;
9384#ifdef DEBUG_PUSH
9385 xmlGenericError(xmlGenericErrorContext,
9386 "PP: Parsing Comment\n");
9387#endif
9388 xmlParseComment(ctxt);
9389 ctxt->instate = XML_PARSER_PROLOG;
9390 } else if ((cur == '<') && (next == '!') &&
9391 (avail < 4)) {
9392 goto done;
9393 } else {
9394 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009395 ctxt->progressive = 1;
9396 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009397#ifdef DEBUG_PUSH
9398 xmlGenericError(xmlGenericErrorContext,
9399 "PP: entering START_TAG\n");
9400#endif
9401 }
9402 break;
9403 case XML_PARSER_EPILOG:
9404 SKIP_BLANKS;
9405 if (ctxt->input->buf == NULL)
9406 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9407 else
9408 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9409 if (avail < 2)
9410 goto done;
9411 cur = ctxt->input->cur[0];
9412 next = ctxt->input->cur[1];
9413 if ((cur == '<') && (next == '?')) {
9414 if ((!terminate) &&
9415 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9416 goto done;
9417#ifdef DEBUG_PUSH
9418 xmlGenericError(xmlGenericErrorContext,
9419 "PP: Parsing PI\n");
9420#endif
9421 xmlParsePI(ctxt);
9422 ctxt->instate = XML_PARSER_EPILOG;
9423 } else if ((cur == '<') && (next == '!') &&
9424 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9425 if ((!terminate) &&
9426 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9427 goto done;
9428#ifdef DEBUG_PUSH
9429 xmlGenericError(xmlGenericErrorContext,
9430 "PP: Parsing Comment\n");
9431#endif
9432 xmlParseComment(ctxt);
9433 ctxt->instate = XML_PARSER_EPILOG;
9434 } else if ((cur == '<') && (next == '!') &&
9435 (avail < 4)) {
9436 goto done;
9437 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009438 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009439 ctxt->instate = XML_PARSER_EOF;
9440#ifdef DEBUG_PUSH
9441 xmlGenericError(xmlGenericErrorContext,
9442 "PP: entering EOF\n");
9443#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009444 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009445 ctxt->sax->endDocument(ctxt->userData);
9446 goto done;
9447 }
9448 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009449 case XML_PARSER_DTD: {
9450 /*
9451 * Sorry but progressive parsing of the internal subset
9452 * is not expected to be supported. We first check that
9453 * the full content of the internal subset is available and
9454 * the parsing is launched only at that point.
9455 * Internal subset ends up with "']' S? '>'" in an unescaped
9456 * section and not in a ']]>' sequence which are conditional
9457 * sections (whoever argued to keep that crap in XML deserve
9458 * a place in hell !).
9459 */
9460 int base, i;
9461 xmlChar *buf;
9462 xmlChar quote = 0;
9463
9464 base = ctxt->input->cur - ctxt->input->base;
9465 if (base < 0) return(0);
9466 if (ctxt->checkIndex > base)
9467 base = ctxt->checkIndex;
9468 buf = ctxt->input->buf->buffer->content;
9469 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9470 base++) {
9471 if (quote != 0) {
9472 if (buf[base] == quote)
9473 quote = 0;
9474 continue;
9475 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009476 if ((quote == 0) && (buf[base] == '<')) {
9477 int found = 0;
9478 /* special handling of comments */
9479 if (((unsigned int) base + 4 <
9480 ctxt->input->buf->buffer->use) &&
9481 (buf[base + 1] == '!') &&
9482 (buf[base + 2] == '-') &&
9483 (buf[base + 3] == '-')) {
9484 for (;(unsigned int) base + 3 <
9485 ctxt->input->buf->buffer->use; base++) {
9486 if ((buf[base] == '-') &&
9487 (buf[base + 1] == '-') &&
9488 (buf[base + 2] == '>')) {
9489 found = 1;
9490 base += 2;
9491 break;
9492 }
9493 }
9494 if (!found)
9495 break;
9496 continue;
9497 }
9498 }
Owen Taylor3473f882001-02-23 17:55:21 +00009499 if (buf[base] == '"') {
9500 quote = '"';
9501 continue;
9502 }
9503 if (buf[base] == '\'') {
9504 quote = '\'';
9505 continue;
9506 }
9507 if (buf[base] == ']') {
9508 if ((unsigned int) base +1 >=
9509 ctxt->input->buf->buffer->use)
9510 break;
9511 if (buf[base + 1] == ']') {
9512 /* conditional crap, skip both ']' ! */
9513 base++;
9514 continue;
9515 }
9516 for (i = 0;
9517 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9518 i++) {
9519 if (buf[base + i] == '>')
9520 goto found_end_int_subset;
9521 }
9522 break;
9523 }
9524 }
9525 /*
9526 * We didn't found the end of the Internal subset
9527 */
9528 if (quote == 0)
9529 ctxt->checkIndex = base;
9530#ifdef DEBUG_PUSH
9531 if (next == 0)
9532 xmlGenericError(xmlGenericErrorContext,
9533 "PP: lookup of int subset end filed\n");
9534#endif
9535 goto done;
9536
9537found_end_int_subset:
9538 xmlParseInternalSubset(ctxt);
9539 ctxt->inSubset = 2;
9540 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9541 (ctxt->sax->externalSubset != NULL))
9542 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9543 ctxt->extSubSystem, ctxt->extSubURI);
9544 ctxt->inSubset = 0;
9545 ctxt->instate = XML_PARSER_PROLOG;
9546 ctxt->checkIndex = 0;
9547#ifdef DEBUG_PUSH
9548 xmlGenericError(xmlGenericErrorContext,
9549 "PP: entering PROLOG\n");
9550#endif
9551 break;
9552 }
9553 case XML_PARSER_COMMENT:
9554 xmlGenericError(xmlGenericErrorContext,
9555 "PP: internal error, state == COMMENT\n");
9556 ctxt->instate = XML_PARSER_CONTENT;
9557#ifdef DEBUG_PUSH
9558 xmlGenericError(xmlGenericErrorContext,
9559 "PP: entering CONTENT\n");
9560#endif
9561 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009562 case XML_PARSER_IGNORE:
9563 xmlGenericError(xmlGenericErrorContext,
9564 "PP: internal error, state == IGNORE");
9565 ctxt->instate = XML_PARSER_DTD;
9566#ifdef DEBUG_PUSH
9567 xmlGenericError(xmlGenericErrorContext,
9568 "PP: entering DTD\n");
9569#endif
9570 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009571 case XML_PARSER_PI:
9572 xmlGenericError(xmlGenericErrorContext,
9573 "PP: internal error, state == PI\n");
9574 ctxt->instate = XML_PARSER_CONTENT;
9575#ifdef DEBUG_PUSH
9576 xmlGenericError(xmlGenericErrorContext,
9577 "PP: entering CONTENT\n");
9578#endif
9579 break;
9580 case XML_PARSER_ENTITY_DECL:
9581 xmlGenericError(xmlGenericErrorContext,
9582 "PP: internal error, state == ENTITY_DECL\n");
9583 ctxt->instate = XML_PARSER_DTD;
9584#ifdef DEBUG_PUSH
9585 xmlGenericError(xmlGenericErrorContext,
9586 "PP: entering DTD\n");
9587#endif
9588 break;
9589 case XML_PARSER_ENTITY_VALUE:
9590 xmlGenericError(xmlGenericErrorContext,
9591 "PP: internal error, state == ENTITY_VALUE\n");
9592 ctxt->instate = XML_PARSER_CONTENT;
9593#ifdef DEBUG_PUSH
9594 xmlGenericError(xmlGenericErrorContext,
9595 "PP: entering DTD\n");
9596#endif
9597 break;
9598 case XML_PARSER_ATTRIBUTE_VALUE:
9599 xmlGenericError(xmlGenericErrorContext,
9600 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9601 ctxt->instate = XML_PARSER_START_TAG;
9602#ifdef DEBUG_PUSH
9603 xmlGenericError(xmlGenericErrorContext,
9604 "PP: entering START_TAG\n");
9605#endif
9606 break;
9607 case XML_PARSER_SYSTEM_LITERAL:
9608 xmlGenericError(xmlGenericErrorContext,
9609 "PP: internal error, state == SYSTEM_LITERAL\n");
9610 ctxt->instate = XML_PARSER_START_TAG;
9611#ifdef DEBUG_PUSH
9612 xmlGenericError(xmlGenericErrorContext,
9613 "PP: entering START_TAG\n");
9614#endif
9615 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009616 case XML_PARSER_PUBLIC_LITERAL:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: internal error, state == PUBLIC_LITERAL\n");
9619 ctxt->instate = XML_PARSER_START_TAG;
9620#ifdef DEBUG_PUSH
9621 xmlGenericError(xmlGenericErrorContext,
9622 "PP: entering START_TAG\n");
9623#endif
9624 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009625 }
9626 }
9627done:
9628#ifdef DEBUG_PUSH
9629 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9630#endif
9631 return(ret);
9632}
9633
9634/**
Owen Taylor3473f882001-02-23 17:55:21 +00009635 * xmlParseChunk:
9636 * @ctxt: an XML parser context
9637 * @chunk: an char array
9638 * @size: the size in byte of the chunk
9639 * @terminate: last chunk indicator
9640 *
9641 * Parse a Chunk of memory
9642 *
9643 * Returns zero if no error, the xmlParserErrors otherwise.
9644 */
9645int
9646xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9647 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009648 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9649 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009650 if (ctxt->instate == XML_PARSER_START)
9651 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009652 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9653 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9654 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9655 int cur = ctxt->input->cur - ctxt->input->base;
9656
9657 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9658 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9659 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009660 ctxt->input->end =
9661 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009662#ifdef DEBUG_PUSH
9663 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9664#endif
9665
Owen Taylor3473f882001-02-23 17:55:21 +00009666 } else if (ctxt->instate != XML_PARSER_EOF) {
9667 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9668 xmlParserInputBufferPtr in = ctxt->input->buf;
9669 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9670 (in->raw != NULL)) {
9671 int nbchars;
9672
9673 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9674 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009675 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009676 xmlGenericError(xmlGenericErrorContext,
9677 "xmlParseChunk: encoder error\n");
9678 return(XML_ERR_INVALID_ENCODING);
9679 }
9680 }
9681 }
9682 }
9683 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009684 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9685 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009686 if (terminate) {
9687 /*
9688 * Check for termination
9689 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009690 int avail = 0;
9691 if (ctxt->input->buf == NULL)
9692 avail = ctxt->input->length -
9693 (ctxt->input->cur - ctxt->input->base);
9694 else
9695 avail = ctxt->input->buf->buffer->use -
9696 (ctxt->input->cur - ctxt->input->base);
9697
Owen Taylor3473f882001-02-23 17:55:21 +00009698 if ((ctxt->instate != XML_PARSER_EOF) &&
9699 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009700 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009701 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009702 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009703 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009704 }
Owen Taylor3473f882001-02-23 17:55:21 +00009705 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009706 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009707 ctxt->sax->endDocument(ctxt->userData);
9708 }
9709 ctxt->instate = XML_PARSER_EOF;
9710 }
9711 return((xmlParserErrors) ctxt->errNo);
9712}
9713
9714/************************************************************************
9715 * *
9716 * I/O front end functions to the parser *
9717 * *
9718 ************************************************************************/
9719
9720/**
9721 * xmlStopParser:
9722 * @ctxt: an XML parser context
9723 *
9724 * Blocks further parser processing
9725 */
9726void
9727xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009728 if (ctxt == NULL)
9729 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009730 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009731 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009732 if (ctxt->input != NULL)
9733 ctxt->input->cur = BAD_CAST"";
9734}
9735
9736/**
9737 * xmlCreatePushParserCtxt:
9738 * @sax: a SAX handler
9739 * @user_data: The user data returned on SAX callbacks
9740 * @chunk: a pointer to an array of chars
9741 * @size: number of chars in the array
9742 * @filename: an optional file name or URI
9743 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009744 * Create a parser context for using the XML parser in push mode.
9745 * If @buffer and @size are non-NULL, the data is used to detect
9746 * the encoding. The remaining characters will be parsed so they
9747 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009748 * To allow content encoding detection, @size should be >= 4
9749 * The value of @filename is used for fetching external entities
9750 * and error/warning reports.
9751 *
9752 * Returns the new parser context or NULL
9753 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009754
Owen Taylor3473f882001-02-23 17:55:21 +00009755xmlParserCtxtPtr
9756xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9757 const char *chunk, int size, const char *filename) {
9758 xmlParserCtxtPtr ctxt;
9759 xmlParserInputPtr inputStream;
9760 xmlParserInputBufferPtr buf;
9761 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9762
9763 /*
9764 * plug some encoding conversion routines
9765 */
9766 if ((chunk != NULL) && (size >= 4))
9767 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9768
9769 buf = xmlAllocParserInputBuffer(enc);
9770 if (buf == NULL) return(NULL);
9771
9772 ctxt = xmlNewParserCtxt();
9773 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009774 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009775 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009776 return(NULL);
9777 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009778 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9779 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009780 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009781 xmlFreeParserInputBuffer(buf);
9782 xmlFreeParserCtxt(ctxt);
9783 return(NULL);
9784 }
Owen Taylor3473f882001-02-23 17:55:21 +00009785 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009786#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009787 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009788#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009789 xmlFree(ctxt->sax);
9790 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9791 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009792 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009793 xmlFreeParserInputBuffer(buf);
9794 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009795 return(NULL);
9796 }
9797 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9798 if (user_data != NULL)
9799 ctxt->userData = user_data;
9800 }
9801 if (filename == NULL) {
9802 ctxt->directory = NULL;
9803 } else {
9804 ctxt->directory = xmlParserGetDirectory(filename);
9805 }
9806
9807 inputStream = xmlNewInputStream(ctxt);
9808 if (inputStream == NULL) {
9809 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009810 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009811 return(NULL);
9812 }
9813
9814 if (filename == NULL)
9815 inputStream->filename = NULL;
9816 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009817 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009818 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009819 inputStream->buf = buf;
9820 inputStream->base = inputStream->buf->buffer->content;
9821 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009822 inputStream->end =
9823 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009824
9825 inputPush(ctxt, inputStream);
9826
9827 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9828 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009829 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9830 int cur = ctxt->input->cur - ctxt->input->base;
9831
Owen Taylor3473f882001-02-23 17:55:21 +00009832 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009833
9834 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9835 ctxt->input->cur = ctxt->input->base + cur;
9836 ctxt->input->end =
9837 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009838#ifdef DEBUG_PUSH
9839 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9840#endif
9841 }
9842
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009843 if (enc != XML_CHAR_ENCODING_NONE) {
9844 xmlSwitchEncoding(ctxt, enc);
9845 }
9846
Owen Taylor3473f882001-02-23 17:55:21 +00009847 return(ctxt);
9848}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009849#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009850
9851/**
9852 * xmlCreateIOParserCtxt:
9853 * @sax: a SAX handler
9854 * @user_data: The user data returned on SAX callbacks
9855 * @ioread: an I/O read function
9856 * @ioclose: an I/O close function
9857 * @ioctx: an I/O handler
9858 * @enc: the charset encoding if known
9859 *
9860 * Create a parser context for using the XML parser with an existing
9861 * I/O stream
9862 *
9863 * Returns the new parser context or NULL
9864 */
9865xmlParserCtxtPtr
9866xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9867 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9868 void *ioctx, xmlCharEncoding enc) {
9869 xmlParserCtxtPtr ctxt;
9870 xmlParserInputPtr inputStream;
9871 xmlParserInputBufferPtr buf;
9872
9873 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9874 if (buf == NULL) return(NULL);
9875
9876 ctxt = xmlNewParserCtxt();
9877 if (ctxt == NULL) {
9878 xmlFree(buf);
9879 return(NULL);
9880 }
9881 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009882#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009883 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009884#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009885 xmlFree(ctxt->sax);
9886 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9887 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009888 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009889 xmlFree(ctxt);
9890 return(NULL);
9891 }
9892 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9893 if (user_data != NULL)
9894 ctxt->userData = user_data;
9895 }
9896
9897 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9898 if (inputStream == NULL) {
9899 xmlFreeParserCtxt(ctxt);
9900 return(NULL);
9901 }
9902 inputPush(ctxt, inputStream);
9903
9904 return(ctxt);
9905}
9906
Daniel Veillard4432df22003-09-28 18:58:27 +00009907#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009908/************************************************************************
9909 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009910 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009911 * *
9912 ************************************************************************/
9913
9914/**
9915 * xmlIOParseDTD:
9916 * @sax: the SAX handler block or NULL
9917 * @input: an Input Buffer
9918 * @enc: the charset encoding if known
9919 *
9920 * Load and parse a DTD
9921 *
9922 * Returns the resulting xmlDtdPtr or NULL in case of error.
9923 * @input will be freed at parsing end.
9924 */
9925
9926xmlDtdPtr
9927xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9928 xmlCharEncoding enc) {
9929 xmlDtdPtr ret = NULL;
9930 xmlParserCtxtPtr ctxt;
9931 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009932 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009933
9934 if (input == NULL)
9935 return(NULL);
9936
9937 ctxt = xmlNewParserCtxt();
9938 if (ctxt == NULL) {
9939 return(NULL);
9940 }
9941
9942 /*
9943 * Set-up the SAX context
9944 */
9945 if (sax != NULL) {
9946 if (ctxt->sax != NULL)
9947 xmlFree(ctxt->sax);
9948 ctxt->sax = sax;
9949 ctxt->userData = NULL;
9950 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009951 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009952
9953 /*
9954 * generate a parser input from the I/O handler
9955 */
9956
Daniel Veillard43caefb2003-12-07 19:32:22 +00009957 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +00009958 if (pinput == NULL) {
9959 if (sax != NULL) ctxt->sax = NULL;
9960 xmlFreeParserCtxt(ctxt);
9961 return(NULL);
9962 }
9963
9964 /*
9965 * plug some encoding conversion routines here.
9966 */
9967 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +00009968 if (enc != XML_CHAR_ENCODING_NONE) {
9969 xmlSwitchEncoding(ctxt, enc);
9970 }
Owen Taylor3473f882001-02-23 17:55:21 +00009971
9972 pinput->filename = NULL;
9973 pinput->line = 1;
9974 pinput->col = 1;
9975 pinput->base = ctxt->input->cur;
9976 pinput->cur = ctxt->input->cur;
9977 pinput->free = NULL;
9978
9979 /*
9980 * let's parse that entity knowing it's an external subset.
9981 */
9982 ctxt->inSubset = 2;
9983 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9984 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9985 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009986
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009987 if ((enc == XML_CHAR_ENCODING_NONE) &&
9988 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00009989 /*
9990 * Get the 4 first bytes and decode the charset
9991 * if enc != XML_CHAR_ENCODING_NONE
9992 * plug some encoding conversion routines.
9993 */
9994 start[0] = RAW;
9995 start[1] = NXT(1);
9996 start[2] = NXT(2);
9997 start[3] = NXT(3);
9998 enc = xmlDetectCharEncoding(start, 4);
9999 if (enc != XML_CHAR_ENCODING_NONE) {
10000 xmlSwitchEncoding(ctxt, enc);
10001 }
10002 }
10003
Owen Taylor3473f882001-02-23 17:55:21 +000010004 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10005
10006 if (ctxt->myDoc != NULL) {
10007 if (ctxt->wellFormed) {
10008 ret = ctxt->myDoc->extSubset;
10009 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010010 if (ret != NULL) {
10011 xmlNodePtr tmp;
10012
10013 ret->doc = NULL;
10014 tmp = ret->children;
10015 while (tmp != NULL) {
10016 tmp->doc = NULL;
10017 tmp = tmp->next;
10018 }
10019 }
Owen Taylor3473f882001-02-23 17:55:21 +000010020 } else {
10021 ret = NULL;
10022 }
10023 xmlFreeDoc(ctxt->myDoc);
10024 ctxt->myDoc = NULL;
10025 }
10026 if (sax != NULL) ctxt->sax = NULL;
10027 xmlFreeParserCtxt(ctxt);
10028
10029 return(ret);
10030}
10031
10032/**
10033 * xmlSAXParseDTD:
10034 * @sax: the SAX handler block
10035 * @ExternalID: a NAME* containing the External ID of the DTD
10036 * @SystemID: a NAME* containing the URL to the DTD
10037 *
10038 * Load and parse an external subset.
10039 *
10040 * Returns the resulting xmlDtdPtr or NULL in case of error.
10041 */
10042
10043xmlDtdPtr
10044xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10045 const xmlChar *SystemID) {
10046 xmlDtdPtr ret = NULL;
10047 xmlParserCtxtPtr ctxt;
10048 xmlParserInputPtr input = NULL;
10049 xmlCharEncoding enc;
10050
10051 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10052
10053 ctxt = xmlNewParserCtxt();
10054 if (ctxt == NULL) {
10055 return(NULL);
10056 }
10057
10058 /*
10059 * Set-up the SAX context
10060 */
10061 if (sax != NULL) {
10062 if (ctxt->sax != NULL)
10063 xmlFree(ctxt->sax);
10064 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010065 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010066 }
10067
10068 /*
10069 * Ask the Entity resolver to load the damn thing
10070 */
10071
10072 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010073 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010074 if (input == NULL) {
10075 if (sax != NULL) ctxt->sax = NULL;
10076 xmlFreeParserCtxt(ctxt);
10077 return(NULL);
10078 }
10079
10080 /*
10081 * plug some encoding conversion routines here.
10082 */
10083 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010084 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10085 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10086 xmlSwitchEncoding(ctxt, enc);
10087 }
Owen Taylor3473f882001-02-23 17:55:21 +000010088
10089 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010090 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010091 input->line = 1;
10092 input->col = 1;
10093 input->base = ctxt->input->cur;
10094 input->cur = ctxt->input->cur;
10095 input->free = NULL;
10096
10097 /*
10098 * let's parse that entity knowing it's an external subset.
10099 */
10100 ctxt->inSubset = 2;
10101 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10102 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10103 ExternalID, SystemID);
10104 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10105
10106 if (ctxt->myDoc != NULL) {
10107 if (ctxt->wellFormed) {
10108 ret = ctxt->myDoc->extSubset;
10109 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010110 if (ret != NULL) {
10111 xmlNodePtr tmp;
10112
10113 ret->doc = NULL;
10114 tmp = ret->children;
10115 while (tmp != NULL) {
10116 tmp->doc = NULL;
10117 tmp = tmp->next;
10118 }
10119 }
Owen Taylor3473f882001-02-23 17:55:21 +000010120 } else {
10121 ret = NULL;
10122 }
10123 xmlFreeDoc(ctxt->myDoc);
10124 ctxt->myDoc = NULL;
10125 }
10126 if (sax != NULL) ctxt->sax = NULL;
10127 xmlFreeParserCtxt(ctxt);
10128
10129 return(ret);
10130}
10131
Daniel Veillard4432df22003-09-28 18:58:27 +000010132
Owen Taylor3473f882001-02-23 17:55:21 +000010133/**
10134 * xmlParseDTD:
10135 * @ExternalID: a NAME* containing the External ID of the DTD
10136 * @SystemID: a NAME* containing the URL to the DTD
10137 *
10138 * Load and parse an external subset.
10139 *
10140 * Returns the resulting xmlDtdPtr or NULL in case of error.
10141 */
10142
10143xmlDtdPtr
10144xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10145 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10146}
Daniel Veillard4432df22003-09-28 18:58:27 +000010147#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010148
10149/************************************************************************
10150 * *
10151 * Front ends when parsing an Entity *
10152 * *
10153 ************************************************************************/
10154
10155/**
Owen Taylor3473f882001-02-23 17:55:21 +000010156 * xmlParseCtxtExternalEntity:
10157 * @ctx: the existing parsing context
10158 * @URL: the URL for the entity to load
10159 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010160 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010161 *
10162 * Parse an external general entity within an existing parsing context
10163 * An external general parsed entity is well-formed if it matches the
10164 * production labeled extParsedEnt.
10165 *
10166 * [78] extParsedEnt ::= TextDecl? content
10167 *
10168 * Returns 0 if the entity is well formed, -1 in case of args problem and
10169 * the parser error code otherwise
10170 */
10171
10172int
10173xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010174 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010175 xmlParserCtxtPtr ctxt;
10176 xmlDocPtr newDoc;
10177 xmlSAXHandlerPtr oldsax = NULL;
10178 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010179 xmlChar start[4];
10180 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010181
10182 if (ctx->depth > 40) {
10183 return(XML_ERR_ENTITY_LOOP);
10184 }
10185
Daniel Veillardcda96922001-08-21 10:56:31 +000010186 if (lst != NULL)
10187 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010188 if ((URL == NULL) && (ID == NULL))
10189 return(-1);
10190 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10191 return(-1);
10192
10193
10194 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10195 if (ctxt == NULL) return(-1);
10196 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010197 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010198 oldsax = ctxt->sax;
10199 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010200 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010201 newDoc = xmlNewDoc(BAD_CAST "1.0");
10202 if (newDoc == NULL) {
10203 xmlFreeParserCtxt(ctxt);
10204 return(-1);
10205 }
10206 if (ctx->myDoc != NULL) {
10207 newDoc->intSubset = ctx->myDoc->intSubset;
10208 newDoc->extSubset = ctx->myDoc->extSubset;
10209 }
10210 if (ctx->myDoc->URL != NULL) {
10211 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10212 }
10213 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10214 if (newDoc->children == NULL) {
10215 ctxt->sax = oldsax;
10216 xmlFreeParserCtxt(ctxt);
10217 newDoc->intSubset = NULL;
10218 newDoc->extSubset = NULL;
10219 xmlFreeDoc(newDoc);
10220 return(-1);
10221 }
10222 nodePush(ctxt, newDoc->children);
10223 if (ctx->myDoc == NULL) {
10224 ctxt->myDoc = newDoc;
10225 } else {
10226 ctxt->myDoc = ctx->myDoc;
10227 newDoc->children->doc = ctx->myDoc;
10228 }
10229
Daniel Veillard87a764e2001-06-20 17:41:10 +000010230 /*
10231 * Get the 4 first bytes and decode the charset
10232 * if enc != XML_CHAR_ENCODING_NONE
10233 * plug some encoding conversion routines.
10234 */
10235 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010236 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10237 start[0] = RAW;
10238 start[1] = NXT(1);
10239 start[2] = NXT(2);
10240 start[3] = NXT(3);
10241 enc = xmlDetectCharEncoding(start, 4);
10242 if (enc != XML_CHAR_ENCODING_NONE) {
10243 xmlSwitchEncoding(ctxt, enc);
10244 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010245 }
10246
Owen Taylor3473f882001-02-23 17:55:21 +000010247 /*
10248 * Parse a possible text declaration first
10249 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010250 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010251 xmlParseTextDecl(ctxt);
10252 }
10253
10254 /*
10255 * Doing validity checking on chunk doesn't make sense
10256 */
10257 ctxt->instate = XML_PARSER_CONTENT;
10258 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010259 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010260 ctxt->loadsubset = ctx->loadsubset;
10261 ctxt->depth = ctx->depth + 1;
10262 ctxt->replaceEntities = ctx->replaceEntities;
10263 if (ctxt->validate) {
10264 ctxt->vctxt.error = ctx->vctxt.error;
10265 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010266 } else {
10267 ctxt->vctxt.error = NULL;
10268 ctxt->vctxt.warning = NULL;
10269 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010270 ctxt->vctxt.nodeTab = NULL;
10271 ctxt->vctxt.nodeNr = 0;
10272 ctxt->vctxt.nodeMax = 0;
10273 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010274 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10275 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010276 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10277 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10278 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010279 ctxt->dictNames = ctx->dictNames;
10280 ctxt->attsDefault = ctx->attsDefault;
10281 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010282
10283 xmlParseContent(ctxt);
10284
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010285 ctx->validate = ctxt->validate;
10286 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010287 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010288 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010289 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010290 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010291 }
10292 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010293 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010294 }
10295
10296 if (!ctxt->wellFormed) {
10297 if (ctxt->errNo == 0)
10298 ret = 1;
10299 else
10300 ret = ctxt->errNo;
10301 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010302 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010303 xmlNodePtr cur;
10304
10305 /*
10306 * Return the newly created nodeset after unlinking it from
10307 * they pseudo parent.
10308 */
10309 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010310 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010311 while (cur != NULL) {
10312 cur->parent = NULL;
10313 cur = cur->next;
10314 }
10315 newDoc->children->children = NULL;
10316 }
10317 ret = 0;
10318 }
10319 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010320 ctxt->dict = NULL;
10321 ctxt->attsDefault = NULL;
10322 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010323 xmlFreeParserCtxt(ctxt);
10324 newDoc->intSubset = NULL;
10325 newDoc->extSubset = NULL;
10326 xmlFreeDoc(newDoc);
10327
10328 return(ret);
10329}
10330
10331/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010332 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010333 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010334 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010335 * @sax: the SAX handler bloc (possibly NULL)
10336 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10337 * @depth: Used for loop detection, use 0
10338 * @URL: the URL for the entity to load
10339 * @ID: the System ID for the entity to load
10340 * @list: the return value for the set of parsed nodes
10341 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010342 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010343 *
10344 * Returns 0 if the entity is well formed, -1 in case of args problem and
10345 * the parser error code otherwise
10346 */
10347
Daniel Veillard7d515752003-09-26 19:12:37 +000010348static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010349xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10350 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010351 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010352 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010353 xmlParserCtxtPtr ctxt;
10354 xmlDocPtr newDoc;
10355 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010356 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010357 xmlChar start[4];
10358 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010359
10360 if (depth > 40) {
10361 return(XML_ERR_ENTITY_LOOP);
10362 }
10363
10364
10365
10366 if (list != NULL)
10367 *list = NULL;
10368 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010369 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010370 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010371 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010372
10373
10374 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010375 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010376 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010377 if (oldctxt != NULL) {
10378 ctxt->_private = oldctxt->_private;
10379 ctxt->loadsubset = oldctxt->loadsubset;
10380 ctxt->validate = oldctxt->validate;
10381 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010382 ctxt->record_info = oldctxt->record_info;
10383 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10384 ctxt->node_seq.length = oldctxt->node_seq.length;
10385 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010386 } else {
10387 /*
10388 * Doing validity checking on chunk without context
10389 * doesn't make sense
10390 */
10391 ctxt->_private = NULL;
10392 ctxt->validate = 0;
10393 ctxt->external = 2;
10394 ctxt->loadsubset = 0;
10395 }
Owen Taylor3473f882001-02-23 17:55:21 +000010396 if (sax != NULL) {
10397 oldsax = ctxt->sax;
10398 ctxt->sax = sax;
10399 if (user_data != NULL)
10400 ctxt->userData = user_data;
10401 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010402 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010403 newDoc = xmlNewDoc(BAD_CAST "1.0");
10404 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010405 ctxt->node_seq.maximum = 0;
10406 ctxt->node_seq.length = 0;
10407 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010408 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010409 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010410 }
10411 if (doc != NULL) {
10412 newDoc->intSubset = doc->intSubset;
10413 newDoc->extSubset = doc->extSubset;
10414 }
10415 if (doc->URL != NULL) {
10416 newDoc->URL = xmlStrdup(doc->URL);
10417 }
10418 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10419 if (newDoc->children == NULL) {
10420 if (sax != NULL)
10421 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010422 ctxt->node_seq.maximum = 0;
10423 ctxt->node_seq.length = 0;
10424 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010425 xmlFreeParserCtxt(ctxt);
10426 newDoc->intSubset = NULL;
10427 newDoc->extSubset = NULL;
10428 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010429 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010430 }
10431 nodePush(ctxt, newDoc->children);
10432 if (doc == NULL) {
10433 ctxt->myDoc = newDoc;
10434 } else {
10435 ctxt->myDoc = doc;
10436 newDoc->children->doc = doc;
10437 }
10438
Daniel Veillard87a764e2001-06-20 17:41:10 +000010439 /*
10440 * Get the 4 first bytes and decode the charset
10441 * if enc != XML_CHAR_ENCODING_NONE
10442 * plug some encoding conversion routines.
10443 */
10444 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010445 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10446 start[0] = RAW;
10447 start[1] = NXT(1);
10448 start[2] = NXT(2);
10449 start[3] = NXT(3);
10450 enc = xmlDetectCharEncoding(start, 4);
10451 if (enc != XML_CHAR_ENCODING_NONE) {
10452 xmlSwitchEncoding(ctxt, enc);
10453 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010454 }
10455
Owen Taylor3473f882001-02-23 17:55:21 +000010456 /*
10457 * Parse a possible text declaration first
10458 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010459 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010460 xmlParseTextDecl(ctxt);
10461 }
10462
Owen Taylor3473f882001-02-23 17:55:21 +000010463 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010464 ctxt->depth = depth;
10465
10466 xmlParseContent(ctxt);
10467
Daniel Veillard561b7f82002-03-20 21:55:57 +000010468 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010469 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010470 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010471 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010472 }
10473 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010474 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
10476
10477 if (!ctxt->wellFormed) {
10478 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010479 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010480 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010481 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010482 } else {
10483 if (list != NULL) {
10484 xmlNodePtr cur;
10485
10486 /*
10487 * Return the newly created nodeset after unlinking it from
10488 * they pseudo parent.
10489 */
10490 cur = newDoc->children->children;
10491 *list = cur;
10492 while (cur != NULL) {
10493 cur->parent = NULL;
10494 cur = cur->next;
10495 }
10496 newDoc->children->children = NULL;
10497 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010498 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010499 }
10500 if (sax != NULL)
10501 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010502 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10503 oldctxt->node_seq.length = ctxt->node_seq.length;
10504 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010505 ctxt->node_seq.maximum = 0;
10506 ctxt->node_seq.length = 0;
10507 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010508 xmlFreeParserCtxt(ctxt);
10509 newDoc->intSubset = NULL;
10510 newDoc->extSubset = NULL;
10511 xmlFreeDoc(newDoc);
10512
10513 return(ret);
10514}
10515
Daniel Veillard81273902003-09-30 00:43:48 +000010516#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010517/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010518 * xmlParseExternalEntity:
10519 * @doc: the document the chunk pertains to
10520 * @sax: the SAX handler bloc (possibly NULL)
10521 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10522 * @depth: Used for loop detection, use 0
10523 * @URL: the URL for the entity to load
10524 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010525 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010526 *
10527 * Parse an external general entity
10528 * An external general parsed entity is well-formed if it matches the
10529 * production labeled extParsedEnt.
10530 *
10531 * [78] extParsedEnt ::= TextDecl? content
10532 *
10533 * Returns 0 if the entity is well formed, -1 in case of args problem and
10534 * the parser error code otherwise
10535 */
10536
10537int
10538xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010539 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010540 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010541 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010542}
10543
10544/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010545 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010546 * @doc: the document the chunk pertains to
10547 * @sax: the SAX handler bloc (possibly NULL)
10548 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10549 * @depth: Used for loop detection, use 0
10550 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010551 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010552 *
10553 * Parse a well-balanced chunk of an XML document
10554 * called by the parser
10555 * The allowed sequence for the Well Balanced Chunk is the one defined by
10556 * the content production in the XML grammar:
10557 *
10558 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10559 *
10560 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10561 * the parser error code otherwise
10562 */
10563
10564int
10565xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010566 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010567 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10568 depth, string, lst, 0 );
10569}
Daniel Veillard81273902003-09-30 00:43:48 +000010570#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010571
10572/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010573 * xmlParseBalancedChunkMemoryInternal:
10574 * @oldctxt: the existing parsing context
10575 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10576 * @user_data: the user data field for the parser context
10577 * @lst: the return value for the set of parsed nodes
10578 *
10579 *
10580 * Parse a well-balanced chunk of an XML document
10581 * called by the parser
10582 * The allowed sequence for the Well Balanced Chunk is the one defined by
10583 * the content production in the XML grammar:
10584 *
10585 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10586 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010587 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10588 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010589 *
10590 * In case recover is set to 1, the nodelist will not be empty even if
10591 * the parsed chunk is not well balanced.
10592 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010593static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010594xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10595 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10596 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010597 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010598 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010599 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010600 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010601 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010602
10603 if (oldctxt->depth > 40) {
10604 return(XML_ERR_ENTITY_LOOP);
10605 }
10606
10607
10608 if (lst != NULL)
10609 *lst = NULL;
10610 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010611 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010612
10613 size = xmlStrlen(string);
10614
10615 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010616 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010617 if (user_data != NULL)
10618 ctxt->userData = user_data;
10619 else
10620 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010621 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10622 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010623 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10624 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10625 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010626
10627 oldsax = ctxt->sax;
10628 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010629 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010630 ctxt->replaceEntities = oldctxt->replaceEntities;
10631 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010632
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010633 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010634 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010635 newDoc = xmlNewDoc(BAD_CAST "1.0");
10636 if (newDoc == NULL) {
10637 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010638 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010639 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010640 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010641 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010642 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010643 } else {
10644 ctxt->myDoc = oldctxt->myDoc;
10645 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010646 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010647 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010648 BAD_CAST "pseudoroot", NULL);
10649 if (ctxt->myDoc->children == NULL) {
10650 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010651 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010652 xmlFreeParserCtxt(ctxt);
10653 if (newDoc != NULL)
10654 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010655 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010656 }
10657 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010658 ctxt->instate = XML_PARSER_CONTENT;
10659 ctxt->depth = oldctxt->depth + 1;
10660
Daniel Veillard328f48c2002-11-15 15:24:34 +000010661 ctxt->validate = 0;
10662 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010663 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10664 /*
10665 * ID/IDREF registration will be done in xmlValidateElement below
10666 */
10667 ctxt->loadsubset |= XML_SKIP_IDS;
10668 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010669 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010670 ctxt->attsDefault = oldctxt->attsDefault;
10671 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010672
Daniel Veillard68e9e742002-11-16 15:35:11 +000010673 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010674 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010675 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010676 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010677 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010678 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010679 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010680 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010681 }
10682
10683 if (!ctxt->wellFormed) {
10684 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010685 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010686 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010687 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010688 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010689 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010690 }
10691
William M. Brack7b9154b2003-09-27 19:23:50 +000010692 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010693 xmlNodePtr cur;
10694
10695 /*
10696 * Return the newly created nodeset after unlinking it from
10697 * they pseudo parent.
10698 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010699 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010700 *lst = cur;
10701 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010702#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010703 if (oldctxt->validate && oldctxt->wellFormed &&
10704 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10705 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10706 oldctxt->myDoc, cur);
10707 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010708#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010709 cur->parent = NULL;
10710 cur = cur->next;
10711 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010712 ctxt->myDoc->children->children = NULL;
10713 }
10714 if (ctxt->myDoc != NULL) {
10715 xmlFreeNode(ctxt->myDoc->children);
10716 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010717 }
10718
10719 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010720 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010721 ctxt->attsDefault = NULL;
10722 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010723 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010724 if (newDoc != NULL)
10725 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010726
10727 return(ret);
10728}
10729
Daniel Veillard81273902003-09-30 00:43:48 +000010730#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010731/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010732 * xmlParseBalancedChunkMemoryRecover:
10733 * @doc: the document the chunk pertains to
10734 * @sax: the SAX handler bloc (possibly NULL)
10735 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10736 * @depth: Used for loop detection, use 0
10737 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10738 * @lst: the return value for the set of parsed nodes
10739 * @recover: return nodes even if the data is broken (use 0)
10740 *
10741 *
10742 * Parse a well-balanced chunk of an XML document
10743 * called by the parser
10744 * The allowed sequence for the Well Balanced Chunk is the one defined by
10745 * the content production in the XML grammar:
10746 *
10747 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10748 *
10749 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10750 * the parser error code otherwise
10751 *
10752 * In case recover is set to 1, the nodelist will not be empty even if
10753 * the parsed chunk is not well balanced.
10754 */
10755int
10756xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10757 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10758 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010759 xmlParserCtxtPtr ctxt;
10760 xmlDocPtr newDoc;
10761 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010762 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010763 int size;
10764 int ret = 0;
10765
10766 if (depth > 40) {
10767 return(XML_ERR_ENTITY_LOOP);
10768 }
10769
10770
Daniel Veillardcda96922001-08-21 10:56:31 +000010771 if (lst != NULL)
10772 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010773 if (string == NULL)
10774 return(-1);
10775
10776 size = xmlStrlen(string);
10777
10778 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10779 if (ctxt == NULL) return(-1);
10780 ctxt->userData = ctxt;
10781 if (sax != NULL) {
10782 oldsax = ctxt->sax;
10783 ctxt->sax = sax;
10784 if (user_data != NULL)
10785 ctxt->userData = user_data;
10786 }
10787 newDoc = xmlNewDoc(BAD_CAST "1.0");
10788 if (newDoc == NULL) {
10789 xmlFreeParserCtxt(ctxt);
10790 return(-1);
10791 }
10792 if (doc != NULL) {
10793 newDoc->intSubset = doc->intSubset;
10794 newDoc->extSubset = doc->extSubset;
10795 }
10796 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10797 if (newDoc->children == NULL) {
10798 if (sax != NULL)
10799 ctxt->sax = oldsax;
10800 xmlFreeParserCtxt(ctxt);
10801 newDoc->intSubset = NULL;
10802 newDoc->extSubset = NULL;
10803 xmlFreeDoc(newDoc);
10804 return(-1);
10805 }
10806 nodePush(ctxt, newDoc->children);
10807 if (doc == NULL) {
10808 ctxt->myDoc = newDoc;
10809 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010810 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010811 newDoc->children->doc = doc;
10812 }
10813 ctxt->instate = XML_PARSER_CONTENT;
10814 ctxt->depth = depth;
10815
10816 /*
10817 * Doing validity checking on chunk doesn't make sense
10818 */
10819 ctxt->validate = 0;
10820 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010821 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010822
Daniel Veillardb39bc392002-10-26 19:29:51 +000010823 if ( doc != NULL ){
10824 content = doc->children;
10825 doc->children = NULL;
10826 xmlParseContent(ctxt);
10827 doc->children = content;
10828 }
10829 else {
10830 xmlParseContent(ctxt);
10831 }
Owen Taylor3473f882001-02-23 17:55:21 +000010832 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010833 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010834 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010835 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010836 }
10837 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010838 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010839 }
10840
10841 if (!ctxt->wellFormed) {
10842 if (ctxt->errNo == 0)
10843 ret = 1;
10844 else
10845 ret = ctxt->errNo;
10846 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010847 ret = 0;
10848 }
10849
10850 if (lst != NULL && (ret == 0 || recover == 1)) {
10851 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010852
10853 /*
10854 * Return the newly created nodeset after unlinking it from
10855 * they pseudo parent.
10856 */
10857 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010858 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010859 while (cur != NULL) {
10860 cur->parent = NULL;
10861 cur = cur->next;
10862 }
10863 newDoc->children->children = NULL;
10864 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010865
Owen Taylor3473f882001-02-23 17:55:21 +000010866 if (sax != NULL)
10867 ctxt->sax = oldsax;
10868 xmlFreeParserCtxt(ctxt);
10869 newDoc->intSubset = NULL;
10870 newDoc->extSubset = NULL;
10871 xmlFreeDoc(newDoc);
10872
10873 return(ret);
10874}
10875
10876/**
10877 * xmlSAXParseEntity:
10878 * @sax: the SAX handler block
10879 * @filename: the filename
10880 *
10881 * parse an XML external entity out of context and build a tree.
10882 * It use the given SAX function block to handle the parsing callback.
10883 * If sax is NULL, fallback to the default DOM tree building routines.
10884 *
10885 * [78] extParsedEnt ::= TextDecl? content
10886 *
10887 * This correspond to a "Well Balanced" chunk
10888 *
10889 * Returns the resulting document tree
10890 */
10891
10892xmlDocPtr
10893xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10894 xmlDocPtr ret;
10895 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010896
10897 ctxt = xmlCreateFileParserCtxt(filename);
10898 if (ctxt == NULL) {
10899 return(NULL);
10900 }
10901 if (sax != NULL) {
10902 if (ctxt->sax != NULL)
10903 xmlFree(ctxt->sax);
10904 ctxt->sax = sax;
10905 ctxt->userData = NULL;
10906 }
10907
Owen Taylor3473f882001-02-23 17:55:21 +000010908 xmlParseExtParsedEnt(ctxt);
10909
10910 if (ctxt->wellFormed)
10911 ret = ctxt->myDoc;
10912 else {
10913 ret = NULL;
10914 xmlFreeDoc(ctxt->myDoc);
10915 ctxt->myDoc = NULL;
10916 }
10917 if (sax != NULL)
10918 ctxt->sax = NULL;
10919 xmlFreeParserCtxt(ctxt);
10920
10921 return(ret);
10922}
10923
10924/**
10925 * xmlParseEntity:
10926 * @filename: the filename
10927 *
10928 * parse an XML external entity out of context and build a tree.
10929 *
10930 * [78] extParsedEnt ::= TextDecl? content
10931 *
10932 * This correspond to a "Well Balanced" chunk
10933 *
10934 * Returns the resulting document tree
10935 */
10936
10937xmlDocPtr
10938xmlParseEntity(const char *filename) {
10939 return(xmlSAXParseEntity(NULL, filename));
10940}
Daniel Veillard81273902003-09-30 00:43:48 +000010941#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010942
10943/**
10944 * xmlCreateEntityParserCtxt:
10945 * @URL: the entity URL
10946 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010947 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010948 *
10949 * Create a parser context for an external entity
10950 * Automatic support for ZLIB/Compress compressed document is provided
10951 * by default if found at compile-time.
10952 *
10953 * Returns the new parser context or NULL
10954 */
10955xmlParserCtxtPtr
10956xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10957 const xmlChar *base) {
10958 xmlParserCtxtPtr ctxt;
10959 xmlParserInputPtr inputStream;
10960 char *directory = NULL;
10961 xmlChar *uri;
10962
10963 ctxt = xmlNewParserCtxt();
10964 if (ctxt == NULL) {
10965 return(NULL);
10966 }
10967
10968 uri = xmlBuildURI(URL, base);
10969
10970 if (uri == NULL) {
10971 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10972 if (inputStream == NULL) {
10973 xmlFreeParserCtxt(ctxt);
10974 return(NULL);
10975 }
10976
10977 inputPush(ctxt, inputStream);
10978
10979 if ((ctxt->directory == NULL) && (directory == NULL))
10980 directory = xmlParserGetDirectory((char *)URL);
10981 if ((ctxt->directory == NULL) && (directory != NULL))
10982 ctxt->directory = directory;
10983 } else {
10984 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10985 if (inputStream == NULL) {
10986 xmlFree(uri);
10987 xmlFreeParserCtxt(ctxt);
10988 return(NULL);
10989 }
10990
10991 inputPush(ctxt, inputStream);
10992
10993 if ((ctxt->directory == NULL) && (directory == NULL))
10994 directory = xmlParserGetDirectory((char *)uri);
10995 if ((ctxt->directory == NULL) && (directory != NULL))
10996 ctxt->directory = directory;
10997 xmlFree(uri);
10998 }
Owen Taylor3473f882001-02-23 17:55:21 +000010999 return(ctxt);
11000}
11001
11002/************************************************************************
11003 * *
11004 * Front ends when parsing from a file *
11005 * *
11006 ************************************************************************/
11007
11008/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011009 * xmlCreateURLParserCtxt:
11010 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011011 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011012 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011013 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011014 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011015 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011016 *
11017 * Returns the new parser context or NULL
11018 */
11019xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011020xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011021{
11022 xmlParserCtxtPtr ctxt;
11023 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011024 char *directory = NULL;
11025
Owen Taylor3473f882001-02-23 17:55:21 +000011026 ctxt = xmlNewParserCtxt();
11027 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011028 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011029 return(NULL);
11030 }
11031
Daniel Veillard61b93382003-11-03 14:28:31 +000011032 if (options != 0)
11033 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011034
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011035 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011036 if (inputStream == NULL) {
11037 xmlFreeParserCtxt(ctxt);
11038 return(NULL);
11039 }
11040
Owen Taylor3473f882001-02-23 17:55:21 +000011041 inputPush(ctxt, inputStream);
11042 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011043 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011044 if ((ctxt->directory == NULL) && (directory != NULL))
11045 ctxt->directory = directory;
11046
11047 return(ctxt);
11048}
11049
Daniel Veillard61b93382003-11-03 14:28:31 +000011050/**
11051 * xmlCreateFileParserCtxt:
11052 * @filename: the filename
11053 *
11054 * Create a parser context for a file content.
11055 * Automatic support for ZLIB/Compress compressed document is provided
11056 * by default if found at compile-time.
11057 *
11058 * Returns the new parser context or NULL
11059 */
11060xmlParserCtxtPtr
11061xmlCreateFileParserCtxt(const char *filename)
11062{
11063 return(xmlCreateURLParserCtxt(filename, 0));
11064}
11065
Daniel Veillard81273902003-09-30 00:43:48 +000011066#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011067/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011068 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011069 * @sax: the SAX handler block
11070 * @filename: the filename
11071 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11072 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011073 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011074 *
11075 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11076 * compressed document is provided by default if found at compile-time.
11077 * It use the given SAX function block to handle the parsing callback.
11078 * If sax is NULL, fallback to the default DOM tree building routines.
11079 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011080 * User data (void *) is stored within the parser context in the
11081 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011082 *
Owen Taylor3473f882001-02-23 17:55:21 +000011083 * Returns the resulting document tree
11084 */
11085
11086xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011087xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11088 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011089 xmlDocPtr ret;
11090 xmlParserCtxtPtr ctxt;
11091 char *directory = NULL;
11092
Daniel Veillard635ef722001-10-29 11:48:19 +000011093 xmlInitParser();
11094
Owen Taylor3473f882001-02-23 17:55:21 +000011095 ctxt = xmlCreateFileParserCtxt(filename);
11096 if (ctxt == NULL) {
11097 return(NULL);
11098 }
11099 if (sax != NULL) {
11100 if (ctxt->sax != NULL)
11101 xmlFree(ctxt->sax);
11102 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011103 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011104 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011105 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011106 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011107 }
Owen Taylor3473f882001-02-23 17:55:21 +000011108
11109 if ((ctxt->directory == NULL) && (directory == NULL))
11110 directory = xmlParserGetDirectory(filename);
11111 if ((ctxt->directory == NULL) && (directory != NULL))
11112 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11113
Daniel Veillarddad3f682002-11-17 16:47:27 +000011114 ctxt->recovery = recovery;
11115
Owen Taylor3473f882001-02-23 17:55:21 +000011116 xmlParseDocument(ctxt);
11117
William M. Brackc07329e2003-09-08 01:57:30 +000011118 if ((ctxt->wellFormed) || recovery) {
11119 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011120 if (ret != NULL) {
11121 if (ctxt->input->buf->compressed > 0)
11122 ret->compression = 9;
11123 else
11124 ret->compression = ctxt->input->buf->compressed;
11125 }
William M. Brackc07329e2003-09-08 01:57:30 +000011126 }
Owen Taylor3473f882001-02-23 17:55:21 +000011127 else {
11128 ret = NULL;
11129 xmlFreeDoc(ctxt->myDoc);
11130 ctxt->myDoc = NULL;
11131 }
11132 if (sax != NULL)
11133 ctxt->sax = NULL;
11134 xmlFreeParserCtxt(ctxt);
11135
11136 return(ret);
11137}
11138
11139/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011140 * xmlSAXParseFile:
11141 * @sax: the SAX handler block
11142 * @filename: the filename
11143 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11144 * documents
11145 *
11146 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11147 * compressed document is provided by default if found at compile-time.
11148 * It use the given SAX function block to handle the parsing callback.
11149 * If sax is NULL, fallback to the default DOM tree building routines.
11150 *
11151 * Returns the resulting document tree
11152 */
11153
11154xmlDocPtr
11155xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11156 int recovery) {
11157 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11158}
11159
11160/**
Owen Taylor3473f882001-02-23 17:55:21 +000011161 * xmlRecoverDoc:
11162 * @cur: a pointer to an array of xmlChar
11163 *
11164 * parse an XML in-memory document and build a tree.
11165 * In the case the document is not Well Formed, a tree is built anyway
11166 *
11167 * Returns the resulting document tree
11168 */
11169
11170xmlDocPtr
11171xmlRecoverDoc(xmlChar *cur) {
11172 return(xmlSAXParseDoc(NULL, cur, 1));
11173}
11174
11175/**
11176 * xmlParseFile:
11177 * @filename: the filename
11178 *
11179 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11180 * compressed document is provided by default if found at compile-time.
11181 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011182 * Returns the resulting document tree if the file was wellformed,
11183 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011184 */
11185
11186xmlDocPtr
11187xmlParseFile(const char *filename) {
11188 return(xmlSAXParseFile(NULL, filename, 0));
11189}
11190
11191/**
11192 * xmlRecoverFile:
11193 * @filename: the filename
11194 *
11195 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11196 * compressed document is provided by default if found at compile-time.
11197 * In the case the document is not Well Formed, a tree is built anyway
11198 *
11199 * Returns the resulting document tree
11200 */
11201
11202xmlDocPtr
11203xmlRecoverFile(const char *filename) {
11204 return(xmlSAXParseFile(NULL, filename, 1));
11205}
11206
11207
11208/**
11209 * xmlSetupParserForBuffer:
11210 * @ctxt: an XML parser context
11211 * @buffer: a xmlChar * buffer
11212 * @filename: a file name
11213 *
11214 * Setup the parser context to parse a new buffer; Clears any prior
11215 * contents from the parser context. The buffer parameter must not be
11216 * NULL, but the filename parameter can be
11217 */
11218void
11219xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11220 const char* filename)
11221{
11222 xmlParserInputPtr input;
11223
11224 input = xmlNewInputStream(ctxt);
11225 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011226 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011227 xmlFree(ctxt);
11228 return;
11229 }
11230
11231 xmlClearParserCtxt(ctxt);
11232 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011233 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011234 input->base = buffer;
11235 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011236 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011237 inputPush(ctxt, input);
11238}
11239
11240/**
11241 * xmlSAXUserParseFile:
11242 * @sax: a SAX handler
11243 * @user_data: The user data returned on SAX callbacks
11244 * @filename: a file name
11245 *
11246 * parse an XML file and call the given SAX handler routines.
11247 * Automatic support for ZLIB/Compress compressed document is provided
11248 *
11249 * Returns 0 in case of success or a error number otherwise
11250 */
11251int
11252xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11253 const char *filename) {
11254 int ret = 0;
11255 xmlParserCtxtPtr ctxt;
11256
11257 ctxt = xmlCreateFileParserCtxt(filename);
11258 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011259#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011260 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011261#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011262 xmlFree(ctxt->sax);
11263 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011264 xmlDetectSAX2(ctxt);
11265
Owen Taylor3473f882001-02-23 17:55:21 +000011266 if (user_data != NULL)
11267 ctxt->userData = user_data;
11268
11269 xmlParseDocument(ctxt);
11270
11271 if (ctxt->wellFormed)
11272 ret = 0;
11273 else {
11274 if (ctxt->errNo != 0)
11275 ret = ctxt->errNo;
11276 else
11277 ret = -1;
11278 }
11279 if (sax != NULL)
11280 ctxt->sax = NULL;
11281 xmlFreeParserCtxt(ctxt);
11282
11283 return ret;
11284}
Daniel Veillard81273902003-09-30 00:43:48 +000011285#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011286
11287/************************************************************************
11288 * *
11289 * Front ends when parsing from memory *
11290 * *
11291 ************************************************************************/
11292
11293/**
11294 * xmlCreateMemoryParserCtxt:
11295 * @buffer: a pointer to a char array
11296 * @size: the size of the array
11297 *
11298 * Create a parser context for an XML in-memory document.
11299 *
11300 * Returns the new parser context or NULL
11301 */
11302xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011303xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011304 xmlParserCtxtPtr ctxt;
11305 xmlParserInputPtr input;
11306 xmlParserInputBufferPtr buf;
11307
11308 if (buffer == NULL)
11309 return(NULL);
11310 if (size <= 0)
11311 return(NULL);
11312
11313 ctxt = xmlNewParserCtxt();
11314 if (ctxt == NULL)
11315 return(NULL);
11316
Daniel Veillard53350552003-09-18 13:35:51 +000011317 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011318 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011319 if (buf == NULL) {
11320 xmlFreeParserCtxt(ctxt);
11321 return(NULL);
11322 }
Owen Taylor3473f882001-02-23 17:55:21 +000011323
11324 input = xmlNewInputStream(ctxt);
11325 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011326 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011327 xmlFreeParserCtxt(ctxt);
11328 return(NULL);
11329 }
11330
11331 input->filename = NULL;
11332 input->buf = buf;
11333 input->base = input->buf->buffer->content;
11334 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011335 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011336
11337 inputPush(ctxt, input);
11338 return(ctxt);
11339}
11340
Daniel Veillard81273902003-09-30 00:43:48 +000011341#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011342/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011343 * xmlSAXParseMemoryWithData:
11344 * @sax: the SAX handler block
11345 * @buffer: an pointer to a char array
11346 * @size: the size of the array
11347 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11348 * documents
11349 * @data: the userdata
11350 *
11351 * parse an XML in-memory block and use the given SAX function block
11352 * to handle the parsing callback. If sax is NULL, fallback to the default
11353 * DOM tree building routines.
11354 *
11355 * User data (void *) is stored within the parser context in the
11356 * context's _private member, so it is available nearly everywhere in libxml
11357 *
11358 * Returns the resulting document tree
11359 */
11360
11361xmlDocPtr
11362xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11363 int size, int recovery, void *data) {
11364 xmlDocPtr ret;
11365 xmlParserCtxtPtr ctxt;
11366
11367 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11368 if (ctxt == NULL) return(NULL);
11369 if (sax != NULL) {
11370 if (ctxt->sax != NULL)
11371 xmlFree(ctxt->sax);
11372 ctxt->sax = sax;
11373 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011374 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011375 if (data!=NULL) {
11376 ctxt->_private=data;
11377 }
11378
Daniel Veillardadba5f12003-04-04 16:09:01 +000011379 ctxt->recovery = recovery;
11380
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011381 xmlParseDocument(ctxt);
11382
11383 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11384 else {
11385 ret = NULL;
11386 xmlFreeDoc(ctxt->myDoc);
11387 ctxt->myDoc = NULL;
11388 }
11389 if (sax != NULL)
11390 ctxt->sax = NULL;
11391 xmlFreeParserCtxt(ctxt);
11392
11393 return(ret);
11394}
11395
11396/**
Owen Taylor3473f882001-02-23 17:55:21 +000011397 * xmlSAXParseMemory:
11398 * @sax: the SAX handler block
11399 * @buffer: an pointer to a char array
11400 * @size: the size of the array
11401 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11402 * documents
11403 *
11404 * parse an XML in-memory block and use the given SAX function block
11405 * to handle the parsing callback. If sax is NULL, fallback to the default
11406 * DOM tree building routines.
11407 *
11408 * Returns the resulting document tree
11409 */
11410xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011411xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11412 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011413 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011414}
11415
11416/**
11417 * xmlParseMemory:
11418 * @buffer: an pointer to a char array
11419 * @size: the size of the array
11420 *
11421 * parse an XML in-memory block and build a tree.
11422 *
11423 * Returns the resulting document tree
11424 */
11425
Daniel Veillard50822cb2001-07-26 20:05:51 +000011426xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011427 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11428}
11429
11430/**
11431 * xmlRecoverMemory:
11432 * @buffer: an pointer to a char array
11433 * @size: the size of the array
11434 *
11435 * parse an XML in-memory block and build a tree.
11436 * In the case the document is not Well Formed, a tree is built anyway
11437 *
11438 * Returns the resulting document tree
11439 */
11440
Daniel Veillard50822cb2001-07-26 20:05:51 +000011441xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011442 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11443}
11444
11445/**
11446 * xmlSAXUserParseMemory:
11447 * @sax: a SAX handler
11448 * @user_data: The user data returned on SAX callbacks
11449 * @buffer: an in-memory XML document input
11450 * @size: the length of the XML document in bytes
11451 *
11452 * A better SAX parsing routine.
11453 * parse an XML in-memory buffer and call the given SAX handler routines.
11454 *
11455 * Returns 0 in case of success or a error number otherwise
11456 */
11457int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011458 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011459 int ret = 0;
11460 xmlParserCtxtPtr ctxt;
11461 xmlSAXHandlerPtr oldsax = NULL;
11462
Daniel Veillard9e923512002-08-14 08:48:52 +000011463 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011464 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11465 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011466 oldsax = ctxt->sax;
11467 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011468 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011469 if (user_data != NULL)
11470 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011471
11472 xmlParseDocument(ctxt);
11473
11474 if (ctxt->wellFormed)
11475 ret = 0;
11476 else {
11477 if (ctxt->errNo != 0)
11478 ret = ctxt->errNo;
11479 else
11480 ret = -1;
11481 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011482 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011483 xmlFreeParserCtxt(ctxt);
11484
11485 return ret;
11486}
Daniel Veillard81273902003-09-30 00:43:48 +000011487#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011488
11489/**
11490 * xmlCreateDocParserCtxt:
11491 * @cur: a pointer to an array of xmlChar
11492 *
11493 * Creates a parser context for an XML in-memory document.
11494 *
11495 * Returns the new parser context or NULL
11496 */
11497xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011498xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011499 int len;
11500
11501 if (cur == NULL)
11502 return(NULL);
11503 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011504 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011505}
11506
Daniel Veillard81273902003-09-30 00:43:48 +000011507#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011508/**
11509 * xmlSAXParseDoc:
11510 * @sax: the SAX handler block
11511 * @cur: a pointer to an array of xmlChar
11512 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11513 * documents
11514 *
11515 * parse an XML in-memory document and build a tree.
11516 * It use the given SAX function block to handle the parsing callback.
11517 * If sax is NULL, fallback to the default DOM tree building routines.
11518 *
11519 * Returns the resulting document tree
11520 */
11521
11522xmlDocPtr
11523xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11524 xmlDocPtr ret;
11525 xmlParserCtxtPtr ctxt;
11526
11527 if (cur == NULL) return(NULL);
11528
11529
11530 ctxt = xmlCreateDocParserCtxt(cur);
11531 if (ctxt == NULL) return(NULL);
11532 if (sax != NULL) {
11533 ctxt->sax = sax;
11534 ctxt->userData = NULL;
11535 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011536 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011537
11538 xmlParseDocument(ctxt);
11539 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11540 else {
11541 ret = NULL;
11542 xmlFreeDoc(ctxt->myDoc);
11543 ctxt->myDoc = NULL;
11544 }
11545 if (sax != NULL)
11546 ctxt->sax = NULL;
11547 xmlFreeParserCtxt(ctxt);
11548
11549 return(ret);
11550}
11551
11552/**
11553 * xmlParseDoc:
11554 * @cur: a pointer to an array of xmlChar
11555 *
11556 * parse an XML in-memory document and build a tree.
11557 *
11558 * Returns the resulting document tree
11559 */
11560
11561xmlDocPtr
11562xmlParseDoc(xmlChar *cur) {
11563 return(xmlSAXParseDoc(NULL, cur, 0));
11564}
Daniel Veillard81273902003-09-30 00:43:48 +000011565#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011566
Daniel Veillard81273902003-09-30 00:43:48 +000011567#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011568/************************************************************************
11569 * *
11570 * Specific function to keep track of entities references *
11571 * and used by the XSLT debugger *
11572 * *
11573 ************************************************************************/
11574
11575static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11576
11577/**
11578 * xmlAddEntityReference:
11579 * @ent : A valid entity
11580 * @firstNode : A valid first node for children of entity
11581 * @lastNode : A valid last node of children entity
11582 *
11583 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11584 */
11585static void
11586xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11587 xmlNodePtr lastNode)
11588{
11589 if (xmlEntityRefFunc != NULL) {
11590 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11591 }
11592}
11593
11594
11595/**
11596 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011597 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011598 *
11599 * Set the function to call call back when a xml reference has been made
11600 */
11601void
11602xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11603{
11604 xmlEntityRefFunc = func;
11605}
Daniel Veillard81273902003-09-30 00:43:48 +000011606#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011607
11608/************************************************************************
11609 * *
11610 * Miscellaneous *
11611 * *
11612 ************************************************************************/
11613
11614#ifdef LIBXML_XPATH_ENABLED
11615#include <libxml/xpath.h>
11616#endif
11617
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011618extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011619static int xmlParserInitialized = 0;
11620
11621/**
11622 * xmlInitParser:
11623 *
11624 * Initialization function for the XML parser.
11625 * This is not reentrant. Call once before processing in case of
11626 * use in multithreaded programs.
11627 */
11628
11629void
11630xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011631 if (xmlParserInitialized != 0)
11632 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011633
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011634 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11635 (xmlGenericError == NULL))
11636 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011637 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011638 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011639 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011640 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011641 xmlDefaultSAXHandlerInit();
11642 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011643#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011644 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011645#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011646#ifdef LIBXML_HTML_ENABLED
11647 htmlInitAutoClose();
11648 htmlDefaultSAXHandlerInit();
11649#endif
11650#ifdef LIBXML_XPATH_ENABLED
11651 xmlXPathInit();
11652#endif
11653 xmlParserInitialized = 1;
11654}
11655
11656/**
11657 * xmlCleanupParser:
11658 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011659 * Cleanup function for the XML library. It tries to reclaim all
11660 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011661 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011662 * function should not prevent reusing the library but one should
11663 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011664 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011665 */
11666
11667void
11668xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011669 if (!xmlParserInitialized)
11670 return;
11671
Owen Taylor3473f882001-02-23 17:55:21 +000011672 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011673#ifdef LIBXML_CATALOG_ENABLED
11674 xmlCatalogCleanup();
11675#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011676 xmlCleanupInputCallbacks();
11677#ifdef LIBXML_OUTPUT_ENABLED
11678 xmlCleanupOutputCallbacks();
11679#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011680 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011681 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011682 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011683 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011684 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011685}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011686
11687/************************************************************************
11688 * *
11689 * New set (2.6.0) of simpler and more flexible APIs *
11690 * *
11691 ************************************************************************/
11692
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe inside if/else); terminate each use with ';'.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
11704
11705/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011706 * xmlCtxtReset:
11707 * @ctxt: an XML parser context
11708 *
11709 * Reset a parser context
11710 */
11711void
11712xmlCtxtReset(xmlParserCtxtPtr ctxt)
11713{
11714 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011715 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011716
11717 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11718 xmlFreeInputStream(input);
11719 }
11720 ctxt->inputNr = 0;
11721 ctxt->input = NULL;
11722
11723 ctxt->spaceNr = 0;
11724 ctxt->spaceTab[0] = -1;
11725 ctxt->space = &ctxt->spaceTab[0];
11726
11727
11728 ctxt->nodeNr = 0;
11729 ctxt->node = NULL;
11730
11731 ctxt->nameNr = 0;
11732 ctxt->name = NULL;
11733
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011734 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011735 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011736 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011737 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011738 DICT_FREE(ctxt->directory);
11739 ctxt->directory = NULL;
11740 DICT_FREE(ctxt->extSubURI);
11741 ctxt->extSubURI = NULL;
11742 DICT_FREE(ctxt->extSubSystem);
11743 ctxt->extSubSystem = NULL;
11744 if (ctxt->myDoc != NULL)
11745 xmlFreeDoc(ctxt->myDoc);
11746 ctxt->myDoc = NULL;
11747
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011748 ctxt->standalone = -1;
11749 ctxt->hasExternalSubset = 0;
11750 ctxt->hasPErefs = 0;
11751 ctxt->html = 0;
11752 ctxt->external = 0;
11753 ctxt->instate = XML_PARSER_START;
11754 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011755
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011756 ctxt->wellFormed = 1;
11757 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000011758 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011759 ctxt->valid = 1;
11760 ctxt->vctxt.userData = ctxt;
11761 ctxt->vctxt.error = xmlParserValidityError;
11762 ctxt->vctxt.warning = xmlParserValidityWarning;
11763 ctxt->record_info = 0;
11764 ctxt->nbChars = 0;
11765 ctxt->checkIndex = 0;
11766 ctxt->inSubset = 0;
11767 ctxt->errNo = XML_ERR_OK;
11768 ctxt->depth = 0;
11769 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11770 ctxt->catalogs = NULL;
11771 xmlInitNodeInfoSeq(&ctxt->node_seq);
11772
11773 if (ctxt->attsDefault != NULL) {
11774 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11775 ctxt->attsDefault = NULL;
11776 }
11777 if (ctxt->attsSpecial != NULL) {
11778 xmlHashFree(ctxt->attsSpecial, NULL);
11779 ctxt->attsSpecial = NULL;
11780 }
11781
Daniel Veillard4432df22003-09-28 18:58:27 +000011782#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011783 if (ctxt->catalogs != NULL)
11784 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011785#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011786 if (ctxt->lastError.code != XML_ERR_OK)
11787 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011788}
11789
11790/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011791 * xmlCtxtResetPush:
11792 * @ctxt: an XML parser context
11793 * @chunk: a pointer to an array of chars
11794 * @size: number of chars in the array
11795 * @filename: an optional file name or URI
11796 * @encoding: the document encoding, or NULL
11797 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011798 * Reset a push parser context
11799 *
11800 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011801 */
11802int
11803xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11804 int size, const char *filename, const char *encoding)
11805{
11806 xmlParserInputPtr inputStream;
11807 xmlParserInputBufferPtr buf;
11808 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11809
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011810 if (ctxt == NULL)
11811 return(1);
11812
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011813 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11814 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11815
11816 buf = xmlAllocParserInputBuffer(enc);
11817 if (buf == NULL)
11818 return(1);
11819
11820 if (ctxt == NULL) {
11821 xmlFreeParserInputBuffer(buf);
11822 return(1);
11823 }
11824
11825 xmlCtxtReset(ctxt);
11826
11827 if (ctxt->pushTab == NULL) {
11828 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11829 sizeof(xmlChar *));
11830 if (ctxt->pushTab == NULL) {
11831 xmlErrMemory(ctxt, NULL);
11832 xmlFreeParserInputBuffer(buf);
11833 return(1);
11834 }
11835 }
11836
11837 if (filename == NULL) {
11838 ctxt->directory = NULL;
11839 } else {
11840 ctxt->directory = xmlParserGetDirectory(filename);
11841 }
11842
11843 inputStream = xmlNewInputStream(ctxt);
11844 if (inputStream == NULL) {
11845 xmlFreeParserInputBuffer(buf);
11846 return(1);
11847 }
11848
11849 if (filename == NULL)
11850 inputStream->filename = NULL;
11851 else
11852 inputStream->filename = (char *)
11853 xmlCanonicPath((const xmlChar *) filename);
11854 inputStream->buf = buf;
11855 inputStream->base = inputStream->buf->buffer->content;
11856 inputStream->cur = inputStream->buf->buffer->content;
11857 inputStream->end =
11858 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11859
11860 inputPush(ctxt, inputStream);
11861
11862 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11863 (ctxt->input->buf != NULL)) {
11864 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11865 int cur = ctxt->input->cur - ctxt->input->base;
11866
11867 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11868
11869 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11870 ctxt->input->cur = ctxt->input->base + cur;
11871 ctxt->input->end =
11872 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11873 use];
11874#ifdef DEBUG_PUSH
11875 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11876#endif
11877 }
11878
11879 if (encoding != NULL) {
11880 xmlCharEncodingHandlerPtr hdlr;
11881
11882 hdlr = xmlFindCharEncodingHandler(encoding);
11883 if (hdlr != NULL) {
11884 xmlSwitchToEncoding(ctxt, hdlr);
11885 } else {
11886 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11887 "Unsupported encoding %s\n", BAD_CAST encoding);
11888 }
11889 } else if (enc != XML_CHAR_ENCODING_NONE) {
11890 xmlSwitchEncoding(ctxt, enc);
11891 }
11892
11893 return(0);
11894}
11895
11896/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011897 * xmlCtxtUseOptions:
11898 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011899 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011900 *
11901 * Applies the options to the parser context
11902 *
11903 * Returns 0 in case of success, the set of unknown or unimplemented options
11904 * in case of error.
11905 */
11906int
11907xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11908{
11909 if (options & XML_PARSE_RECOVER) {
11910 ctxt->recovery = 1;
11911 options -= XML_PARSE_RECOVER;
11912 } else
11913 ctxt->recovery = 0;
11914 if (options & XML_PARSE_DTDLOAD) {
11915 ctxt->loadsubset = XML_DETECT_IDS;
11916 options -= XML_PARSE_DTDLOAD;
11917 } else
11918 ctxt->loadsubset = 0;
11919 if (options & XML_PARSE_DTDATTR) {
11920 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
11921 options -= XML_PARSE_DTDATTR;
11922 }
11923 if (options & XML_PARSE_NOENT) {
11924 ctxt->replaceEntities = 1;
11925 /* ctxt->loadsubset |= XML_DETECT_IDS; */
11926 options -= XML_PARSE_NOENT;
11927 } else
11928 ctxt->replaceEntities = 0;
11929 if (options & XML_PARSE_NOWARNING) {
11930 ctxt->sax->warning = NULL;
11931 options -= XML_PARSE_NOWARNING;
11932 }
11933 if (options & XML_PARSE_NOERROR) {
11934 ctxt->sax->error = NULL;
11935 ctxt->sax->fatalError = NULL;
11936 options -= XML_PARSE_NOERROR;
11937 }
11938 if (options & XML_PARSE_PEDANTIC) {
11939 ctxt->pedantic = 1;
11940 options -= XML_PARSE_PEDANTIC;
11941 } else
11942 ctxt->pedantic = 0;
11943 if (options & XML_PARSE_NOBLANKS) {
11944 ctxt->keepBlanks = 0;
11945 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
11946 options -= XML_PARSE_NOBLANKS;
11947 } else
11948 ctxt->keepBlanks = 1;
11949 if (options & XML_PARSE_DTDVALID) {
11950 ctxt->validate = 1;
11951 if (options & XML_PARSE_NOWARNING)
11952 ctxt->vctxt.warning = NULL;
11953 if (options & XML_PARSE_NOERROR)
11954 ctxt->vctxt.error = NULL;
11955 options -= XML_PARSE_DTDVALID;
11956 } else
11957 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000011958#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011959 if (options & XML_PARSE_SAX1) {
11960 ctxt->sax->startElement = xmlSAX2StartElement;
11961 ctxt->sax->endElement = xmlSAX2EndElement;
11962 ctxt->sax->startElementNs = NULL;
11963 ctxt->sax->endElementNs = NULL;
11964 ctxt->sax->initialized = 1;
11965 options -= XML_PARSE_SAX1;
11966 }
Daniel Veillard81273902003-09-30 00:43:48 +000011967#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011968 if (options & XML_PARSE_NODICT) {
11969 ctxt->dictNames = 0;
11970 options -= XML_PARSE_NODICT;
11971 } else {
11972 ctxt->dictNames = 1;
11973 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000011974 if (options & XML_PARSE_NOCDATA) {
11975 ctxt->sax->cdataBlock = NULL;
11976 options -= XML_PARSE_NOCDATA;
11977 }
11978 if (options & XML_PARSE_NSCLEAN) {
11979 ctxt->options |= XML_PARSE_NSCLEAN;
11980 options -= XML_PARSE_NSCLEAN;
11981 }
Daniel Veillard61b93382003-11-03 14:28:31 +000011982 if (options & XML_PARSE_NONET) {
11983 ctxt->options |= XML_PARSE_NONET;
11984 options -= XML_PARSE_NONET;
11985 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000011986 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011987 return (options);
11988}
11989
11990/**
11991 * xmlDoRead:
11992 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000011993 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011994 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011995 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011996 * @reuse: keep the context for reuse
11997 *
11998 * Common front-end for the xmlRead functions
11999 *
12000 * Returns the resulting document tree or NULL
12001 */
12002static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012003xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12004 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012005{
12006 xmlDocPtr ret;
12007
12008 xmlCtxtUseOptions(ctxt, options);
12009 if (encoding != NULL) {
12010 xmlCharEncodingHandlerPtr hdlr;
12011
12012 hdlr = xmlFindCharEncodingHandler(encoding);
12013 if (hdlr != NULL)
12014 xmlSwitchToEncoding(ctxt, hdlr);
12015 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012016 if ((URL != NULL) && (ctxt->input != NULL) &&
12017 (ctxt->input->filename == NULL))
12018 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012019 xmlParseDocument(ctxt);
12020 if ((ctxt->wellFormed) || ctxt->recovery)
12021 ret = ctxt->myDoc;
12022 else {
12023 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012024 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012025 if ((ctxt->dictNames) &&
12026 (ctxt->myDoc->dict == ctxt->dict))
12027 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012028 xmlFreeDoc(ctxt->myDoc);
12029 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012030 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012031 ctxt->myDoc = NULL;
12032 if (!reuse) {
12033 if ((ctxt->dictNames) &&
12034 (ret != NULL) &&
12035 (ret->dict == ctxt->dict))
12036 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012037 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012038 } else {
12039 /* Must duplicate the reference to the dictionary */
12040 if ((ctxt->dictNames) &&
12041 (ret != NULL) &&
12042 (ret->dict == ctxt->dict))
12043 xmlDictReference(ctxt->dict);
12044 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012045
12046 return (ret);
12047}
12048
12049/**
12050 * xmlReadDoc:
12051 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012052 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012053 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012054 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012055 *
12056 * parse an XML in-memory document and build a tree.
12057 *
12058 * Returns the resulting document tree
12059 */
12060xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012061xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012062{
12063 xmlParserCtxtPtr ctxt;
12064
12065 if (cur == NULL)
12066 return (NULL);
12067
12068 ctxt = xmlCreateDocParserCtxt(cur);
12069 if (ctxt == NULL)
12070 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012071 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012072}
12073
12074/**
12075 * xmlReadFile:
12076 * @filename: a file or URL
12077 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012078 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012079 *
12080 * parse an XML file from the filesystem or the network.
12081 *
12082 * Returns the resulting document tree
12083 */
12084xmlDocPtr
12085xmlReadFile(const char *filename, const char *encoding, int options)
12086{
12087 xmlParserCtxtPtr ctxt;
12088
Daniel Veillard61b93382003-11-03 14:28:31 +000012089 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012090 if (ctxt == NULL)
12091 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012092 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012093}
12094
12095/**
12096 * xmlReadMemory:
12097 * @buffer: a pointer to a char array
12098 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012099 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012100 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012101 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012102 *
12103 * parse an XML in-memory document and build a tree.
12104 *
12105 * Returns the resulting document tree
12106 */
12107xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012108xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012109{
12110 xmlParserCtxtPtr ctxt;
12111
12112 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12113 if (ctxt == NULL)
12114 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012115 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012116}
12117
12118/**
12119 * xmlReadFd:
12120 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012121 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012122 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012123 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012124 *
12125 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012126 * NOTE that the file descriptor will not be closed when the
12127 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012128 *
12129 * Returns the resulting document tree
12130 */
12131xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012132xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012133{
12134 xmlParserCtxtPtr ctxt;
12135 xmlParserInputBufferPtr input;
12136 xmlParserInputPtr stream;
12137
12138 if (fd < 0)
12139 return (NULL);
12140
12141 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12142 if (input == NULL)
12143 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012144 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012145 ctxt = xmlNewParserCtxt();
12146 if (ctxt == NULL) {
12147 xmlFreeParserInputBuffer(input);
12148 return (NULL);
12149 }
12150 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12151 if (stream == NULL) {
12152 xmlFreeParserInputBuffer(input);
12153 xmlFreeParserCtxt(ctxt);
12154 return (NULL);
12155 }
12156 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012157 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012158}
12159
12160/**
12161 * xmlReadIO:
12162 * @ioread: an I/O read function
12163 * @ioclose: an I/O close function
12164 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012165 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012166 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012167 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012168 *
12169 * parse an XML document from I/O functions and source and build a tree.
12170 *
12171 * Returns the resulting document tree
12172 */
12173xmlDocPtr
12174xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012175 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012176{
12177 xmlParserCtxtPtr ctxt;
12178 xmlParserInputBufferPtr input;
12179 xmlParserInputPtr stream;
12180
12181 if (ioread == NULL)
12182 return (NULL);
12183
12184 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12185 XML_CHAR_ENCODING_NONE);
12186 if (input == NULL)
12187 return (NULL);
12188 ctxt = xmlNewParserCtxt();
12189 if (ctxt == NULL) {
12190 xmlFreeParserInputBuffer(input);
12191 return (NULL);
12192 }
12193 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12194 if (stream == NULL) {
12195 xmlFreeParserInputBuffer(input);
12196 xmlFreeParserCtxt(ctxt);
12197 return (NULL);
12198 }
12199 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012200 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012201}
12202
12203/**
12204 * xmlCtxtReadDoc:
12205 * @ctxt: an XML parser context
12206 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012207 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012208 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012209 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012210 *
12211 * parse an XML in-memory document and build a tree.
12212 * This reuses the existing @ctxt parser context
12213 *
12214 * Returns the resulting document tree
12215 */
12216xmlDocPtr
12217xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012218 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012219{
12220 xmlParserInputPtr stream;
12221
12222 if (cur == NULL)
12223 return (NULL);
12224 if (ctxt == NULL)
12225 return (NULL);
12226
12227 xmlCtxtReset(ctxt);
12228
12229 stream = xmlNewStringInputStream(ctxt, cur);
12230 if (stream == NULL) {
12231 return (NULL);
12232 }
12233 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012234 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012235}
12236
12237/**
12238 * xmlCtxtReadFile:
12239 * @ctxt: an XML parser context
12240 * @filename: a file or URL
12241 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012242 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012243 *
12244 * parse an XML file from the filesystem or the network.
12245 * This reuses the existing @ctxt parser context
12246 *
12247 * Returns the resulting document tree
12248 */
12249xmlDocPtr
12250xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12251 const char *encoding, int options)
12252{
12253 xmlParserInputPtr stream;
12254
12255 if (filename == NULL)
12256 return (NULL);
12257 if (ctxt == NULL)
12258 return (NULL);
12259
12260 xmlCtxtReset(ctxt);
12261
12262 stream = xmlNewInputFromFile(ctxt, filename);
12263 if (stream == NULL) {
12264 return (NULL);
12265 }
12266 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012267 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012268}
12269
12270/**
12271 * xmlCtxtReadMemory:
12272 * @ctxt: an XML parser context
12273 * @buffer: a pointer to a char array
12274 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012275 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012276 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012277 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012278 *
12279 * parse an XML in-memory document and build a tree.
12280 * This reuses the existing @ctxt parser context
12281 *
12282 * Returns the resulting document tree
12283 */
12284xmlDocPtr
12285xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012286 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012287{
12288 xmlParserInputBufferPtr input;
12289 xmlParserInputPtr stream;
12290
12291 if (ctxt == NULL)
12292 return (NULL);
12293 if (buffer == NULL)
12294 return (NULL);
12295
12296 xmlCtxtReset(ctxt);
12297
12298 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12299 if (input == NULL) {
12300 return(NULL);
12301 }
12302
12303 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12304 if (stream == NULL) {
12305 xmlFreeParserInputBuffer(input);
12306 return(NULL);
12307 }
12308
12309 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012310 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012311}
12312
12313/**
12314 * xmlCtxtReadFd:
12315 * @ctxt: an XML parser context
12316 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012317 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012318 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012319 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012320 *
12321 * parse an XML from a file descriptor and build a tree.
12322 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012323 * NOTE that the file descriptor will not be closed when the
12324 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012325 *
12326 * Returns the resulting document tree
12327 */
12328xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012329xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12330 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012331{
12332 xmlParserInputBufferPtr input;
12333 xmlParserInputPtr stream;
12334
12335 if (fd < 0)
12336 return (NULL);
12337 if (ctxt == NULL)
12338 return (NULL);
12339
12340 xmlCtxtReset(ctxt);
12341
12342
12343 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12344 if (input == NULL)
12345 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012346 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012347 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12348 if (stream == NULL) {
12349 xmlFreeParserInputBuffer(input);
12350 return (NULL);
12351 }
12352 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012353 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012354}
12355
12356/**
12357 * xmlCtxtReadIO:
12358 * @ctxt: an XML parser context
12359 * @ioread: an I/O read function
12360 * @ioclose: an I/O close function
12361 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012362 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012363 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012364 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012365 *
12366 * parse an XML document from I/O functions and source and build a tree.
12367 * This reuses the existing @ctxt parser context
12368 *
12369 * Returns the resulting document tree
12370 */
12371xmlDocPtr
12372xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12373 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012374 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012375 const char *encoding, int options)
12376{
12377 xmlParserInputBufferPtr input;
12378 xmlParserInputPtr stream;
12379
12380 if (ioread == NULL)
12381 return (NULL);
12382 if (ctxt == NULL)
12383 return (NULL);
12384
12385 xmlCtxtReset(ctxt);
12386
12387 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12388 XML_CHAR_ENCODING_NONE);
12389 if (input == NULL)
12390 return (NULL);
12391 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12392 if (stream == NULL) {
12393 xmlFreeParserInputBuffer(input);
12394 return (NULL);
12395 }
12396 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012397 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012398}