blob: bbec0bee2f7ac73ae1d780bf92f20eaab55a917e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different character ranges are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Upper bound on the nesting depth of the XML documents the parser
 * agrees to process. It is an arbitrary safety boundary rather than an
 * intrinsic limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/* NOTE(review): presumably selects the SAX2 code paths; use sites are not in view. */
#define SAX2 1

/*
 * Sizes for the parser working buffers.
 * NOTE(review): units and use sites are outside this excerpt - confirm.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for documents handled in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * pointers to them are constant, since the table is only ever read.
 */
static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000857 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 *                    use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1231
/* Raw access to the current input; all of these expect a local "ctxt". */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * c1 ... cn, compared as unsigned char. Evaluation stops at the first
 * mismatch. Every argument is parenthesized so that expressions can be
 * passed safely; s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance the input by val xmlChars, updating the character and
 * column counters (the line counter is not touched), then handle a PE
 * reference or an exhausted input found at the new position.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1262
/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one at a time so
 * that newlines update the line and column counters. val is parenthesized
 * so that an expression can be passed; it is re-evaluated on each
 * iteration of the loop.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1277
Daniel Veillarda880b122003-04-21 21:36:41 +00001278#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001279 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1280 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001281 xmlSHRINK (ctxt);
1282
1283static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1284 xmlParserInputShrink(ctxt->input);
1285 if ((*ctxt->input->cur == 0) &&
1286 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1287 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001288 }
Owen Taylor3473f882001-02-23 17:55:21 +00001289
Daniel Veillarda880b122003-04-21 21:36:41 +00001290#define GROW if ((ctxt->progressive == 0) && \
1291 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001292 xmlGROW (ctxt);
1293
1294static void xmlGROW (xmlParserCtxtPtr ctxt) {
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 if ((*ctxt->input->cur == 0) &&
1297 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1298 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001299}
Owen Taylor3473f882001-02-23 17:55:21 +00001300
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip one xmlChar, updating the column and character counters,
 * and try to grow the input when a zero byte is reached.
 * NOTE: expands to a brace block, not to a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long,
 * updating the line/column counters and handling a PE reference found at
 * the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the length of the character read */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v at b[i] and advance i, copying a
 * single xmlChar when l is 1 and calling xmlCopyCharMultiByte() otherwise.
 * NOTE: expands to a bare if/else statement without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001327
1328/**
1329 * xmlSkipBlankChars:
1330 * @ctxt: the XML parser context
1331 *
1332 * skip all blanks character found at that point in the input streams.
1333 * It pops up finished entities in the process if allowable at that point.
1334 *
1335 * Returns the number of space chars skipped
1336 */
1337
1338int
1339xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001340 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001341
1342 /*
1343 * It's Okay to use CUR/NEXT here since all the blanks are on
1344 * the ASCII range.
1345 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001346 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1347 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001348 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001349 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001350 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001351 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001352 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001353 if (*cur == '\n') {
1354 ctxt->input->line++; ctxt->input->col = 1;
1355 }
1356 cur++;
1357 res++;
1358 if (*cur == 0) {
1359 ctxt->input->cur = cur;
1360 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1361 cur = ctxt->input->cur;
1362 }
1363 }
1364 ctxt->input->cur = cur;
1365 } else {
1366 int cur;
1367 do {
1368 cur = CUR;
1369 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1370 NEXT;
1371 cur = CUR;
1372 res++;
1373 }
1374 while ((cur == 0) && (ctxt->inputNr > 1) &&
1375 (ctxt->instate != XML_PARSER_COMMENT)) {
1376 xmlPopInput(ctxt);
1377 cur = CUR;
1378 }
1379 /*
1380 * Need to handle support of entities branching here
1381 */
1382 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1383 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1384 }
Owen Taylor3473f882001-02-23 17:55:21 +00001385 return(res);
1386}
1387
1388/************************************************************************
1389 * *
1390 * Commodity functions to handle entities *
1391 * *
1392 ************************************************************************/
1393
1394/**
1395 * xmlPopInput:
1396 * @ctxt: an XML parser context
1397 *
1398 * xmlPopInput: the current input pointed by ctxt->input came to an end
1399 * pop it and return the next char.
1400 *
1401 * Returns the current xmlChar in the parser context
1402 */
1403xmlChar
1404xmlPopInput(xmlParserCtxtPtr ctxt) {
1405 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1406 if (xmlParserDebugEntities)
1407 xmlGenericError(xmlGenericErrorContext,
1408 "Popping input %d\n", ctxt->inputNr);
1409 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001410 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001411 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1412 return(xmlPopInput(ctxt));
1413 return(CUR);
1414}
1415
1416/**
1417 * xmlPushInput:
1418 * @ctxt: an XML parser context
1419 * @input: an XML parser input fragment (entity, XML fragment ...).
1420 *
1421 * xmlPushInput: switch to a new input stream which is stacked on top
1422 * of the previous one(s).
1423 */
1424void
1425xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1426 if (input == NULL) return;
1427
1428 if (xmlParserDebugEntities) {
1429 if ((ctxt->input != NULL) && (ctxt->input->filename))
1430 xmlGenericError(xmlGenericErrorContext,
1431 "%s(%d): ", ctxt->input->filename,
1432 ctxt->input->line);
1433 xmlGenericError(xmlGenericErrorContext,
1434 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1435 }
1436 inputPush(ctxt, input);
1437 GROW;
1438}
1439
1440/**
1441 * xmlParseCharRef:
1442 * @ctxt: an XML parser context
1443 *
1444 * parse Reference declarations
1445 *
1446 * [66] CharRef ::= '&#' [0-9]+ ';' |
1447 * '&#x' [0-9a-fA-F]+ ';'
1448 *
1449 * [ WFC: Legal Character ]
1450 * Characters referred to using character references must match the
1451 * production for Char.
1452 *
1453 * Returns the value parsed (as an int), 0 in case of error
1454 */
1455int
1456xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001457 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001458 int count = 0;
1459
Owen Taylor3473f882001-02-23 17:55:21 +00001460 /*
1461 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1462 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001463 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001464 (NXT(2) == 'x')) {
1465 SKIP(3);
1466 GROW;
1467 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001468 if (count++ > 20) {
1469 count = 0;
1470 GROW;
1471 }
1472 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001473 val = val * 16 + (CUR - '0');
1474 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1475 val = val * 16 + (CUR - 'a') + 10;
1476 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1477 val = val * 16 + (CUR - 'A') + 10;
1478 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001479 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001480 val = 0;
1481 break;
1482 }
1483 NEXT;
1484 count++;
1485 }
1486 if (RAW == ';') {
1487 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001488 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001489 ctxt->nbChars ++;
1490 ctxt->input->cur++;
1491 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001492 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001493 SKIP(2);
1494 GROW;
1495 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001496 if (count++ > 20) {
1497 count = 0;
1498 GROW;
1499 }
1500 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001501 val = val * 10 + (CUR - '0');
1502 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001503 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001504 val = 0;
1505 break;
1506 }
1507 NEXT;
1508 count++;
1509 }
1510 if (RAW == ';') {
1511 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001512 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001513 ctxt->nbChars ++;
1514 ctxt->input->cur++;
1515 }
1516 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001517 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 }
1519
1520 /*
1521 * [ WFC: Legal Character ]
1522 * Characters referred to using character references must match the
1523 * production for Char.
1524 */
William M. Brack871611b2003-10-18 04:53:14 +00001525 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001526 return(val);
1527 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001528 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1529 "xmlParseCharRef: invalid xmlChar value %d\n",
1530 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 }
1532 return(0);
1533}
1534
1535/**
1536 * xmlParseStringCharRef:
1537 * @ctxt: an XML parser context
1538 * @str: a pointer to an index in the string
1539 *
1540 * parse Reference declarations, variant parsing from a string rather
1541 * than an an input flow.
1542 *
1543 * [66] CharRef ::= '&#' [0-9]+ ';' |
1544 * '&#x' [0-9a-fA-F]+ ';'
1545 *
1546 * [ WFC: Legal Character ]
1547 * Characters referred to using character references must match the
1548 * production for Char.
1549 *
1550 * Returns the value parsed (as an int), 0 in case of error, str will be
1551 * updated to the current value of the index
1552 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001553static int
Owen Taylor3473f882001-02-23 17:55:21 +00001554xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1555 const xmlChar *ptr;
1556 xmlChar cur;
1557 int val = 0;
1558
1559 if ((str == NULL) || (*str == NULL)) return(0);
1560 ptr = *str;
1561 cur = *ptr;
1562 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1563 ptr += 3;
1564 cur = *ptr;
1565 while (cur != ';') { /* Non input consuming loop */
1566 if ((cur >= '0') && (cur <= '9'))
1567 val = val * 16 + (cur - '0');
1568 else if ((cur >= 'a') && (cur <= 'f'))
1569 val = val * 16 + (cur - 'a') + 10;
1570 else if ((cur >= 'A') && (cur <= 'F'))
1571 val = val * 16 + (cur - 'A') + 10;
1572 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001573 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001574 val = 0;
1575 break;
1576 }
1577 ptr++;
1578 cur = *ptr;
1579 }
1580 if (cur == ';')
1581 ptr++;
1582 } else if ((cur == '&') && (ptr[1] == '#')){
1583 ptr += 2;
1584 cur = *ptr;
1585 while (cur != ';') { /* Non input consuming loops */
1586 if ((cur >= '0') && (cur <= '9'))
1587 val = val * 10 + (cur - '0');
1588 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001589 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001590 val = 0;
1591 break;
1592 }
1593 ptr++;
1594 cur = *ptr;
1595 }
1596 if (cur == ';')
1597 ptr++;
1598 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001599 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 return(0);
1601 }
1602 *str = ptr;
1603
1604 /*
1605 * [ WFC: Legal Character ]
1606 * Characters referred to using character references must match the
1607 * production for Char.
1608 */
William M. Brack871611b2003-10-18 04:53:14 +00001609 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001610 return(val);
1611 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001612 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1613 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1614 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001615 }
1616 return(0);
1617}
1618
1619/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001620 * xmlNewBlanksWrapperInputStream:
1621 * @ctxt: an XML parser context
1622 * @entity: an Entity pointer
1623 *
1624 * Create a new input stream for wrapping
1625 * blanks around a PEReference
1626 *
1627 * Returns the new input stream or NULL
1628 */
1629
1630static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1631
Daniel Veillardf4862f02002-09-10 11:13:43 +00001632static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001633xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1634 xmlParserInputPtr input;
1635 xmlChar *buffer;
1636 size_t length;
1637 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001638 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1639 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001640 return(NULL);
1641 }
1642 if (xmlParserDebugEntities)
1643 xmlGenericError(xmlGenericErrorContext,
1644 "new blanks wrapper for entity: %s\n", entity->name);
1645 input = xmlNewInputStream(ctxt);
1646 if (input == NULL) {
1647 return(NULL);
1648 }
1649 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001650 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001651 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001652 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001653 return(NULL);
1654 }
1655 buffer [0] = ' ';
1656 buffer [1] = '%';
1657 buffer [length-3] = ';';
1658 buffer [length-2] = ' ';
1659 buffer [length-1] = 0;
1660 memcpy(buffer + 2, entity->name, length - 5);
1661 input->free = deallocblankswrapper;
1662 input->base = buffer;
1663 input->cur = buffer;
1664 input->length = length;
1665 input->end = &buffer[length];
1666 return(input);
1667}
1668
1669/**
Owen Taylor3473f882001-02-23 17:55:21 +00001670 * xmlParserHandlePEReference:
1671 * @ctxt: the parser context
1672 *
1673 * [69] PEReference ::= '%' Name ';'
1674 *
1675 * [ WFC: No Recursion ]
1676 * A parsed entity must not contain a recursive
1677 * reference to itself, either directly or indirectly.
1678 *
1679 * [ WFC: Entity Declared ]
1680 * In a document without any DTD, a document with only an internal DTD
1681 * subset which contains no parameter entity references, or a document
1682 * with "standalone='yes'", ... ... The declaration of a parameter
1683 * entity must precede any reference to it...
1684 *
1685 * [ VC: Entity Declared ]
1686 * In a document with an external subset or external parameter entities
1687 * with "standalone='no'", ... ... The declaration of a parameter entity
1688 * must precede any reference to it...
1689 *
1690 * [ WFC: In DTD ]
1691 * Parameter-entity references may only appear in the DTD.
1692 * NOTE: misleading but this is handled.
1693 *
1694 * A PEReference may have been detected in the current input stream
1695 * the handling is done accordingly to
1696 * http://www.w3.org/TR/REC-xml#entproc
1697 * i.e.
1698 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001699 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001700 */
1701void
1702xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001703 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 xmlEntityPtr entity = NULL;
1705 xmlParserInputPtr input;
1706
Owen Taylor3473f882001-02-23 17:55:21 +00001707 if (RAW != '%') return;
1708 switch(ctxt->instate) {
1709 case XML_PARSER_CDATA_SECTION:
1710 return;
1711 case XML_PARSER_COMMENT:
1712 return;
1713 case XML_PARSER_START_TAG:
1714 return;
1715 case XML_PARSER_END_TAG:
1716 return;
1717 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001718 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001719 return;
1720 case XML_PARSER_PROLOG:
1721 case XML_PARSER_START:
1722 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001723 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001724 return;
1725 case XML_PARSER_ENTITY_DECL:
1726 case XML_PARSER_CONTENT:
1727 case XML_PARSER_ATTRIBUTE_VALUE:
1728 case XML_PARSER_PI:
1729 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001730 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001731 /* we just ignore it there */
1732 return;
1733 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001734 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001735 return;
1736 case XML_PARSER_ENTITY_VALUE:
1737 /*
1738 * NOTE: in the case of entity values, we don't do the
1739 * substitution here since we need the literal
1740 * entity value to be able to save the internal
1741 * subset of the document.
1742 * This will be handled by xmlStringDecodeEntities
1743 */
1744 return;
1745 case XML_PARSER_DTD:
1746 /*
1747 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1748 * In the internal DTD subset, parameter-entity references
1749 * can occur only where markup declarations can occur, not
1750 * within markup declarations.
1751 * In that case this is handled in xmlParseMarkupDecl
1752 */
1753 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1754 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001755 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001756 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 break;
1758 case XML_PARSER_IGNORE:
1759 return;
1760 }
1761
1762 NEXT;
1763 name = xmlParseName(ctxt);
1764 if (xmlParserDebugEntities)
1765 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001766 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001767 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001768 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001769 } else {
1770 if (RAW == ';') {
1771 NEXT;
1772 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1773 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1774 if (entity == NULL) {
1775
1776 /*
1777 * [ WFC: Entity Declared ]
1778 * In a document without any DTD, a document with only an
1779 * internal DTD subset which contains no parameter entity
1780 * references, or a document with "standalone='yes'", ...
1781 * ... The declaration of a parameter entity must precede
1782 * any reference to it...
1783 */
1784 if ((ctxt->standalone == 1) ||
1785 ((ctxt->hasExternalSubset == 0) &&
1786 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001787 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001788 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001789 } else {
1790 /*
1791 * [ VC: Entity Declared ]
1792 * In a document with an external subset or external
1793 * parameter entities with "standalone='no'", ...
1794 * ... The declaration of a parameter entity must precede
1795 * any reference to it...
1796 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001797 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1798 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1799 "PEReference: %%%s; not found\n",
1800 name);
1801 } else
1802 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1803 "PEReference: %%%s; not found\n",
1804 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001805 ctxt->valid = 0;
1806 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001807 } else if (ctxt->input->free != deallocblankswrapper) {
1808 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1809 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 } else {
1811 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1812 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001813 xmlChar start[4];
1814 xmlCharEncoding enc;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 /*
1817 * handle the extra spaces added before and after
1818 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001819 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001820 */
1821 input = xmlNewEntityInputStream(ctxt, entity);
1822 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001823
1824 /*
1825 * Get the 4 first bytes and decode the charset
1826 * if enc != XML_CHAR_ENCODING_NONE
1827 * plug some encoding conversion routines.
1828 */
1829 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001830 if (entity->length >= 4) {
1831 start[0] = RAW;
1832 start[1] = NXT(1);
1833 start[2] = NXT(2);
1834 start[3] = NXT(3);
1835 enc = xmlDetectCharEncoding(start, 4);
1836 if (enc != XML_CHAR_ENCODING_NONE) {
1837 xmlSwitchEncoding(ctxt, enc);
1838 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001839 }
1840
Owen Taylor3473f882001-02-23 17:55:21 +00001841 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001842 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1843 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001844 xmlParseTextDecl(ctxt);
1845 }
Owen Taylor3473f882001-02-23 17:55:21 +00001846 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001847 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1848 "PEReference: %s is not a parameter entity\n",
1849 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001850 }
1851 }
1852 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001853 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001854 }
Owen Taylor3473f882001-02-23 17:55:21 +00001855 }
1856}
1857
/*
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates <buffer> accordingly.
 * The expansion site must provide an int-like variable named
 * <buffer>_size and a "mem_error" label, which is jumped to when the
 * reallocation fails (<buffer> is left untouched in that case, but
 * <buffer>_size has already been doubled).
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
									\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL)							\
        goto mem_error;							\
    buffer = tmp;							\
}
1869
1870/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001871 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001872 * @ctxt: the parser context
1873 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1876 * @end: an end marker xmlChar, 0 if none
1877 * @end2: an end marker xmlChar, 0 if none
1878 * @end3: an end marker xmlChar, 0 if none
1879 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001880 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001881 *
1882 * [67] Reference ::= EntityRef | CharRef
1883 *
1884 * [69] PEReference ::= '%' Name ';'
1885 *
1886 * Returns A newly allocated string with the substitution done. The caller
1887 * must deallocate it !
1888 */
1889xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001890xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1891 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001892 xmlChar *buffer = NULL;
1893 int buffer_size = 0;
1894
1895 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001896 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001897 xmlEntityPtr ent;
1898 int c,l;
1899 int nbchars = 0;
1900
Daniel Veillarde57ec792003-09-10 10:50:59 +00001901 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001902 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001903 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001904
1905 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001906 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001907 return(NULL);
1908 }
1909
1910 /*
1911 * allocate a translation buffer.
1912 */
1913 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001914 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001915 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001916
1917 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001918 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001919 * we are operating on already parsed values.
1920 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001921 if (str < last)
1922 c = CUR_SCHAR(str, l);
1923 else
1924 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 while ((c != 0) && (c != end) && /* non input consuming loop */
1926 (c != end2) && (c != end3)) {
1927
1928 if (c == 0) break;
1929 if ((c == '&') && (str[1] == '#')) {
1930 int val = xmlParseStringCharRef(ctxt, &str);
1931 if (val != 0) {
1932 COPY_BUF(0,buffer,nbchars,val);
1933 }
1934 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1935 if (xmlParserDebugEntities)
1936 xmlGenericError(xmlGenericErrorContext,
1937 "String decoding Entity Reference: %.30s\n",
1938 str);
1939 ent = xmlParseStringEntityRef(ctxt, &str);
1940 if ((ent != NULL) &&
1941 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1942 if (ent->content != NULL) {
1943 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1944 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001945 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1946 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001947 }
1948 } else if ((ent != NULL) && (ent->content != NULL)) {
1949 xmlChar *rep;
1950
1951 ctxt->depth++;
1952 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1953 0, 0, 0);
1954 ctxt->depth--;
1955 if (rep != NULL) {
1956 current = rep;
1957 while (*current != 0) { /* non input consuming loop */
1958 buffer[nbchars++] = *current++;
1959 if (nbchars >
1960 buffer_size - XML_PARSER_BUFFER_SIZE) {
1961 growBuffer(buffer);
1962 }
1963 }
1964 xmlFree(rep);
1965 }
1966 } else if (ent != NULL) {
1967 int i = xmlStrlen(ent->name);
1968 const xmlChar *cur = ent->name;
1969
1970 buffer[nbchars++] = '&';
1971 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1972 growBuffer(buffer);
1973 }
1974 for (;i > 0;i--)
1975 buffer[nbchars++] = *cur++;
1976 buffer[nbchars++] = ';';
1977 }
1978 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1979 if (xmlParserDebugEntities)
1980 xmlGenericError(xmlGenericErrorContext,
1981 "String decoding PE Reference: %.30s\n", str);
1982 ent = xmlParseStringPEReference(ctxt, &str);
1983 if (ent != NULL) {
1984 xmlChar *rep;
1985
1986 ctxt->depth++;
1987 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1988 0, 0, 0);
1989 ctxt->depth--;
1990 if (rep != NULL) {
1991 current = rep;
1992 while (*current != 0) { /* non input consuming loop */
1993 buffer[nbchars++] = *current++;
1994 if (nbchars >
1995 buffer_size - XML_PARSER_BUFFER_SIZE) {
1996 growBuffer(buffer);
1997 }
1998 }
1999 xmlFree(rep);
2000 }
2001 }
2002 } else {
2003 COPY_BUF(l,buffer,nbchars,c);
2004 str += l;
2005 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2006 growBuffer(buffer);
2007 }
2008 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002009 if (str < last)
2010 c = CUR_SCHAR(str, l);
2011 else
2012 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002013 }
2014 buffer[nbchars++] = 0;
2015 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002016
2017mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002018 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002019 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002020}
2021
Daniel Veillarde57ec792003-09-10 10:50:59 +00002022/**
2023 * xmlStringDecodeEntities:
2024 * @ctxt: the parser context
2025 * @str: the input string
2026 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2027 * @end: an end marker xmlChar, 0 if none
2028 * @end2: an end marker xmlChar, 0 if none
2029 * @end3: an end marker xmlChar, 0 if none
2030 *
2031 * Takes a entity string content and process to do the adequate substitutions.
2032 *
2033 * [67] Reference ::= EntityRef | CharRef
2034 *
2035 * [69] PEReference ::= '%' Name ';'
2036 *
2037 * Returns A newly allocated string with the substitution done. The caller
2038 * must deallocate it !
2039 */
2040xmlChar *
2041xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2042 xmlChar end, xmlChar end2, xmlChar end3) {
2043 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2044 end, end2, end3));
2045}
Owen Taylor3473f882001-02-23 17:55:21 +00002046
2047/************************************************************************
2048 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002049 * Commodity functions, cleanup needed ? *
2050 * *
2051 ************************************************************************/
2052
2053/**
2054 * areBlanks:
2055 * @ctxt: an XML parser context
2056 * @str: a xmlChar *
2057 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002058 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002059 *
2060 * Is this a sequence of blank chars that one can ignore ?
2061 *
2062 * Returns 1 if ignorable 0 otherwise.
2063 */
2064
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002065static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2066 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002067 int i, ret;
2068 xmlNodePtr lastChild;
2069
Daniel Veillard05c13a22001-09-09 08:38:09 +00002070 /*
2071 * Don't spend time trying to differentiate them, the same callback is
2072 * used !
2073 */
2074 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002075 return(0);
2076
Owen Taylor3473f882001-02-23 17:55:21 +00002077 /*
2078 * Check for xml:space value.
2079 */
2080 if (*(ctxt->space) == 1)
2081 return(0);
2082
2083 /*
2084 * Check that the string is made of blanks
2085 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002086 if (blank_chars == 0) {
2087 for (i = 0;i < len;i++)
2088 if (!(IS_BLANK_CH(str[i]))) return(0);
2089 }
Owen Taylor3473f882001-02-23 17:55:21 +00002090
2091 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002092 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002093 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002094 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 if (ctxt->myDoc != NULL) {
2096 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2097 if (ret == 0) return(1);
2098 if (ret == 1) return(0);
2099 }
2100
2101 /*
2102 * Otherwise, heuristic :-\
2103 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002104 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002105 if ((ctxt->node->children == NULL) &&
2106 (RAW == '<') && (NXT(1) == '/')) return(0);
2107
2108 lastChild = xmlGetLastChild(ctxt->node);
2109 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002110 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2111 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002112 } else if (xmlNodeIsText(lastChild))
2113 return(0);
2114 else if ((ctxt->node->children != NULL) &&
2115 (xmlNodeIsText(ctxt->node->children)))
2116 return(0);
2117 return(1);
2118}
2119
Owen Taylor3473f882001-02-23 17:55:21 +00002120/************************************************************************
2121 * *
2122 * Extra stuff for namespace support *
2123 * Relates to http://www.w3.org/TR/WD-xml-names *
2124 * *
2125 ************************************************************************/
2126
2127/**
2128 * xmlSplitQName:
2129 * @ctxt: an XML parser context
2130 * @name: an XML parser context
2131 * @prefix: a xmlChar **
2132 *
2133 * parse an UTF8 encoded XML qualified name string
2134 *
2135 * [NS 5] QName ::= (Prefix ':')? LocalPart
2136 *
2137 * [NS 6] Prefix ::= NCName
2138 *
2139 * [NS 7] LocalPart ::= NCName
2140 *
2141 * Returns the local part, and prefix is updated
2142 * to get the Prefix if any.
2143 */
2144
2145xmlChar *
2146xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2147 xmlChar buf[XML_MAX_NAMELEN + 5];
2148 xmlChar *buffer = NULL;
2149 int len = 0;
2150 int max = XML_MAX_NAMELEN;
2151 xmlChar *ret = NULL;
2152 const xmlChar *cur = name;
2153 int c;
2154
2155 *prefix = NULL;
2156
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002157 if (cur == NULL) return(NULL);
2158
Owen Taylor3473f882001-02-23 17:55:21 +00002159#ifndef XML_XML_NAMESPACE
2160 /* xml: prefix is not really a namespace */
2161 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2162 (cur[2] == 'l') && (cur[3] == ':'))
2163 return(xmlStrdup(name));
2164#endif
2165
Daniel Veillard597bc482003-07-24 16:08:28 +00002166 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002167 if (cur[0] == ':')
2168 return(xmlStrdup(name));
2169
2170 c = *cur++;
2171 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2172 buf[len++] = c;
2173 c = *cur++;
2174 }
2175 if (len >= max) {
2176 /*
2177 * Okay someone managed to make a huge name, so he's ready to pay
2178 * for the processing speed.
2179 */
2180 max = len * 2;
2181
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002182 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002183 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187 memcpy(buffer, buf, len);
2188 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2189 if (len + 10 > max) {
2190 max *= 2;
2191 buffer = (xmlChar *) xmlRealloc(buffer,
2192 max * sizeof(xmlChar));
2193 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002194 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002195 return(NULL);
2196 }
2197 }
2198 buffer[len++] = c;
2199 c = *cur++;
2200 }
2201 buffer[len] = 0;
2202 }
2203
Daniel Veillard597bc482003-07-24 16:08:28 +00002204 /* nasty but well=formed
2205 if ((c == ':') && (*cur == 0)) {
2206 return(xmlStrdup(name));
2207 } */
2208
Owen Taylor3473f882001-02-23 17:55:21 +00002209 if (buffer == NULL)
2210 ret = xmlStrndup(buf, len);
2211 else {
2212 ret = buffer;
2213 buffer = NULL;
2214 max = XML_MAX_NAMELEN;
2215 }
2216
2217
2218 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002219 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002220 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002221 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002222 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002223 }
Owen Taylor3473f882001-02-23 17:55:21 +00002224 len = 0;
2225
Daniel Veillardbb284f42002-10-16 18:02:47 +00002226 /*
2227 * Check that the first character is proper to start
2228 * a new name
2229 */
2230 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2231 ((c >= 0x41) && (c <= 0x5A)) ||
2232 (c == '_') || (c == ':'))) {
2233 int l;
2234 int first = CUR_SCHAR(cur, l);
2235
2236 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002237 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002238 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002239 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002240 }
2241 }
2242 cur++;
2243
Owen Taylor3473f882001-02-23 17:55:21 +00002244 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2245 buf[len++] = c;
2246 c = *cur++;
2247 }
2248 if (len >= max) {
2249 /*
2250 * Okay someone managed to make a huge name, so he's ready to pay
2251 * for the processing speed.
2252 */
2253 max = len * 2;
2254
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002255 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002256 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002257 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002258 return(NULL);
2259 }
2260 memcpy(buffer, buf, len);
2261 while (c != 0) { /* tested bigname2.xml */
2262 if (len + 10 > max) {
2263 max *= 2;
2264 buffer = (xmlChar *) xmlRealloc(buffer,
2265 max * sizeof(xmlChar));
2266 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002267 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002268 return(NULL);
2269 }
2270 }
2271 buffer[len++] = c;
2272 c = *cur++;
2273 }
2274 buffer[len] = 0;
2275 }
2276
2277 if (buffer == NULL)
2278 ret = xmlStrndup(buf, len);
2279 else {
2280 ret = buffer;
2281 }
2282 }
2283
2284 return(ret);
2285}
2286
2287/************************************************************************
2288 * *
2289 * The parser itself *
2290 * Relates to http://www.w3.org/TR/REC-xml *
2291 * *
2292 ************************************************************************/
2293
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002294static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002295static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002296 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002297
Owen Taylor3473f882001-02-23 17:55:21 +00002298/**
2299 * xmlParseName:
2300 * @ctxt: an XML parser context
2301 *
2302 * parse an XML name.
2303 *
2304 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2305 * CombiningChar | Extender
2306 *
2307 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2308 *
2309 * [6] Names ::= Name (S Name)*
2310 *
2311 * Returns the Name parsed or NULL
2312 */
2313
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002314const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002315xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002316 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002317 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002318 int count = 0;
2319
2320 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002321
2322 /*
2323 * Accelerator for simple ASCII names
2324 */
2325 in = ctxt->input->cur;
2326 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2327 ((*in >= 0x41) && (*in <= 0x5A)) ||
2328 (*in == '_') || (*in == ':')) {
2329 in++;
2330 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2331 ((*in >= 0x41) && (*in <= 0x5A)) ||
2332 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002333 (*in == '_') || (*in == '-') ||
2334 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002335 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002336 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002337 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002338 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002339 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002340 ctxt->nbChars += count;
2341 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002342 if (ret == NULL)
2343 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002344 return(ret);
2345 }
2346 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002347 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002348}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002349
Daniel Veillard46de64e2002-05-29 08:21:33 +00002350/**
2351 * xmlParseNameAndCompare:
2352 * @ctxt: an XML parser context
2353 *
2354 * parse an XML name and compares for match
2355 * (specialized for endtag parsing)
2356 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002357 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2358 * and the name for mismatch
2359 */
2360
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002361static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002362xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002363 register const xmlChar *cmp = other;
2364 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002365 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002366
2367 GROW;
2368
2369 in = ctxt->input->cur;
2370 while (*in != 0 && *in == *cmp) {
2371 ++in;
2372 ++cmp;
2373 }
William M. Brack76e95df2003-10-18 16:20:14 +00002374 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002375 /* success */
2376 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002377 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002378 }
2379 /* failure (or end of input buffer), check with full function */
2380 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002381 /* strings coming from the dictionnary direct compare possible */
2382 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002383 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002384 }
2385 return ret;
2386}
2387
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002388static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002389xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002390 int len = 0, l;
2391 int c;
2392 int count = 0;
2393
2394 /*
2395 * Handler for more complex cases
2396 */
2397 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002398 c = CUR_CHAR(l);
2399 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2400 (!IS_LETTER(c) && (c != '_') &&
2401 (c != ':'))) {
2402 return(NULL);
2403 }
2404
2405 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002406 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002407 (c == '.') || (c == '-') ||
2408 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002409 (IS_COMBINING(c)) ||
2410 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002411 if (count++ > 100) {
2412 count = 0;
2413 GROW;
2414 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002415 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002416 NEXTL(l);
2417 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002418 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002419 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002420}
2421
2422/**
2423 * xmlParseStringName:
2424 * @ctxt: an XML parser context
2425 * @str: a pointer to the string pointer (IN/OUT)
2426 *
2427 * parse an XML name.
2428 *
2429 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2430 * CombiningChar | Extender
2431 *
2432 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2433 *
2434 * [6] Names ::= Name (S Name)*
2435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002436 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002437 * is updated to the current location in the string.
2438 */
2439
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002440static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002441xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2442 xmlChar buf[XML_MAX_NAMELEN + 5];
2443 const xmlChar *cur = *str;
2444 int len = 0, l;
2445 int c;
2446
2447 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002448 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002449 (c != ':')) {
2450 return(NULL);
2451 }
2452
William M. Brack871611b2003-10-18 04:53:14 +00002453 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 (c == '.') || (c == '-') ||
2455 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002456 (IS_COMBINING(c)) ||
2457 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002458 COPY_BUF(l,buf,len,c);
2459 cur += l;
2460 c = CUR_SCHAR(cur, l);
2461 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2462 /*
2463 * Okay someone managed to make a huge name, so he's ready to pay
2464 * for the processing speed.
2465 */
2466 xmlChar *buffer;
2467 int max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002475 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002476 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002477 (c == '.') || (c == '-') ||
2478 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002479 (IS_COMBINING(c)) ||
2480 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002481 if (len + 10 > max) {
2482 max *= 2;
2483 buffer = (xmlChar *) xmlRealloc(buffer,
2484 max * sizeof(xmlChar));
2485 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002486 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002487 return(NULL);
2488 }
2489 }
2490 COPY_BUF(l,buffer,len,c);
2491 cur += l;
2492 c = CUR_SCHAR(cur, l);
2493 }
2494 buffer[len] = 0;
2495 *str = cur;
2496 return(buffer);
2497 }
2498 }
2499 *str = cur;
2500 return(xmlStrndup(buf, len));
2501}
2502
2503/**
2504 * xmlParseNmtoken:
2505 * @ctxt: an XML parser context
2506 *
2507 * parse an XML Nmtoken.
2508 *
2509 * [7] Nmtoken ::= (NameChar)+
2510 *
2511 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2512 *
2513 * Returns the Nmtoken parsed or NULL
2514 */
2515
2516xmlChar *
2517xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2518 xmlChar buf[XML_MAX_NAMELEN + 5];
2519 int len = 0, l;
2520 int c;
2521 int count = 0;
2522
2523 GROW;
2524 c = CUR_CHAR(l);
2525
William M. Brack871611b2003-10-18 04:53:14 +00002526 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002527 (c == '.') || (c == '-') ||
2528 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002529 (IS_COMBINING(c)) ||
2530 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002531 if (count++ > 100) {
2532 count = 0;
2533 GROW;
2534 }
2535 COPY_BUF(l,buf,len,c);
2536 NEXTL(l);
2537 c = CUR_CHAR(l);
2538 if (len >= XML_MAX_NAMELEN) {
2539 /*
2540 * Okay someone managed to make a huge token, so he's ready to pay
2541 * for the processing speed.
2542 */
2543 xmlChar *buffer;
2544 int max = len * 2;
2545
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002546 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002548 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002549 return(NULL);
2550 }
2551 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002552 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002553 (c == '.') || (c == '-') ||
2554 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002555 (IS_COMBINING(c)) ||
2556 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002557 if (count++ > 100) {
2558 count = 0;
2559 GROW;
2560 }
2561 if (len + 10 > max) {
2562 max *= 2;
2563 buffer = (xmlChar *) xmlRealloc(buffer,
2564 max * sizeof(xmlChar));
2565 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002566 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002567 return(NULL);
2568 }
2569 }
2570 COPY_BUF(l,buffer,len,c);
2571 NEXTL(l);
2572 c = CUR_CHAR(l);
2573 }
2574 buffer[len] = 0;
2575 return(buffer);
2576 }
2577 }
2578 if (len == 0)
2579 return(NULL);
2580 return(xmlStrndup(buf, len));
2581}
2582
2583/**
2584 * xmlParseEntityValue:
2585 * @ctxt: an XML parser context
2586 * @orig: if non-NULL store a copy of the original entity value
2587 *
2588 * parse a value for ENTITY declarations
2589 *
2590 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2591 * "'" ([^%&'] | PEReference | Reference)* "'"
2592 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002593 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002594 */
2595
2596xmlChar *
2597xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2598 xmlChar *buf = NULL;
2599 int len = 0;
2600 int size = XML_PARSER_BUFFER_SIZE;
2601 int c, l;
2602 xmlChar stop;
2603 xmlChar *ret = NULL;
2604 const xmlChar *cur = NULL;
2605 xmlParserInputPtr input;
2606
2607 if (RAW == '"') stop = '"';
2608 else if (RAW == '\'') stop = '\'';
2609 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002610 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002611 return(NULL);
2612 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002613 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002614 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002615 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002616 return(NULL);
2617 }
2618
2619 /*
2620 * The content of the entity definition is copied in a buffer.
2621 */
2622
2623 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2624 input = ctxt->input;
2625 GROW;
2626 NEXT;
2627 c = CUR_CHAR(l);
2628 /*
2629 * NOTE: 4.4.5 Included in Literal
2630 * When a parameter entity reference appears in a literal entity
2631 * value, ... a single or double quote character in the replacement
2632 * text is always treated as a normal data character and will not
2633 * terminate the literal.
2634 * In practice it means we stop the loop only when back at parsing
2635 * the initial entity and the quote is found
2636 */
William M. Brack871611b2003-10-18 04:53:14 +00002637 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002638 (ctxt->input != input))) {
2639 if (len + 5 >= size) {
2640 size *= 2;
2641 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2642 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002643 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002644 return(NULL);
2645 }
2646 }
2647 COPY_BUF(l,buf,len,c);
2648 NEXTL(l);
2649 /*
2650 * Pop-up of finished entities.
2651 */
2652 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2653 xmlPopInput(ctxt);
2654
2655 GROW;
2656 c = CUR_CHAR(l);
2657 if (c == 0) {
2658 GROW;
2659 c = CUR_CHAR(l);
2660 }
2661 }
2662 buf[len] = 0;
2663
2664 /*
2665 * Raise problem w.r.t. '&' and '%' being used in non-entities
2666 * reference constructs. Note Charref will be handled in
2667 * xmlStringDecodeEntities()
2668 */
2669 cur = buf;
2670 while (*cur != 0) { /* non input consuming */
2671 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2672 xmlChar *name;
2673 xmlChar tmp = *cur;
2674
2675 cur++;
2676 name = xmlParseStringName(ctxt, &cur);
2677 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002678 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002679 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002680 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002682 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2683 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002684 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002685 }
2686 if (name != NULL)
2687 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002688 if (*cur == 0)
2689 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 }
2691 cur++;
2692 }
2693
2694 /*
2695 * Then PEReference entities are substituted.
2696 */
2697 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002698 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002699 xmlFree(buf);
2700 } else {
2701 NEXT;
2702 /*
2703 * NOTE: 4.4.7 Bypassed
2704 * When a general entity reference appears in the EntityValue in
2705 * an entity declaration, it is bypassed and left as is.
2706 * so XML_SUBSTITUTE_REF is not set here.
2707 */
2708 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2709 0, 0, 0);
2710 if (orig != NULL)
2711 *orig = buf;
2712 else
2713 xmlFree(buf);
2714 }
2715
2716 return(ret);
2717}
2718
2719/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002720 * xmlParseAttValueComplex:
2721 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002722 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002723 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002724 *
2725 * parse a value for an attribute, this is the fallback function
2726 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002727 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002728 *
2729 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2730 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002731static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002732xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002733 xmlChar limit = 0;
2734 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002735 int len = 0;
2736 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002737 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 xmlChar *current = NULL;
2739 xmlEntityPtr ent;
2740
Owen Taylor3473f882001-02-23 17:55:21 +00002741 if (NXT(0) == '"') {
2742 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2743 limit = '"';
2744 NEXT;
2745 } else if (NXT(0) == '\'') {
2746 limit = '\'';
2747 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2748 NEXT;
2749 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002750 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002751 return(NULL);
2752 }
2753
2754 /*
2755 * allocate a translation buffer.
2756 */
2757 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002758 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002759 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002760
2761 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002762 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002763 */
2764 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002765 while ((NXT(0) != limit) && /* checked */
2766 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002767 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002768 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002769 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002770 if (NXT(1) == '#') {
2771 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002772
Owen Taylor3473f882001-02-23 17:55:21 +00002773 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002774 if (ctxt->replaceEntities) {
2775 if (len > buf_size - 10) {
2776 growBuffer(buf);
2777 }
2778 buf[len++] = '&';
2779 } else {
2780 /*
2781 * The reparsing will be done in xmlStringGetNodeList()
2782 * called by the attribute() function in SAX.c
2783 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002784 if (len > buf_size - 10) {
2785 growBuffer(buf);
2786 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002787 buf[len++] = '&';
2788 buf[len++] = '#';
2789 buf[len++] = '3';
2790 buf[len++] = '8';
2791 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002792 }
2793 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002794 if (len > buf_size - 10) {
2795 growBuffer(buf);
2796 }
Owen Taylor3473f882001-02-23 17:55:21 +00002797 len += xmlCopyChar(0, &buf[len], val);
2798 }
2799 } else {
2800 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002801 if ((ent != NULL) &&
2802 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2803 if (len > buf_size - 10) {
2804 growBuffer(buf);
2805 }
2806 if ((ctxt->replaceEntities == 0) &&
2807 (ent->content[0] == '&')) {
2808 buf[len++] = '&';
2809 buf[len++] = '#';
2810 buf[len++] = '3';
2811 buf[len++] = '8';
2812 buf[len++] = ';';
2813 } else {
2814 buf[len++] = ent->content[0];
2815 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002816 } else if ((ent != NULL) &&
2817 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002818 xmlChar *rep;
2819
2820 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2821 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002822 XML_SUBSTITUTE_REF,
2823 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002824 if (rep != NULL) {
2825 current = rep;
2826 while (*current != 0) { /* non input consuming */
2827 buf[len++] = *current++;
2828 if (len > buf_size - 10) {
2829 growBuffer(buf);
2830 }
2831 }
2832 xmlFree(rep);
2833 }
2834 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002835 if (len > buf_size - 10) {
2836 growBuffer(buf);
2837 }
Owen Taylor3473f882001-02-23 17:55:21 +00002838 if (ent->content != NULL)
2839 buf[len++] = ent->content[0];
2840 }
2841 } else if (ent != NULL) {
2842 int i = xmlStrlen(ent->name);
2843 const xmlChar *cur = ent->name;
2844
2845 /*
2846 * This may look absurd but is needed to detect
2847 * entities problems
2848 */
2849 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2850 (ent->content != NULL)) {
2851 xmlChar *rep;
2852 rep = xmlStringDecodeEntities(ctxt, ent->content,
2853 XML_SUBSTITUTE_REF, 0, 0, 0);
2854 if (rep != NULL)
2855 xmlFree(rep);
2856 }
2857
2858 /*
2859 * Just output the reference
2860 */
2861 buf[len++] = '&';
2862 if (len > buf_size - i - 10) {
2863 growBuffer(buf);
2864 }
2865 for (;i > 0;i--)
2866 buf[len++] = *cur++;
2867 buf[len++] = ';';
2868 }
2869 }
2870 } else {
2871 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002872 if ((len != 0) || (!normalize)) {
2873 if ((!normalize) || (!in_space)) {
2874 COPY_BUF(l,buf,len,0x20);
2875 if (len > buf_size - 10) {
2876 growBuffer(buf);
2877 }
2878 }
2879 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002880 }
2881 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002882 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002883 COPY_BUF(l,buf,len,c);
2884 if (len > buf_size - 10) {
2885 growBuffer(buf);
2886 }
2887 }
2888 NEXTL(l);
2889 }
2890 GROW;
2891 c = CUR_CHAR(l);
2892 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002893 if ((in_space) && (normalize)) {
2894 while (buf[len - 1] == 0x20) len--;
2895 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002896 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002897 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002898 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002899 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002900 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2901 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002902 } else
2903 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002904 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002905 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002906
2907mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002908 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002909 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002910}
2911
2912/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002913 * xmlParseAttValue:
2914 * @ctxt: an XML parser context
2915 *
2916 * parse a value for an attribute
2917 * Note: the parser won't do substitution of entities here, this
2918 * will be handled later in xmlStringGetNodeList
2919 *
2920 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2921 * "'" ([^<&'] | Reference)* "'"
2922 *
2923 * 3.3.3 Attribute-Value Normalization:
2924 * Before the value of an attribute is passed to the application or
2925 * checked for validity, the XML processor must normalize it as follows:
2926 * - a character reference is processed by appending the referenced
2927 * character to the attribute value
2928 * - an entity reference is processed by recursively processing the
2929 * replacement text of the entity
2930 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2931 * appending #x20 to the normalized value, except that only a single
2932 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2933 * parsed entity or the literal entity value of an internal parsed entity
2934 * - other characters are processed by appending them to the normalized value
2935 * If the declared value is not CDATA, then the XML processor must further
2936 * process the normalized attribute value by discarding any leading and
2937 * trailing space (#x20) characters, and by replacing sequences of space
2938 * (#x20) characters by a single space (#x20) character.
2939 * All attributes for which no declaration has been read should be treated
2940 * by a non-validating parser as if declared CDATA.
2941 *
2942 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2943 */
2944
2945
2946xmlChar *
2947xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002948 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002949}
2950
2951/**
Owen Taylor3473f882001-02-23 17:55:21 +00002952 * xmlParseSystemLiteral:
2953 * @ctxt: an XML parser context
2954 *
2955 * parse an XML Literal
2956 *
2957 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2958 *
2959 * Returns the SystemLiteral parsed or NULL
2960 */
2961
2962xmlChar *
2963xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2964 xmlChar *buf = NULL;
2965 int len = 0;
2966 int size = XML_PARSER_BUFFER_SIZE;
2967 int cur, l;
2968 xmlChar stop;
2969 int state = ctxt->instate;
2970 int count = 0;
2971
2972 SHRINK;
2973 if (RAW == '"') {
2974 NEXT;
2975 stop = '"';
2976 } else if (RAW == '\'') {
2977 NEXT;
2978 stop = '\'';
2979 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002980 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002981 return(NULL);
2982 }
2983
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002984 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002985 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002986 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002987 return(NULL);
2988 }
2989 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2990 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00002991 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002992 if (len + 5 >= size) {
2993 size *= 2;
2994 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2995 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002996 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002997 ctxt->instate = (xmlParserInputState) state;
2998 return(NULL);
2999 }
3000 }
3001 count++;
3002 if (count > 50) {
3003 GROW;
3004 count = 0;
3005 }
3006 COPY_BUF(l,buf,len,cur);
3007 NEXTL(l);
3008 cur = CUR_CHAR(l);
3009 if (cur == 0) {
3010 GROW;
3011 SHRINK;
3012 cur = CUR_CHAR(l);
3013 }
3014 }
3015 buf[len] = 0;
3016 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003017 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003018 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 } else {
3020 NEXT;
3021 }
3022 return(buf);
3023}
3024
3025/**
3026 * xmlParsePubidLiteral:
3027 * @ctxt: an XML parser context
3028 *
3029 * parse an XML public literal
3030 *
3031 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3032 *
3033 * Returns the PubidLiteral parsed or NULL.
3034 */
3035
3036xmlChar *
3037xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3038 xmlChar *buf = NULL;
3039 int len = 0;
3040 int size = XML_PARSER_BUFFER_SIZE;
3041 xmlChar cur;
3042 xmlChar stop;
3043 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003044 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003045
3046 SHRINK;
3047 if (RAW == '"') {
3048 NEXT;
3049 stop = '"';
3050 } else if (RAW == '\'') {
3051 NEXT;
3052 stop = '\'';
3053 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003054 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003055 return(NULL);
3056 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003057 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003058 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003059 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003060 return(NULL);
3061 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003062 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003063 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003064 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003065 if (len + 1 >= size) {
3066 size *= 2;
3067 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3068 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003069 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003070 return(NULL);
3071 }
3072 }
3073 buf[len++] = cur;
3074 count++;
3075 if (count > 50) {
3076 GROW;
3077 count = 0;
3078 }
3079 NEXT;
3080 cur = CUR;
3081 if (cur == 0) {
3082 GROW;
3083 SHRINK;
3084 cur = CUR;
3085 }
3086 }
3087 buf[len] = 0;
3088 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003089 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003090 } else {
3091 NEXT;
3092 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003093 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003094 return(buf);
3095}
3096
Daniel Veillard48b2f892001-02-25 16:11:03 +00003097void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003098/**
3099 * xmlParseCharData:
3100 * @ctxt: an XML parser context
3101 * @cdata: int indicating whether we are within a CDATA section
3102 *
3103 * parse a CharData section.
3104 * if we are within a CDATA section ']]>' marks an end of section.
3105 *
3106 * The right angle bracket (>) may be represented using the string "&gt;",
3107 * and must, for compatibility, be escaped using "&gt;" or a character
3108 * reference when it appears in the string "]]>" in content, when that
3109 * string is not marking the end of a CDATA section.
3110 *
3111 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3112 */
3113
3114void
3115xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003116 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003117 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003118 int line = ctxt->input->line;
3119 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003120
3121 SHRINK;
3122 GROW;
3123 /*
3124 * Accelerated common case where input don't need to be
3125 * modified before passing it to the handler.
3126 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003127 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003128 in = ctxt->input->cur;
3129 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003130get_more_space:
3131 while (*in == 0x20) in++;
3132 if (*in == 0xA) {
3133 ctxt->input->line++;
3134 in++;
3135 while (*in == 0xA) {
3136 ctxt->input->line++;
3137 in++;
3138 }
3139 goto get_more_space;
3140 }
3141 if (*in == '<') {
3142 nbchar = in - ctxt->input->cur;
3143 if (nbchar > 0) {
3144 const xmlChar *tmp = ctxt->input->cur;
3145 ctxt->input->cur = in;
3146
3147 if (ctxt->sax->ignorableWhitespace !=
3148 ctxt->sax->characters) {
3149 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3150 ctxt->sax->ignorableWhitespace(ctxt->userData,
3151 tmp, nbchar);
3152 } else if (ctxt->sax->characters != NULL)
3153 ctxt->sax->characters(ctxt->userData,
3154 tmp, nbchar);
3155 } else if (ctxt->sax->characters != NULL) {
3156 ctxt->sax->characters(ctxt->userData,
3157 tmp, nbchar);
3158 }
3159 }
3160 return;
3161 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003162get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003163 while (((*in > ']') && (*in <= 0x7F)) ||
3164 ((*in > '&') && (*in < '<')) ||
3165 ((*in > '<') && (*in < ']')) ||
3166 ((*in >= 0x20) && (*in < '&')) ||
3167 (*in == 0x09))
3168 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003169 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003171 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003172 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003173 ctxt->input->line++;
3174 in++;
3175 }
3176 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003177 }
3178 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003179 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003180 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003181 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003182 return;
3183 }
3184 in++;
3185 goto get_more;
3186 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003187 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003188 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003189 if ((ctxt->sax->ignorableWhitespace !=
3190 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003191 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003192 const xmlChar *tmp = ctxt->input->cur;
3193 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003194
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003195 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003196 ctxt->sax->ignorableWhitespace(ctxt->userData,
3197 tmp, nbchar);
3198 } else if (ctxt->sax->characters != NULL)
3199 ctxt->sax->characters(ctxt->userData,
3200 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003201 line = ctxt->input->line;
3202 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003203 } else {
3204 if (ctxt->sax->characters != NULL)
3205 ctxt->sax->characters(ctxt->userData,
3206 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003207 line = ctxt->input->line;
3208 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003209 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003210 }
3211 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003212 if (*in == 0xD) {
3213 in++;
3214 if (*in == 0xA) {
3215 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003216 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003217 ctxt->input->line++;
3218 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003219 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003220 in--;
3221 }
3222 if (*in == '<') {
3223 return;
3224 }
3225 if (*in == '&') {
3226 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003227 }
3228 SHRINK;
3229 GROW;
3230 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003231 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003232 nbchar = 0;
3233 }
Daniel Veillard50582112001-03-26 22:52:16 +00003234 ctxt->input->line = line;
3235 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003236 xmlParseCharDataComplex(ctxt, cdata);
3237}
3238
Daniel Veillard01c13b52002-12-10 15:19:08 +00003239/**
3240 * xmlParseCharDataComplex:
3241 * @ctxt: an XML parser context
3242 * @cdata: int indicating whether we are within a CDATA section
3243 *
3244 * parse a CharData section.this is the fallback function
3245 * of xmlParseCharData() when the parsing requires handling
3246 * of non-ASCII characters.
3247 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003248void
3249xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003250 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3251 int nbchar = 0;
3252 int cur, l;
3253 int count = 0;
3254
3255 SHRINK;
3256 GROW;
3257 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003258 while ((cur != '<') && /* checked */
3259 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003260 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003261 if ((cur == ']') && (NXT(1) == ']') &&
3262 (NXT(2) == '>')) {
3263 if (cdata) break;
3264 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003265 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003266 }
3267 }
3268 COPY_BUF(l,buf,nbchar,cur);
3269 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003270 buf[nbchar] = 0;
3271
Owen Taylor3473f882001-02-23 17:55:21 +00003272 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003273 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003274 */
3275 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003276 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003277 if (ctxt->sax->ignorableWhitespace != NULL)
3278 ctxt->sax->ignorableWhitespace(ctxt->userData,
3279 buf, nbchar);
3280 } else {
3281 if (ctxt->sax->characters != NULL)
3282 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3283 }
3284 }
3285 nbchar = 0;
3286 }
3287 count++;
3288 if (count > 50) {
3289 GROW;
3290 count = 0;
3291 }
3292 NEXTL(l);
3293 cur = CUR_CHAR(l);
3294 }
3295 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003296 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003297 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003298 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003299 */
3300 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003301 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003302 if (ctxt->sax->ignorableWhitespace != NULL)
3303 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3304 } else {
3305 if (ctxt->sax->characters != NULL)
3306 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3307 }
3308 }
3309 }
3310}
3311
3312/**
3313 * xmlParseExternalID:
3314 * @ctxt: an XML parser context
3315 * @publicID: a xmlChar** receiving PubidLiteral
3316 * @strict: indicate whether we should restrict parsing to only
3317 * production [75], see NOTE below
3318 *
3319 * Parse an External ID or a Public ID
3320 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003321 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003322 * 'PUBLIC' S PubidLiteral S SystemLiteral
3323 *
3324 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3325 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3326 *
3327 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3328 *
3329 * Returns the function returns SystemLiteral and in the second
3330 * case publicID receives PubidLiteral, is strict is off
3331 * it is possible to return NULL and have publicID set.
3332 */
3333
3334xmlChar *
3335xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3336 xmlChar *URI = NULL;
3337
3338 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003339
3340 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003341 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003342 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003343 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003344 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3345 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003346 }
3347 SKIP_BLANKS;
3348 URI = xmlParseSystemLiteral(ctxt);
3349 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003350 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003351 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003352 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003353 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003354 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003355 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003356 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003357 }
3358 SKIP_BLANKS;
3359 *publicID = xmlParsePubidLiteral(ctxt);
3360 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003361 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003362 }
3363 if (strict) {
3364 /*
3365 * We don't handle [83] so "S SystemLiteral" is required.
3366 */
William M. Brack76e95df2003-10-18 16:20:14 +00003367 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003368 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003369 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003370 }
3371 } else {
3372 /*
3373 * We handle [83] so we return immediately, if
3374 * "S SystemLiteral" is not detected. From a purely parsing
3375 * point of view that's a nice mess.
3376 */
3377 const xmlChar *ptr;
3378 GROW;
3379
3380 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003381 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003382
William M. Brack76e95df2003-10-18 16:20:14 +00003383 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003384 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3385 }
3386 SKIP_BLANKS;
3387 URI = xmlParseSystemLiteral(ctxt);
3388 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003389 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003390 }
3391 }
3392 return(URI);
3393}
3394
3395/**
3396 * xmlParseComment:
3397 * @ctxt: an XML parser context
3398 *
3399 * Skip an XML (SGML) comment <!-- .... -->
3400 * The spec says that "For compatibility, the string "--" (double-hyphen)
3401 * must not occur within comments. "
3402 *
3403 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3404 */
3405void
3406xmlParseComment(xmlParserCtxtPtr ctxt) {
3407 xmlChar *buf = NULL;
3408 int len;
3409 int size = XML_PARSER_BUFFER_SIZE;
3410 int q, ql;
3411 int r, rl;
3412 int cur, l;
3413 xmlParserInputState state;
3414 xmlParserInputPtr input = ctxt->input;
3415 int count = 0;
3416
3417 /*
3418 * Check that there is a comment right here.
3419 */
3420 if ((RAW != '<') || (NXT(1) != '!') ||
3421 (NXT(2) != '-') || (NXT(3) != '-')) return;
3422
3423 state = ctxt->instate;
3424 ctxt->instate = XML_PARSER_COMMENT;
3425 SHRINK;
3426 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003427 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003429 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003430 ctxt->instate = state;
3431 return;
3432 }
3433 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003434 if (q == 0)
3435 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003436 NEXTL(ql);
3437 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003438 if (r == 0)
3439 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003440 NEXTL(rl);
3441 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003442 if (cur == 0)
3443 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003445 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003446 ((cur != '>') ||
3447 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003448 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003449 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003450 }
3451 if (len + 5 >= size) {
3452 size *= 2;
3453 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3454 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003455 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003456 ctxt->instate = state;
3457 return;
3458 }
3459 }
3460 COPY_BUF(ql,buf,len,q);
3461 q = r;
3462 ql = rl;
3463 r = cur;
3464 rl = l;
3465
3466 count++;
3467 if (count > 50) {
3468 GROW;
3469 count = 0;
3470 }
3471 NEXTL(l);
3472 cur = CUR_CHAR(l);
3473 if (cur == 0) {
3474 SHRINK;
3475 GROW;
3476 cur = CUR_CHAR(l);
3477 }
3478 }
3479 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003480 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003481 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003482 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003483 xmlFree(buf);
3484 } else {
3485 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003486 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3487 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003488 }
3489 NEXT;
3490 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3491 (!ctxt->disableSAX))
3492 ctxt->sax->comment(ctxt->userData, buf);
3493 xmlFree(buf);
3494 }
3495 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003496 return;
3497not_terminated:
3498 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3499 "Comment not terminated\n", NULL);
3500 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003501}
3502
3503/**
3504 * xmlParsePITarget:
3505 * @ctxt: an XML parser context
3506 *
3507 * parse the name of a PI
3508 *
3509 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3510 *
3511 * Returns the PITarget name or NULL
3512 */
3513
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003514const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003515xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003516 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003517
3518 name = xmlParseName(ctxt);
3519 if ((name != NULL) &&
3520 ((name[0] == 'x') || (name[0] == 'X')) &&
3521 ((name[1] == 'm') || (name[1] == 'M')) &&
3522 ((name[2] == 'l') || (name[2] == 'L'))) {
3523 int i;
3524 if ((name[0] == 'x') && (name[1] == 'm') &&
3525 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003526 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003527 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003528 return(name);
3529 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003530 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003531 return(name);
3532 }
3533 for (i = 0;;i++) {
3534 if (xmlW3CPIs[i] == NULL) break;
3535 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3536 return(name);
3537 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003538 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3539 "xmlParsePITarget: invalid name prefix 'xml'\n",
3540 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003541 }
3542 return(name);
3543}
3544
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing, in which case the function
 * returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (xmlStrncmp(cur, BAD_CAST "catalog", 7) != 0)
        goto error;
    cur += 7;

    /* '=' with optional blanks around it */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (*cur != '=')
        return;
    cur++;
    while (IS_BLANK_CH(*cur))
        cur++;

    /* quoted value, terminated by the same quote that opened it */
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cur++;
    start = cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto error;
    URL = xmlStrndup(start, cur - start);
    cur++;

    /* only blanks may follow the value */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (*cur != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3606
Owen Taylor3473f882001-02-23 17:55:21 +00003607/**
3608 * xmlParsePI:
3609 * @ctxt: an XML parser context
3610 *
3611 * parse an XML Processing Instruction.
3612 *
3613 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3614 *
3615 * The processing is transfered to SAX once parsed.
3616 */
3617
3618void
3619xmlParsePI(xmlParserCtxtPtr ctxt) {
3620 xmlChar *buf = NULL;
3621 int len = 0;
3622 int size = XML_PARSER_BUFFER_SIZE;
3623 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003624 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 xmlParserInputState state;
3626 int count = 0;
3627
3628 if ((RAW == '<') && (NXT(1) == '?')) {
3629 xmlParserInputPtr input = ctxt->input;
3630 state = ctxt->instate;
3631 ctxt->instate = XML_PARSER_PI;
3632 /*
3633 * this is a Processing Instruction.
3634 */
3635 SKIP(2);
3636 SHRINK;
3637
3638 /*
3639 * Parse the target name and check for special support like
3640 * namespace.
3641 */
3642 target = xmlParsePITarget(ctxt);
3643 if (target != NULL) {
3644 if ((RAW == '?') && (NXT(1) == '>')) {
3645 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003646 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3647 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003648 }
3649 SKIP(2);
3650
3651 /*
3652 * SAX: PI detected.
3653 */
3654 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3655 (ctxt->sax->processingInstruction != NULL))
3656 ctxt->sax->processingInstruction(ctxt->userData,
3657 target, NULL);
3658 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003659 return;
3660 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003661 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003662 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003663 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003664 ctxt->instate = state;
3665 return;
3666 }
3667 cur = CUR;
3668 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003669 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3670 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 }
3672 SKIP_BLANKS;
3673 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003674 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003675 ((cur != '?') || (NXT(1) != '>'))) {
3676 if (len + 5 >= size) {
3677 size *= 2;
3678 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3679 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003680 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003681 ctxt->instate = state;
3682 return;
3683 }
3684 }
3685 count++;
3686 if (count > 50) {
3687 GROW;
3688 count = 0;
3689 }
3690 COPY_BUF(l,buf,len,cur);
3691 NEXTL(l);
3692 cur = CUR_CHAR(l);
3693 if (cur == 0) {
3694 SHRINK;
3695 GROW;
3696 cur = CUR_CHAR(l);
3697 }
3698 }
3699 buf[len] = 0;
3700 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003701 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3702 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003703 } else {
3704 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003705 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3706 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003707 }
3708 SKIP(2);
3709
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003710#ifdef LIBXML_CATALOG_ENABLED
3711 if (((state == XML_PARSER_MISC) ||
3712 (state == XML_PARSER_START)) &&
3713 (xmlStrEqual(target, XML_CATALOG_PI))) {
3714 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3715 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3716 (allow == XML_CATA_ALLOW_ALL))
3717 xmlParseCatalogPI(ctxt, buf);
3718 }
3719#endif
3720
3721
Owen Taylor3473f882001-02-23 17:55:21 +00003722 /*
3723 * SAX: PI detected.
3724 */
3725 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3726 (ctxt->sax->processingInstruction != NULL))
3727 ctxt->sax->processingInstruction(ctxt->userData,
3728 target, buf);
3729 }
3730 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003732 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003733 }
3734 ctxt->instate = state;
3735 }
3736}
3737
3738/**
3739 * xmlParseNotationDecl:
3740 * @ctxt: an XML parser context
3741 *
3742 * parse a notation declaration
3743 *
3744 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3745 *
3746 * Hence there is actually 3 choices:
3747 * 'PUBLIC' S PubidLiteral
3748 * 'PUBLIC' S PubidLiteral S SystemLiteral
3749 * and 'SYSTEM' S SystemLiteral
3750 *
3751 * See the NOTE on xmlParseExternalID().
3752 */
3753
3754void
3755xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003756 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003757 xmlChar *Pubid;
3758 xmlChar *Systemid;
3759
Daniel Veillarda07050d2003-10-19 14:46:32 +00003760 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003761 xmlParserInputPtr input = ctxt->input;
3762 SHRINK;
3763 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003764 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3766 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003767 return;
3768 }
3769 SKIP_BLANKS;
3770
Daniel Veillard76d66f42001-05-16 21:05:17 +00003771 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003772 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003773 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003774 return;
3775 }
William M. Brack76e95df2003-10-18 16:20:14 +00003776 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003777 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003778 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003779 return;
3780 }
3781 SKIP_BLANKS;
3782
3783 /*
3784 * Parse the IDs.
3785 */
3786 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3787 SKIP_BLANKS;
3788
3789 if (RAW == '>') {
3790 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3792 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003793 }
3794 NEXT;
3795 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3796 (ctxt->sax->notationDecl != NULL))
3797 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3798 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003799 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003800 }
Owen Taylor3473f882001-02-23 17:55:21 +00003801 if (Systemid != NULL) xmlFree(Systemid);
3802 if (Pubid != NULL) xmlFree(Pubid);
3803 }
3804}
3805
3806/**
3807 * xmlParseEntityDecl:
3808 * @ctxt: an XML parser context
3809 *
3810 * parse <!ENTITY declarations
3811 *
3812 * [70] EntityDecl ::= GEDecl | PEDecl
3813 *
3814 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3815 *
3816 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3817 *
3818 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3819 *
3820 * [74] PEDef ::= EntityValue | ExternalID
3821 *
3822 * [76] NDataDecl ::= S 'NDATA' S Name
3823 *
3824 * [ VC: Notation Declared ]
3825 * The Name must match the declared name of a notation.
3826 */
3827
3828void
3829xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003830 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 xmlChar *value = NULL;
3832 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003833 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003834 int isParameter = 0;
3835 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003836 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003837
3838 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003839 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003840 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003841 SHRINK;
3842 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003843 skipped = SKIP_BLANKS;
3844 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003845 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3846 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003847 }
Owen Taylor3473f882001-02-23 17:55:21 +00003848
3849 if (RAW == '%') {
3850 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003851 skipped = SKIP_BLANKS;
3852 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003853 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3854 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003855 }
Owen Taylor3473f882001-02-23 17:55:21 +00003856 isParameter = 1;
3857 }
3858
Daniel Veillard76d66f42001-05-16 21:05:17 +00003859 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003860 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003861 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3862 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003863 return;
3864 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003865 skipped = SKIP_BLANKS;
3866 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003867 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3868 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003869 }
Owen Taylor3473f882001-02-23 17:55:21 +00003870
Daniel Veillardf5582f12002-06-11 10:08:16 +00003871 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003872 /*
3873 * handle the various case of definitions...
3874 */
3875 if (isParameter) {
3876 if ((RAW == '"') || (RAW == '\'')) {
3877 value = xmlParseEntityValue(ctxt, &orig);
3878 if (value) {
3879 if ((ctxt->sax != NULL) &&
3880 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3881 ctxt->sax->entityDecl(ctxt->userData, name,
3882 XML_INTERNAL_PARAMETER_ENTITY,
3883 NULL, NULL, value);
3884 }
3885 } else {
3886 URI = xmlParseExternalID(ctxt, &literal, 1);
3887 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003888 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003889 }
3890 if (URI) {
3891 xmlURIPtr uri;
3892
3893 uri = xmlParseURI((const char *) URI);
3894 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003895 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3896 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003897 /*
3898 * This really ought to be a well formedness error
3899 * but the XML Core WG decided otherwise c.f. issue
3900 * E26 of the XML erratas.
3901 */
Owen Taylor3473f882001-02-23 17:55:21 +00003902 } else {
3903 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003904 /*
3905 * Okay this is foolish to block those but not
3906 * invalid URIs.
3907 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003908 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003909 } else {
3910 if ((ctxt->sax != NULL) &&
3911 (!ctxt->disableSAX) &&
3912 (ctxt->sax->entityDecl != NULL))
3913 ctxt->sax->entityDecl(ctxt->userData, name,
3914 XML_EXTERNAL_PARAMETER_ENTITY,
3915 literal, URI, NULL);
3916 }
3917 xmlFreeURI(uri);
3918 }
3919 }
3920 }
3921 } else {
3922 if ((RAW == '"') || (RAW == '\'')) {
3923 value = xmlParseEntityValue(ctxt, &orig);
3924 if ((ctxt->sax != NULL) &&
3925 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3926 ctxt->sax->entityDecl(ctxt->userData, name,
3927 XML_INTERNAL_GENERAL_ENTITY,
3928 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003929 /*
3930 * For expat compatibility in SAX mode.
3931 */
3932 if ((ctxt->myDoc == NULL) ||
3933 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3934 if (ctxt->myDoc == NULL) {
3935 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3936 }
3937 if (ctxt->myDoc->intSubset == NULL)
3938 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3939 BAD_CAST "fake", NULL, NULL);
3940
Daniel Veillard1af9a412003-08-20 22:54:39 +00003941 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3942 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003943 }
Owen Taylor3473f882001-02-23 17:55:21 +00003944 } else {
3945 URI = xmlParseExternalID(ctxt, &literal, 1);
3946 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003947 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003948 }
3949 if (URI) {
3950 xmlURIPtr uri;
3951
3952 uri = xmlParseURI((const char *)URI);
3953 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003954 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3955 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003956 /*
3957 * This really ought to be a well formedness error
3958 * but the XML Core WG decided otherwise c.f. issue
3959 * E26 of the XML erratas.
3960 */
Owen Taylor3473f882001-02-23 17:55:21 +00003961 } else {
3962 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003963 /*
3964 * Okay this is foolish to block those but not
3965 * invalid URIs.
3966 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003967 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003968 }
3969 xmlFreeURI(uri);
3970 }
3971 }
William M. Brack76e95df2003-10-18 16:20:14 +00003972 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003973 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3974 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003975 }
3976 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003977 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003978 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00003979 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003980 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3981 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003982 }
3983 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003984 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003985 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3986 (ctxt->sax->unparsedEntityDecl != NULL))
3987 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3988 literal, URI, ndata);
3989 } else {
3990 if ((ctxt->sax != NULL) &&
3991 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3992 ctxt->sax->entityDecl(ctxt->userData, name,
3993 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3994 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003995 /*
3996 * For expat compatibility in SAX mode.
3997 * assuming the entity repalcement was asked for
3998 */
3999 if ((ctxt->replaceEntities != 0) &&
4000 ((ctxt->myDoc == NULL) ||
4001 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4002 if (ctxt->myDoc == NULL) {
4003 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4004 }
4005
4006 if (ctxt->myDoc->intSubset == NULL)
4007 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4008 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004009 xmlSAX2EntityDecl(ctxt, name,
4010 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4011 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004012 }
Owen Taylor3473f882001-02-23 17:55:21 +00004013 }
4014 }
4015 }
4016 SKIP_BLANKS;
4017 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004018 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004019 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004020 } else {
4021 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004022 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4023 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004024 }
4025 NEXT;
4026 }
4027 if (orig != NULL) {
4028 /*
4029 * Ugly mechanism to save the raw entity value.
4030 */
4031 xmlEntityPtr cur = NULL;
4032
4033 if (isParameter) {
4034 if ((ctxt->sax != NULL) &&
4035 (ctxt->sax->getParameterEntity != NULL))
4036 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4037 } else {
4038 if ((ctxt->sax != NULL) &&
4039 (ctxt->sax->getEntity != NULL))
4040 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004041 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004042 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004043 }
Owen Taylor3473f882001-02-23 17:55:21 +00004044 }
4045 if (cur != NULL) {
4046 if (cur->orig != NULL)
4047 xmlFree(orig);
4048 else
4049 cur->orig = orig;
4050 } else
4051 xmlFree(orig);
4052 }
Owen Taylor3473f882001-02-23 17:55:21 +00004053 if (value != NULL) xmlFree(value);
4054 if (URI != NULL) xmlFree(URI);
4055 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004056 }
4057}
4058
4059/**
4060 * xmlParseDefaultDecl:
4061 * @ctxt: an XML parser context
4062 * @value: Receive a possible fixed default value for the attribute
4063 *
4064 * Parse an attribute default declaration
4065 *
4066 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4067 *
4068 * [ VC: Required Attribute ]
4069 * if the default declaration is the keyword #REQUIRED, then the
4070 * attribute must be specified for all elements of the type in the
4071 * attribute-list declaration.
4072 *
4073 * [ VC: Attribute Default Legal ]
4074 * The declared default value must meet the lexical constraints of
4075 * the declared attribute type c.f. xmlValidateAttributeDecl()
4076 *
4077 * [ VC: Fixed Attribute Default ]
4078 * if an attribute has a default value declared with the #FIXED
4079 * keyword, instances of that attribute must match the default value.
4080 *
4081 * [ WFC: No < in Attribute Values ]
4082 * handled in xmlParseAttValue()
4083 *
4084 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4085 * or XML_ATTRIBUTE_FIXED.
4086 */
4087
4088int
4089xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4090 int val;
4091 xmlChar *ret;
4092
4093 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004094 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004095 SKIP(9);
4096 return(XML_ATTRIBUTE_REQUIRED);
4097 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004098 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004099 SKIP(8);
4100 return(XML_ATTRIBUTE_IMPLIED);
4101 }
4102 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004103 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004104 SKIP(6);
4105 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004106 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004107 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4108 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004109 }
4110 SKIP_BLANKS;
4111 }
4112 ret = xmlParseAttValue(ctxt);
4113 ctxt->instate = XML_PARSER_DTD;
4114 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004115 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004116 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004117 } else
4118 *value = ret;
4119 return(val);
4120}
4121
4122/**
4123 * xmlParseNotationType:
4124 * @ctxt: an XML parser context
4125 *
4126 * parse an Notation attribute type.
4127 *
4128 * Note: the leading 'NOTATION' S part has already being parsed...
4129 *
4130 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4131 *
4132 * [ VC: Notation Attributes ]
4133 * Values of this type must match one of the notation names included
4134 * in the declaration; all notation names in the declaration must be declared.
4135 *
4136 * Returns: the notation attribute tree built while parsing
4137 */
4138
4139xmlEnumerationPtr
4140xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004141 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004142 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4143
4144 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004145 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004146 return(NULL);
4147 }
4148 SHRINK;
4149 do {
4150 NEXT;
4151 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004152 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004153 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004154 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4155 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004156 return(ret);
4157 }
4158 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004159 if (cur == NULL) return(ret);
4160 if (last == NULL) ret = last = cur;
4161 else {
4162 last->next = cur;
4163 last = cur;
4164 }
4165 SKIP_BLANKS;
4166 } while (RAW == '|');
4167 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004168 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004169 if ((last != NULL) && (last != ret))
4170 xmlFreeEnumeration(last);
4171 return(ret);
4172 }
4173 NEXT;
4174 return(ret);
4175}
4176
4177/**
4178 * xmlParseEnumerationType:
4179 * @ctxt: an XML parser context
4180 *
4181 * parse an Enumeration attribute type.
4182 *
4183 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4184 *
4185 * [ VC: Enumeration ]
4186 * Values of this type must match one of the Nmtoken tokens in
4187 * the declaration
4188 *
4189 * Returns: the enumeration attribute tree built while parsing
4190 */
4191
4192xmlEnumerationPtr
4193xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4194 xmlChar *name;
4195 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4196
4197 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004198 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 return(NULL);
4200 }
4201 SHRINK;
4202 do {
4203 NEXT;
4204 SKIP_BLANKS;
4205 name = xmlParseNmtoken(ctxt);
4206 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004207 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004208 return(ret);
4209 }
4210 cur = xmlCreateEnumeration(name);
4211 xmlFree(name);
4212 if (cur == NULL) return(ret);
4213 if (last == NULL) ret = last = cur;
4214 else {
4215 last->next = cur;
4216 last = cur;
4217 }
4218 SKIP_BLANKS;
4219 } while (RAW == '|');
4220 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004221 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004222 return(ret);
4223 }
4224 NEXT;
4225 return(ret);
4226}
4227
4228/**
4229 * xmlParseEnumeratedType:
4230 * @ctxt: an XML parser context
4231 * @tree: the enumeration tree built while parsing
4232 *
4233 * parse an Enumerated attribute type.
4234 *
4235 * [57] EnumeratedType ::= NotationType | Enumeration
4236 *
4237 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4238 *
4239 *
4240 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4241 */
4242
4243int
4244xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004245 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004246 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004247 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004248 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4249 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004250 return(0);
4251 }
4252 SKIP_BLANKS;
4253 *tree = xmlParseNotationType(ctxt);
4254 if (*tree == NULL) return(0);
4255 return(XML_ATTRIBUTE_NOTATION);
4256 }
4257 *tree = xmlParseEnumerationType(ctxt);
4258 if (*tree == NULL) return(0);
4259 return(XML_ATTRIBUTE_ENUMERATION);
4260}
4261
4262/**
4263 * xmlParseAttributeType:
4264 * @ctxt: an XML parser context
4265 * @tree: the enumeration tree built while parsing
4266 *
4267 * parse the Attribute list def for an element
4268 *
4269 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4270 *
4271 * [55] StringType ::= 'CDATA'
4272 *
4273 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4274 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4275 *
4276 * Validity constraints for attribute values syntax are checked in
4277 * xmlValidateAttributeValue()
4278 *
4279 * [ VC: ID ]
4280 * Values of type ID must match the Name production. A name must not
4281 * appear more than once in an XML document as a value of this type;
4282 * i.e., ID values must uniquely identify the elements which bear them.
4283 *
4284 * [ VC: One ID per Element Type ]
4285 * No element type may have more than one ID attribute specified.
4286 *
4287 * [ VC: ID Attribute Default ]
4288 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4289 *
4290 * [ VC: IDREF ]
4291 * Values of type IDREF must match the Name production, and values
4292 * of type IDREFS must match Names; each IDREF Name must match the value
4293 * of an ID attribute on some element in the XML document; i.e. IDREF
4294 * values must match the value of some ID attribute.
4295 *
4296 * [ VC: Entity Name ]
4297 * Values of type ENTITY must match the Name production, values
4298 * of type ENTITIES must match Names; each Entity Name must match the
4299 * name of an unparsed entity declared in the DTD.
4300 *
4301 * [ VC: Name Token ]
4302 * Values of type NMTOKEN must match the Nmtoken production; values
4303 * of type NMTOKENS must match Nmtokens.
4304 *
4305 * Returns the attribute type
4306 */
4307int
4308xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4309 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004310 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004311 SKIP(5);
4312 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004313 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SKIP(6);
4315 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004316 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004317 SKIP(5);
4318 return(XML_ATTRIBUTE_IDREF);
4319 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4320 SKIP(2);
4321 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004322 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004323 SKIP(6);
4324 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004325 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004326 SKIP(8);
4327 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004328 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004329 SKIP(8);
4330 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004331 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004332 SKIP(7);
4333 return(XML_ATTRIBUTE_NMTOKEN);
4334 }
4335 return(xmlParseEnumeratedType(ctxt, tree));
4336}
4337
4338/**
4339 * xmlParseAttributeListDecl:
4340 * @ctxt: an XML parser context
4341 *
4342 * : parse the Attribute list def for an element
4343 *
4344 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4345 *
4346 * [53] AttDef ::= S Name S AttType S DefaultDecl
4347 *
4348 */
4349void
4350xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004351 const xmlChar *elemName;
4352 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004353 xmlEnumerationPtr tree;
4354
Daniel Veillarda07050d2003-10-19 14:46:32 +00004355 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004356 xmlParserInputPtr input = ctxt->input;
4357
4358 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004359 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004361 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004364 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004365 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004366 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4367 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004368 return;
4369 }
4370 SKIP_BLANKS;
4371 GROW;
4372 while (RAW != '>') {
4373 const xmlChar *check = CUR_PTR;
4374 int type;
4375 int def;
4376 xmlChar *defaultValue = NULL;
4377
4378 GROW;
4379 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004380 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004381 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004382 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4383 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004384 break;
4385 }
4386 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004387 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004388 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004389 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (defaultValue != NULL)
4391 xmlFree(defaultValue);
4392 break;
4393 }
4394 SKIP_BLANKS;
4395
4396 type = xmlParseAttributeType(ctxt, &tree);
4397 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004398 if (defaultValue != NULL)
4399 xmlFree(defaultValue);
4400 break;
4401 }
4402
4403 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004404 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004405 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4406 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004407 if (defaultValue != NULL)
4408 xmlFree(defaultValue);
4409 if (tree != NULL)
4410 xmlFreeEnumeration(tree);
4411 break;
4412 }
4413 SKIP_BLANKS;
4414
4415 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4416 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004417 if (defaultValue != NULL)
4418 xmlFree(defaultValue);
4419 if (tree != NULL)
4420 xmlFreeEnumeration(tree);
4421 break;
4422 }
4423
4424 GROW;
4425 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004426 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004428 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004429 if (defaultValue != NULL)
4430 xmlFree(defaultValue);
4431 if (tree != NULL)
4432 xmlFreeEnumeration(tree);
4433 break;
4434 }
4435 SKIP_BLANKS;
4436 }
4437 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4439 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004440 if (defaultValue != NULL)
4441 xmlFree(defaultValue);
4442 if (tree != NULL)
4443 xmlFreeEnumeration(tree);
4444 break;
4445 }
4446 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4447 (ctxt->sax->attributeDecl != NULL))
4448 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4449 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004450 else if (tree != NULL)
4451 xmlFreeEnumeration(tree);
4452
4453 if ((ctxt->sax2) && (defaultValue != NULL) &&
4454 (def != XML_ATTRIBUTE_IMPLIED) &&
4455 (def != XML_ATTRIBUTE_REQUIRED)) {
4456 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4457 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004458 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4459 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4460 }
Owen Taylor3473f882001-02-23 17:55:21 +00004461 if (defaultValue != NULL)
4462 xmlFree(defaultValue);
4463 GROW;
4464 }
4465 if (RAW == '>') {
4466 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004467 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4468 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004469 }
4470 NEXT;
4471 }
Owen Taylor3473f882001-02-23 17:55:21 +00004472 }
4473}
4474
4475/**
4476 * xmlParseElementMixedContentDecl:
4477 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004478 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004479 *
4480 * parse the declaration for a Mixed Element content
4481 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4482 *
4483 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4484 * '(' S? '#PCDATA' S? ')'
4485 *
4486 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4487 *
4488 * [ VC: No Duplicate Types ]
4489 * The same name must not appear more than once in a single
4490 * mixed-content declaration.
4491 *
4492 * returns: the list of the xmlElementContentPtr describing the element choices
4493 */
4494xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004495xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004496 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004497 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004498
4499 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004500 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004501 SKIP(7);
4502 SKIP_BLANKS;
4503 SHRINK;
4504 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004505 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004506 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4507"Element content declaration doesn't start and stop in the same entity\n",
4508 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004509 }
Owen Taylor3473f882001-02-23 17:55:21 +00004510 NEXT;
4511 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4512 if (RAW == '*') {
4513 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4514 NEXT;
4515 }
4516 return(ret);
4517 }
4518 if ((RAW == '(') || (RAW == '|')) {
4519 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4520 if (ret == NULL) return(NULL);
4521 }
4522 while (RAW == '|') {
4523 NEXT;
4524 if (elem == NULL) {
4525 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4526 if (ret == NULL) return(NULL);
4527 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004528 if (cur != NULL)
4529 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004530 cur = ret;
4531 } else {
4532 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4533 if (n == NULL) return(NULL);
4534 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004535 if (n->c1 != NULL)
4536 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004537 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004538 if (n != NULL)
4539 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004540 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004541 }
4542 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004543 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004544 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004545 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004546 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004547 xmlFreeElementContent(cur);
4548 return(NULL);
4549 }
4550 SKIP_BLANKS;
4551 GROW;
4552 }
4553 if ((RAW == ')') && (NXT(1) == '*')) {
4554 if (elem != NULL) {
4555 cur->c2 = xmlNewElementContent(elem,
4556 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004557 if (cur->c2 != NULL)
4558 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004559 }
4560 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004561 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004562 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4563"Element content declaration doesn't start and stop in the same entity\n",
4564 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004565 }
Owen Taylor3473f882001-02-23 17:55:21 +00004566 SKIP(2);
4567 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004568 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004569 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004570 return(NULL);
4571 }
4572
4573 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004574 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004575 }
4576 return(ret);
4577}
4578
4579/**
4580 * xmlParseElementChildrenContentDecl:
4581 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004582 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004583 *
4584 * parse the declaration for a Mixed Element content
4585 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4586 *
4587 *
4588 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4589 *
4590 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4591 *
4592 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4593 *
4594 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4595 *
4596 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4597 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004598 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004599 * opening or closing parentheses in a choice, seq, or Mixed
4600 * construct is contained in the replacement text for a parameter
4601 * entity, both must be contained in the same replacement text. For
4602 * interoperability, if a parameter-entity reference appears in a
4603 * choice, seq, or Mixed construct, its replacement text should not
4604 * be empty, and neither the first nor last non-blank character of
4605 * the replacement text should be a connector (| or ,).
4606 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004607 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004608 * hierarchy.
4609 */
4610xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004611xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004612 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004613 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004614 xmlChar type = 0;
4615
4616 SKIP_BLANKS;
4617 GROW;
4618 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004619 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004620
Owen Taylor3473f882001-02-23 17:55:21 +00004621 /* Recurse on first child */
4622 NEXT;
4623 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004624 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004625 SKIP_BLANKS;
4626 GROW;
4627 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004628 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004629 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004630 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004631 return(NULL);
4632 }
4633 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004634 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004635 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004636 return(NULL);
4637 }
Owen Taylor3473f882001-02-23 17:55:21 +00004638 GROW;
4639 if (RAW == '?') {
4640 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4641 NEXT;
4642 } else if (RAW == '*') {
4643 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4644 NEXT;
4645 } else if (RAW == '+') {
4646 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4647 NEXT;
4648 } else {
4649 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4650 }
Owen Taylor3473f882001-02-23 17:55:21 +00004651 GROW;
4652 }
4653 SKIP_BLANKS;
4654 SHRINK;
4655 while (RAW != ')') {
4656 /*
4657 * Each loop we parse one separator and one element.
4658 */
4659 if (RAW == ',') {
4660 if (type == 0) type = CUR;
4661
4662 /*
4663 * Detect "Name | Name , Name" error
4664 */
4665 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004666 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004667 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004668 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004669 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004670 xmlFreeElementContent(last);
4671 if (ret != NULL)
4672 xmlFreeElementContent(ret);
4673 return(NULL);
4674 }
4675 NEXT;
4676
4677 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4678 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004679 if ((last != NULL) && (last != ret))
4680 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004681 xmlFreeElementContent(ret);
4682 return(NULL);
4683 }
4684 if (last == NULL) {
4685 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004686 if (ret != NULL)
4687 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004688 ret = cur = op;
4689 } else {
4690 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004691 if (op != NULL)
4692 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004693 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004694 if (last != NULL)
4695 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004696 cur =op;
4697 last = NULL;
4698 }
4699 } else if (RAW == '|') {
4700 if (type == 0) type = CUR;
4701
4702 /*
4703 * Detect "Name , Name | Name" error
4704 */
4705 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004706 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004707 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004708 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004709 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004710 xmlFreeElementContent(last);
4711 if (ret != NULL)
4712 xmlFreeElementContent(ret);
4713 return(NULL);
4714 }
4715 NEXT;
4716
4717 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4718 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004719 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004720 xmlFreeElementContent(last);
4721 if (ret != NULL)
4722 xmlFreeElementContent(ret);
4723 return(NULL);
4724 }
4725 if (last == NULL) {
4726 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004727 if (ret != NULL)
4728 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004729 ret = cur = op;
4730 } else {
4731 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004732 if (op != NULL)
4733 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004734 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004735 if (last != NULL)
4736 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004737 cur =op;
4738 last = NULL;
4739 }
4740 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004741 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004742 if (ret != NULL)
4743 xmlFreeElementContent(ret);
4744 return(NULL);
4745 }
4746 GROW;
4747 SKIP_BLANKS;
4748 GROW;
4749 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004750 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004751 /* Recurse on second child */
4752 NEXT;
4753 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004754 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004755 SKIP_BLANKS;
4756 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004757 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004758 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004759 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004760 if (ret != NULL)
4761 xmlFreeElementContent(ret);
4762 return(NULL);
4763 }
4764 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004765 if (RAW == '?') {
4766 last->ocur = XML_ELEMENT_CONTENT_OPT;
4767 NEXT;
4768 } else if (RAW == '*') {
4769 last->ocur = XML_ELEMENT_CONTENT_MULT;
4770 NEXT;
4771 } else if (RAW == '+') {
4772 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4773 NEXT;
4774 } else {
4775 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4776 }
4777 }
4778 SKIP_BLANKS;
4779 GROW;
4780 }
4781 if ((cur != NULL) && (last != NULL)) {
4782 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004783 if (last != NULL)
4784 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004785 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004786 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004787 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4788"Element content declaration doesn't start and stop in the same entity\n",
4789 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004790 }
Owen Taylor3473f882001-02-23 17:55:21 +00004791 NEXT;
4792 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004793 if (ret != NULL)
4794 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004795 NEXT;
4796 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004797 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004798 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004799 cur = ret;
4800 /*
4801 * Some normalization:
4802 * (a | b* | c?)* == (a | b | c)*
4803 */
4804 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4805 if ((cur->c1 != NULL) &&
4806 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4807 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4808 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4809 if ((cur->c2 != NULL) &&
4810 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4811 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4812 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4813 cur = cur->c2;
4814 }
4815 }
Owen Taylor3473f882001-02-23 17:55:21 +00004816 NEXT;
4817 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004818 if (ret != NULL) {
4819 int found = 0;
4820
Daniel Veillarde470df72001-04-18 21:41:07 +00004821 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004822 /*
4823 * Some normalization:
4824 * (a | b*)+ == (a | b)*
4825 * (a | b?)+ == (a | b)*
4826 */
4827 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4828 if ((cur->c1 != NULL) &&
4829 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4830 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4831 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4832 found = 1;
4833 }
4834 if ((cur->c2 != NULL) &&
4835 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4836 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4837 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4838 found = 1;
4839 }
4840 cur = cur->c2;
4841 }
4842 if (found)
4843 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4844 }
Owen Taylor3473f882001-02-23 17:55:21 +00004845 NEXT;
4846 }
4847 return(ret);
4848}
4849
4850/**
4851 * xmlParseElementContentDecl:
4852 * @ctxt: an XML parser context
4853 * @name: the name of the element being defined.
4854 * @result: the Element Content pointer will be stored here if any
4855 *
4856 * parse the declaration for an Element content either Mixed or Children,
4857 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4858 *
4859 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4860 *
4861 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4862 */
4863
4864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004865xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004866 xmlElementContentPtr *result) {
4867
4868 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004869 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004870 int res;
4871
4872 *result = NULL;
4873
4874 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004875 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004876 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004877 return(-1);
4878 }
4879 NEXT;
4880 GROW;
4881 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004882 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004883 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004884 res = XML_ELEMENT_TYPE_MIXED;
4885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004886 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004887 res = XML_ELEMENT_TYPE_ELEMENT;
4888 }
Owen Taylor3473f882001-02-23 17:55:21 +00004889 SKIP_BLANKS;
4890 *result = tree;
4891 return(res);
4892}
4893
4894/**
4895 * xmlParseElementDecl:
4896 * @ctxt: an XML parser context
4897 *
4898 * parse an Element declaration.
4899 *
4900 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4901 *
4902 * [ VC: Unique Element Type Declaration ]
4903 * No element type may be declared more than once
4904 *
4905 * Returns the type of the element, or -1 in case of error
4906 */
4907int
4908xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004909 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004910 int ret = -1;
4911 xmlElementContentPtr content = NULL;
4912
4913 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004914 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004915 xmlParserInputPtr input = ctxt->input;
4916
4917 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004918 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004919 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4920 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004921 }
4922 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004923 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004925 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4926 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004927 return(-1);
4928 }
4929 while ((RAW == 0) && (ctxt->inputNr > 1))
4930 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004931 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004932 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4933 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004934 }
4935 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004936 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004937 SKIP(5);
4938 /*
4939 * Element must always be empty.
4940 */
4941 ret = XML_ELEMENT_TYPE_EMPTY;
4942 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4943 (NXT(2) == 'Y')) {
4944 SKIP(3);
4945 /*
4946 * Element is a generic container.
4947 */
4948 ret = XML_ELEMENT_TYPE_ANY;
4949 } else if (RAW == '(') {
4950 ret = xmlParseElementContentDecl(ctxt, name, &content);
4951 } else {
4952 /*
4953 * [ WFC: PEs in Internal Subset ] error handling.
4954 */
4955 if ((RAW == '%') && (ctxt->external == 0) &&
4956 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004957 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004958 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004959 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004960 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004961 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4962 }
Owen Taylor3473f882001-02-23 17:55:21 +00004963 return(-1);
4964 }
4965
4966 SKIP_BLANKS;
4967 /*
4968 * Pop-up of finished entities.
4969 */
4970 while ((RAW == 0) && (ctxt->inputNr > 1))
4971 xmlPopInput(ctxt);
4972 SKIP_BLANKS;
4973
4974 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004975 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004976 } else {
4977 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004978 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4979 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004980 }
4981
4982 NEXT;
4983 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4984 (ctxt->sax->elementDecl != NULL))
4985 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4986 content);
4987 }
4988 if (content != NULL) {
4989 xmlFreeElementContent(content);
4990 }
Owen Taylor3473f882001-02-23 17:55:21 +00004991 }
4992 return(ret);
4993}
4994
4995/**
Owen Taylor3473f882001-02-23 17:55:21 +00004996 * xmlParseConditionalSections
4997 * @ctxt: an XML parser context
4998 *
4999 * [61] conditionalSect ::= includeSect | ignoreSect
5000 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5001 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5002 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5003 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5004 */
5005
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005006static void
Owen Taylor3473f882001-02-23 17:55:21 +00005007xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5008 SKIP(3);
5009 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005010 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005011 SKIP(7);
5012 SKIP_BLANKS;
5013 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005014 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 } else {
5016 NEXT;
5017 }
5018 if (xmlParserDebugEntities) {
5019 if ((ctxt->input != NULL) && (ctxt->input->filename))
5020 xmlGenericError(xmlGenericErrorContext,
5021 "%s(%d): ", ctxt->input->filename,
5022 ctxt->input->line);
5023 xmlGenericError(xmlGenericErrorContext,
5024 "Entering INCLUDE Conditional Section\n");
5025 }
5026
5027 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5028 (NXT(2) != '>'))) {
5029 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005030 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005031
5032 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5033 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005034 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005035 NEXT;
5036 } else if (RAW == '%') {
5037 xmlParsePEReference(ctxt);
5038 } else
5039 xmlParseMarkupDecl(ctxt);
5040
5041 /*
5042 * Pop-up of finished entities.
5043 */
5044 while ((RAW == 0) && (ctxt->inputNr > 1))
5045 xmlPopInput(ctxt);
5046
Daniel Veillardfdc91562002-07-01 21:52:03 +00005047 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005048 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 break;
5050 }
5051 }
5052 if (xmlParserDebugEntities) {
5053 if ((ctxt->input != NULL) && (ctxt->input->filename))
5054 xmlGenericError(xmlGenericErrorContext,
5055 "%s(%d): ", ctxt->input->filename,
5056 ctxt->input->line);
5057 xmlGenericError(xmlGenericErrorContext,
5058 "Leaving INCLUDE Conditional Section\n");
5059 }
5060
Daniel Veillarda07050d2003-10-19 14:46:32 +00005061 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005062 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005063 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 int depth = 0;
5065
5066 SKIP(6);
5067 SKIP_BLANKS;
5068 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005069 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 } else {
5071 NEXT;
5072 }
5073 if (xmlParserDebugEntities) {
5074 if ((ctxt->input != NULL) && (ctxt->input->filename))
5075 xmlGenericError(xmlGenericErrorContext,
5076 "%s(%d): ", ctxt->input->filename,
5077 ctxt->input->line);
5078 xmlGenericError(xmlGenericErrorContext,
5079 "Entering IGNORE Conditional Section\n");
5080 }
5081
5082 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005083 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005084 * But disable SAX event generating DTD building in the meantime
5085 */
5086 state = ctxt->disableSAX;
5087 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005088 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005089 ctxt->instate = XML_PARSER_IGNORE;
5090
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005091 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005092 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5093 depth++;
5094 SKIP(3);
5095 continue;
5096 }
5097 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5098 if (--depth >= 0) SKIP(3);
5099 continue;
5100 }
5101 NEXT;
5102 continue;
5103 }
5104
5105 ctxt->disableSAX = state;
5106 ctxt->instate = instate;
5107
5108 if (xmlParserDebugEntities) {
5109 if ((ctxt->input != NULL) && (ctxt->input->filename))
5110 xmlGenericError(xmlGenericErrorContext,
5111 "%s(%d): ", ctxt->input->filename,
5112 ctxt->input->line);
5113 xmlGenericError(xmlGenericErrorContext,
5114 "Leaving IGNORE Conditional Section\n");
5115 }
5116
5117 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005118 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005119 }
5120
5121 if (RAW == 0)
5122 SHRINK;
5123
5124 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005125 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 } else {
5127 SKIP(3);
5128 }
5129}
5130
5131/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005132 * xmlParseMarkupDecl:
5133 * @ctxt: an XML parser context
5134 *
5135 * parse Markup declarations
5136 *
5137 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5138 * NotationDecl | PI | Comment
5139 *
5140 * [ VC: Proper Declaration/PE Nesting ]
5141 * Parameter-entity replacement text must be properly nested with
5142 * markup declarations. That is to say, if either the first character
5143 * or the last character of a markup declaration (markupdecl above) is
5144 * contained in the replacement text for a parameter-entity reference,
5145 * both must be contained in the same replacement text.
5146 *
5147 * [ WFC: PEs in Internal Subset ]
5148 * In the internal DTD subset, parameter-entity references can occur
5149 * only where markup declarations can occur, not within markup declarations.
5150 * (This does not apply to references that occur in external parameter
5151 * entities or to the external subset.)
5152 */
5153void
5154xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5155 GROW;
5156 xmlParseElementDecl(ctxt);
5157 xmlParseAttributeListDecl(ctxt);
5158 xmlParseEntityDecl(ctxt);
5159 xmlParseNotationDecl(ctxt);
5160 xmlParsePI(ctxt);
5161 xmlParseComment(ctxt);
5162 /*
5163 * This is only for internal subset. On external entities,
5164 * the replacement is done before parsing stage
5165 */
5166 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5167 xmlParsePEReference(ctxt);
5168
5169 /*
5170 * Conditional sections are allowed from entities included
5171 * by PE References in the internal subset.
5172 */
5173 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5174 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5175 xmlParseConditionalSections(ctxt);
5176 }
5177 }
5178
5179 ctxt->instate = XML_PARSER_DTD;
5180}
5181
5182/**
5183 * xmlParseTextDecl:
5184 * @ctxt: an XML parser context
5185 *
5186 * parse an XML declaration header for external entities
5187 *
5188 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5189 *
5190 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5191 */
5192
5193void
5194xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5195 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005196 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005197
5198 /*
5199 * We know that '<?xml' is here.
5200 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005201 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005202 SKIP(5);
5203 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005204 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005205 return;
5206 }
5207
William M. Brack76e95df2003-10-18 16:20:14 +00005208 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005209 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5210 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005211 }
5212 SKIP_BLANKS;
5213
5214 /*
5215 * We may have the VersionInfo here.
5216 */
5217 version = xmlParseVersionInfo(ctxt);
5218 if (version == NULL)
5219 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005220 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005221 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005222 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5223 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005224 }
5225 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005226 ctxt->input->version = version;
5227
5228 /*
5229 * We must have the encoding declaration
5230 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005231 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005232 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5233 /*
5234 * The XML REC instructs us to stop parsing right here
5235 */
5236 return;
5237 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005238 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5239 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5240 "Missing encoding in text declaration\n");
5241 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005242
5243 SKIP_BLANKS;
5244 if ((RAW == '?') && (NXT(1) == '>')) {
5245 SKIP(2);
5246 } else if (RAW == '>') {
5247 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005248 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005249 NEXT;
5250 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005251 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 MOVETO_ENDTAG(CUR_PTR);
5253 NEXT;
5254 }
5255}
5256
5257/**
Owen Taylor3473f882001-02-23 17:55:21 +00005258 * xmlParseExternalSubset:
5259 * @ctxt: an XML parser context
5260 * @ExternalID: the external identifier
5261 * @SystemID: the system identifier (or URL)
5262 *
5263 * parse Markup declarations from an external subset
5264 *
5265 * [30] extSubset ::= textDecl? extSubsetDecl
5266 *
5267 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5268 */
5269void
5270xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5271 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005272 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005273 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005274 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005275 xmlParseTextDecl(ctxt);
5276 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5277 /*
5278 * The XML REC instructs us to stop parsing right here
5279 */
5280 ctxt->instate = XML_PARSER_EOF;
5281 return;
5282 }
5283 }
5284 if (ctxt->myDoc == NULL) {
5285 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5286 }
5287 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5288 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5289
5290 ctxt->instate = XML_PARSER_DTD;
5291 ctxt->external = 1;
5292 while (((RAW == '<') && (NXT(1) == '?')) ||
5293 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005294 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005295 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005296 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005297
5298 GROW;
5299 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5300 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005301 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005302 NEXT;
5303 } else if (RAW == '%') {
5304 xmlParsePEReference(ctxt);
5305 } else
5306 xmlParseMarkupDecl(ctxt);
5307
5308 /*
5309 * Pop-up of finished entities.
5310 */
5311 while ((RAW == 0) && (ctxt->inputNr > 1))
5312 xmlPopInput(ctxt);
5313
Daniel Veillardfdc91562002-07-01 21:52:03 +00005314 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005315 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 break;
5317 }
5318 }
5319
5320 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005321 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005322 }
5323
5324}
5325
5326/**
5327 * xmlParseReference:
5328 * @ctxt: an XML parser context
5329 *
5330 * parse and handle entity references in content, depending on the SAX
5331 * interface, this may end-up in a call to character() if this is a
5332 * CharRef, a predefined entity, if there is no reference() callback.
5333 * or if the parser was asked to switch to that mode.
5334 *
5335 * [67] Reference ::= EntityRef | CharRef
5336 */
5337void
5338xmlParseReference(xmlParserCtxtPtr ctxt) {
5339 xmlEntityPtr ent;
5340 xmlChar *val;
5341 if (RAW != '&') return;
5342
5343 if (NXT(1) == '#') {
5344 int i = 0;
5345 xmlChar out[10];
5346 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005347 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005348
5349 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5350 /*
5351 * So we are using non-UTF-8 buffers
5352 * Check that the char fit on 8bits, if not
5353 * generate a CharRef.
5354 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005355 if (value <= 0xFF) {
5356 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005357 out[1] = 0;
5358 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5359 (!ctxt->disableSAX))
5360 ctxt->sax->characters(ctxt->userData, out, 1);
5361 } else {
5362 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005363 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005364 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005365 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005366 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5367 (!ctxt->disableSAX))
5368 ctxt->sax->reference(ctxt->userData, out);
5369 }
5370 } else {
5371 /*
5372 * Just encode the value in UTF-8
5373 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005374 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005375 out[i] = 0;
5376 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5377 (!ctxt->disableSAX))
5378 ctxt->sax->characters(ctxt->userData, out, i);
5379 }
5380 } else {
5381 ent = xmlParseEntityRef(ctxt);
5382 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005383 if (!ctxt->wellFormed)
5384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005385 if ((ent->name != NULL) &&
5386 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5387 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005388 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005389
5390
5391 /*
5392 * The first reference to the entity trigger a parsing phase
5393 * where the ent->children is filled with the result from
5394 * the parsing.
5395 */
5396 if (ent->children == NULL) {
5397 xmlChar *value;
5398 value = ent->content;
5399
5400 /*
5401 * Check that this entity is well formed
5402 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005403 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005404 (value[1] == 0) && (value[0] == '<') &&
5405 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5406 /*
5407 * DONE: get definite answer on this !!!
5408 * Lots of entity decls are used to declare a single
5409 * char
5410 * <!ENTITY lt "<">
5411 * Which seems to be valid since
5412 * 2.4: The ampersand character (&) and the left angle
5413 * bracket (<) may appear in their literal form only
5414 * when used ... They are also legal within the literal
5415 * entity value of an internal entity declaration;i
5416 * see "4.3.2 Well-Formed Parsed Entities".
5417 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5418 * Looking at the OASIS test suite and James Clark
5419 * tests, this is broken. However the XML REC uses
5420 * it. Is the XML REC not well-formed ????
5421 * This is a hack to avoid this problem
5422 *
5423 * ANSWER: since lt gt amp .. are already defined,
5424 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005425 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005426 * is lousy but acceptable.
5427 */
5428 list = xmlNewDocText(ctxt->myDoc, value);
5429 if (list != NULL) {
5430 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5431 (ent->children == NULL)) {
5432 ent->children = list;
5433 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005434 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005435 list->parent = (xmlNodePtr) ent;
5436 } else {
5437 xmlFreeNodeList(list);
5438 }
5439 } else if (list != NULL) {
5440 xmlFreeNodeList(list);
5441 }
5442 } else {
5443 /*
5444 * 4.3.2: An internal general parsed entity is well-formed
5445 * if its replacement text matches the production labeled
5446 * content.
5447 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005448
5449 void *user_data;
5450 /*
5451 * This is a bit hackish but this seems the best
5452 * way to make sure both SAX and DOM entity support
5453 * behaves okay.
5454 */
5455 if (ctxt->userData == ctxt)
5456 user_data = NULL;
5457 else
5458 user_data = ctxt->userData;
5459
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5461 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005462 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5463 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 ctxt->depth--;
5465 } else if (ent->etype ==
5466 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5467 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005468 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005469 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005470 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005471 ctxt->depth--;
5472 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005473 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005474 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5475 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005476 }
5477 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005478 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005479 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005480 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005481 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5482 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005483 (ent->children == NULL)) {
5484 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005485 if (ctxt->replaceEntities) {
5486 /*
5487 * Prune it directly in the generated document
5488 * except for single text nodes.
5489 */
5490 if ((list->type == XML_TEXT_NODE) &&
5491 (list->next == NULL)) {
5492 list->parent = (xmlNodePtr) ent;
5493 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005494 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005495 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005496 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005497 while (list != NULL) {
5498 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005499 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005500 if (list->next == NULL)
5501 ent->last = list;
5502 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005503 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005504 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005505#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005506 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5507 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005508#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005509 }
5510 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005511 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005512 while (list != NULL) {
5513 list->parent = (xmlNodePtr) ent;
5514 if (list->next == NULL)
5515 ent->last = list;
5516 list = list->next;
5517 }
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519 } else {
5520 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005521 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005522 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005523 } else if ((ret != XML_ERR_OK) &&
5524 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005525 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005526 } else if (list != NULL) {
5527 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005528 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005529 }
5530 }
5531 }
5532 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5533 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5534 /*
5535 * Create a node.
5536 */
5537 ctxt->sax->reference(ctxt->userData, ent->name);
5538 return;
5539 } else if (ctxt->replaceEntities) {
5540 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5541 /*
5542 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005543 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005544 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005545 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005546 if ((list == NULL) && (ent->owner == 0)) {
5547 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005548 cur = ent->children;
5549 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005550 nw = xmlCopyNode(cur, 1);
5551 if (nw != NULL) {
5552 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005553 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005554 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005555 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005556 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005557 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005558 if (cur == ent->last)
5559 break;
5560 cur = cur->next;
5561 }
Daniel Veillard81273902003-09-30 00:43:48 +00005562#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005563 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005564 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005565#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005566 } else if (list == NULL) {
5567 xmlNodePtr nw = NULL, cur, next, last,
5568 firstChild = NULL;
5569 /*
5570 * Copy the entity child list and make it the new
5571 * entity child list. The goal is to make sure any
5572 * ID or REF referenced will be the one from the
5573 * document content and not the entity copy.
5574 */
5575 cur = ent->children;
5576 ent->children = NULL;
5577 last = ent->last;
5578 ent->last = NULL;
5579 while (cur != NULL) {
5580 next = cur->next;
5581 cur->next = NULL;
5582 cur->parent = NULL;
5583 nw = xmlCopyNode(cur, 1);
5584 if (nw != NULL) {
5585 nw->_private = cur->_private;
5586 if (firstChild == NULL){
5587 firstChild = cur;
5588 }
5589 xmlAddChild((xmlNodePtr) ent, nw);
5590 xmlAddChild(ctxt->node, cur);
5591 }
5592 if (cur == last)
5593 break;
5594 cur = next;
5595 }
5596 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005597#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005598 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5599 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005600#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005601 } else {
5602 /*
5603 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005604 * node with a possible previous text one which
5605 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005606 */
5607 if (ent->children->type == XML_TEXT_NODE)
5608 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5609 if ((ent->last != ent->children) &&
5610 (ent->last->type == XML_TEXT_NODE))
5611 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5612 xmlAddChildList(ctxt->node, ent->children);
5613 }
5614
Owen Taylor3473f882001-02-23 17:55:21 +00005615 /*
5616 * This is to avoid a nasty side effect, see
5617 * characters() in SAX.c
5618 */
5619 ctxt->nodemem = 0;
5620 ctxt->nodelen = 0;
5621 return;
5622 } else {
5623 /*
5624 * Probably running in SAX mode
5625 */
5626 xmlParserInputPtr input;
5627
5628 input = xmlNewEntityInputStream(ctxt, ent);
5629 xmlPushInput(ctxt, input);
5630 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005631 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5632 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005633 xmlParseTextDecl(ctxt);
5634 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5635 /*
5636 * The XML REC instructs us to stop parsing right here
5637 */
5638 ctxt->instate = XML_PARSER_EOF;
5639 return;
5640 }
5641 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005642 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5643 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005644 }
5645 }
5646 return;
5647 }
5648 }
5649 } else {
5650 val = ent->content;
5651 if (val == NULL) return;
5652 /*
5653 * inline the entity.
5654 */
5655 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5656 (!ctxt->disableSAX))
5657 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5658 }
5659 }
5660}
5661
5662/**
5663 * xmlParseEntityRef:
5664 * @ctxt: an XML parser context
5665 *
5666 * parse ENTITY references declarations
5667 *
5668 * [68] EntityRef ::= '&' Name ';'
5669 *
5670 * [ WFC: Entity Declared ]
5671 * In a document without any DTD, a document with only an internal DTD
5672 * subset which contains no parameter entity references, or a document
5673 * with "standalone='yes'", the Name given in the entity reference
5674 * must match that in an entity declaration, except that well-formed
5675 * documents need not declare any of the following entities: amp, lt,
5676 * gt, apos, quot. The declaration of a parameter entity must precede
5677 * any reference to it. Similarly, the declaration of a general entity
5678 * must precede any reference to it which appears in a default value in an
5679 * attribute-list declaration. Note that if entities are declared in the
5680 * external subset or in external parameter entities, a non-validating
5681 * processor is not obligated to read and process their declarations;
5682 * for such documents, the rule that an entity must be declared is a
5683 * well-formedness constraint only if standalone='yes'.
5684 *
5685 * [ WFC: Parsed Entity ]
5686 * An entity reference must not contain the name of an unparsed entity
5687 *
5688 * Returns the xmlEntityPtr if found, or NULL otherwise.
5689 */
5690xmlEntityPtr
5691xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005692 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005693 xmlEntityPtr ent = NULL;
5694
5695 GROW;
5696
5697 if (RAW == '&') {
5698 NEXT;
5699 name = xmlParseName(ctxt);
5700 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005701 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5702 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005703 } else {
5704 if (RAW == ';') {
5705 NEXT;
5706 /*
5707 * Ask first SAX for entity resolution, otherwise try the
5708 * predefined set.
5709 */
5710 if (ctxt->sax != NULL) {
5711 if (ctxt->sax->getEntity != NULL)
5712 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005713 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005714 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005715 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5716 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005717 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005718 }
Owen Taylor3473f882001-02-23 17:55:21 +00005719 }
5720 /*
5721 * [ WFC: Entity Declared ]
5722 * In a document without any DTD, a document with only an
5723 * internal DTD subset which contains no parameter entity
5724 * references, or a document with "standalone='yes'", the
5725 * Name given in the entity reference must match that in an
5726 * entity declaration, except that well-formed documents
5727 * need not declare any of the following entities: amp, lt,
5728 * gt, apos, quot.
5729 * The declaration of a parameter entity must precede any
5730 * reference to it.
5731 * Similarly, the declaration of a general entity must
5732 * precede any reference to it which appears in a default
5733 * value in an attribute-list declaration. Note that if
5734 * entities are declared in the external subset or in
5735 * external parameter entities, a non-validating processor
5736 * is not obligated to read and process their declarations;
5737 * for such documents, the rule that an entity must be
5738 * declared is a well-formedness constraint only if
5739 * standalone='yes'.
5740 */
5741 if (ent == NULL) {
5742 if ((ctxt->standalone == 1) ||
5743 ((ctxt->hasExternalSubset == 0) &&
5744 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005746 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005747 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005748 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005749 "Entity '%s' not defined\n", name);
5750 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005751 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005752 }
5753
5754 /*
5755 * [ WFC: Parsed Entity ]
5756 * An entity reference must not contain the name of an
5757 * unparsed entity
5758 */
5759 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005760 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005761 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005762 }
5763
5764 /*
5765 * [ WFC: No External Entity References ]
5766 * Attribute values cannot contain direct or indirect
5767 * entity references to external entities.
5768 */
5769 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5770 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005771 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5772 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005773 }
5774 /*
5775 * [ WFC: No < in Attribute Values ]
5776 * The replacement text of any entity referred to directly or
5777 * indirectly in an attribute value (other than "&lt;") must
5778 * not contain a <.
5779 */
5780 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5781 (ent != NULL) &&
5782 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5783 (ent->content != NULL) &&
5784 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005785 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005786 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005787 }
5788
5789 /*
5790 * Internal check, no parameter entities here ...
5791 */
5792 else {
5793 switch (ent->etype) {
5794 case XML_INTERNAL_PARAMETER_ENTITY:
5795 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005796 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5797 "Attempt to reference the parameter entity '%s'\n",
5798 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005799 break;
5800 default:
5801 break;
5802 }
5803 }
5804
5805 /*
5806 * [ WFC: No Recursion ]
5807 * A parsed entity must not contain a recursive reference
5808 * to itself, either directly or indirectly.
5809 * Done somewhere else
5810 */
5811
5812 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005813 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005814 }
Owen Taylor3473f882001-02-23 17:55:21 +00005815 }
5816 }
5817 return(ent);
5818}
5819
5820/**
5821 * xmlParseStringEntityRef:
5822 * @ctxt: an XML parser context
5823 * @str: a pointer to an index in the string
5824 *
5825 * parse ENTITY references declarations, but this version parses it from
5826 * a string value.
5827 *
5828 * [68] EntityRef ::= '&' Name ';'
5829 *
5830 * [ WFC: Entity Declared ]
5831 * In a document without any DTD, a document with only an internal DTD
5832 * subset which contains no parameter entity references, or a document
5833 * with "standalone='yes'", the Name given in the entity reference
5834 * must match that in an entity declaration, except that well-formed
5835 * documents need not declare any of the following entities: amp, lt,
5836 * gt, apos, quot. The declaration of a parameter entity must precede
5837 * any reference to it. Similarly, the declaration of a general entity
5838 * must precede any reference to it which appears in a default value in an
5839 * attribute-list declaration. Note that if entities are declared in the
5840 * external subset or in external parameter entities, a non-validating
5841 * processor is not obligated to read and process their declarations;
5842 * for such documents, the rule that an entity must be declared is a
5843 * well-formedness constraint only if standalone='yes'.
5844 *
5845 * [ WFC: Parsed Entity ]
5846 * An entity reference must not contain the name of an unparsed entity
5847 *
5848 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5849 * is updated to the current location in the string.
5850 */
5851xmlEntityPtr
5852xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5853 xmlChar *name;
5854 const xmlChar *ptr;
5855 xmlChar cur;
5856 xmlEntityPtr ent = NULL;
5857
5858 if ((str == NULL) || (*str == NULL))
5859 return(NULL);
5860 ptr = *str;
5861 cur = *ptr;
5862 if (cur == '&') {
5863 ptr++;
5864 cur = *ptr;
5865 name = xmlParseStringName(ctxt, &ptr);
5866 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005867 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5868 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005869 } else {
5870 if (*ptr == ';') {
5871 ptr++;
5872 /*
5873 * Ask first SAX for entity resolution, otherwise try the
5874 * predefined set.
5875 */
5876 if (ctxt->sax != NULL) {
5877 if (ctxt->sax->getEntity != NULL)
5878 ent = ctxt->sax->getEntity(ctxt->userData, name);
5879 if (ent == NULL)
5880 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005881 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005882 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005883 }
Owen Taylor3473f882001-02-23 17:55:21 +00005884 }
5885 /*
5886 * [ WFC: Entity Declared ]
5887 * In a document without any DTD, a document with only an
5888 * internal DTD subset which contains no parameter entity
5889 * references, or a document with "standalone='yes'", the
5890 * Name given in the entity reference must match that in an
5891 * entity declaration, except that well-formed documents
5892 * need not declare any of the following entities: amp, lt,
5893 * gt, apos, quot.
5894 * The declaration of a parameter entity must precede any
5895 * reference to it.
5896 * Similarly, the declaration of a general entity must
5897 * precede any reference to it which appears in a default
5898 * value in an attribute-list declaration. Note that if
5899 * entities are declared in the external subset or in
5900 * external parameter entities, a non-validating processor
5901 * is not obligated to read and process their declarations;
5902 * for such documents, the rule that an entity must be
5903 * declared is a well-formedness constraint only if
5904 * standalone='yes'.
5905 */
5906 if (ent == NULL) {
5907 if ((ctxt->standalone == 1) ||
5908 ((ctxt->hasExternalSubset == 0) &&
5909 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005910 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005911 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005912 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005913 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005914 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005915 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005916 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005917 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005918 }
5919
5920 /*
5921 * [ WFC: Parsed Entity ]
5922 * An entity reference must not contain the name of an
5923 * unparsed entity
5924 */
5925 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005926 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005927 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005928 }
5929
5930 /*
5931 * [ WFC: No External Entity References ]
5932 * Attribute values cannot contain direct or indirect
5933 * entity references to external entities.
5934 */
5935 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5936 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005937 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005938 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005939 }
5940 /*
5941 * [ WFC: No < in Attribute Values ]
5942 * The replacement text of any entity referred to directly or
5943 * indirectly in an attribute value (other than "&lt;") must
5944 * not contain a <.
5945 */
5946 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5947 (ent != NULL) &&
5948 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5949 (ent->content != NULL) &&
5950 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005951 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
5952 "'<' in entity '%s' is not allowed in attributes values\n",
5953 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005954 }
5955
5956 /*
5957 * Internal check, no parameter entities here ...
5958 */
5959 else {
5960 switch (ent->etype) {
5961 case XML_INTERNAL_PARAMETER_ENTITY:
5962 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00005963 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5964 "Attempt to reference the parameter entity '%s'\n",
5965 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005966 break;
5967 default:
5968 break;
5969 }
5970 }
5971
5972 /*
5973 * [ WFC: No Recursion ]
5974 * A parsed entity must not contain a recursive reference
5975 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005976 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005977 */
5978
5979 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005980 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005981 }
5982 xmlFree(name);
5983 }
5984 }
5985 *str = ptr;
5986 return(ent);
5987}
5988
5989/**
5990 * xmlParsePEReference:
5991 * @ctxt: an XML parser context
5992 *
5993 * parse PEReference declarations
5994 * The entity content is handled directly by pushing it's content as
5995 * a new input stream.
5996 *
5997 * [69] PEReference ::= '%' Name ';'
5998 *
5999 * [ WFC: No Recursion ]
6000 * A parsed entity must not contain a recursive
6001 * reference to itself, either directly or indirectly.
6002 *
6003 * [ WFC: Entity Declared ]
6004 * In a document without any DTD, a document with only an internal DTD
6005 * subset which contains no parameter entity references, or a document
6006 * with "standalone='yes'", ... ... The declaration of a parameter
6007 * entity must precede any reference to it...
6008 *
6009 * [ VC: Entity Declared ]
6010 * In a document with an external subset or external parameter entities
6011 * with "standalone='no'", ... ... The declaration of a parameter entity
6012 * must precede any reference to it...
6013 *
6014 * [ WFC: In DTD ]
6015 * Parameter-entity references may only appear in the DTD.
6016 * NOTE: misleading but this is handled.
6017 */
6018void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006019xmlParsePEReference(xmlParserCtxtPtr ctxt)
6020{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006021 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006022 xmlEntityPtr entity = NULL;
6023 xmlParserInputPtr input;
6024
6025 if (RAW == '%') {
6026 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006027 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006028 if (name == NULL) {
6029 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6030 "xmlParsePEReference: no name\n");
6031 } else {
6032 if (RAW == ';') {
6033 NEXT;
6034 if ((ctxt->sax != NULL) &&
6035 (ctxt->sax->getParameterEntity != NULL))
6036 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6037 name);
6038 if (entity == NULL) {
6039 /*
6040 * [ WFC: Entity Declared ]
6041 * In a document without any DTD, a document with only an
6042 * internal DTD subset which contains no parameter entity
6043 * references, or a document with "standalone='yes'", ...
6044 * ... The declaration of a parameter entity must precede
6045 * any reference to it...
6046 */
6047 if ((ctxt->standalone == 1) ||
6048 ((ctxt->hasExternalSubset == 0) &&
6049 (ctxt->hasPErefs == 0))) {
6050 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6051 "PEReference: %%%s; not found\n",
6052 name);
6053 } else {
6054 /*
6055 * [ VC: Entity Declared ]
6056 * In a document with an external subset or external
6057 * parameter entities with "standalone='no'", ...
6058 * ... The declaration of a parameter entity must
6059 * precede any reference to it...
6060 */
6061 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6062 "PEReference: %%%s; not found\n",
6063 name, NULL);
6064 ctxt->valid = 0;
6065 }
6066 } else {
6067 /*
6068 * Internal checking in case the entity quest barfed
6069 */
6070 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6071 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6072 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6073 "Internal: %%%s; is not a parameter entity\n",
6074 name, NULL);
6075 } else if (ctxt->input->free != deallocblankswrapper) {
6076 input =
6077 xmlNewBlanksWrapperInputStream(ctxt, entity);
6078 xmlPushInput(ctxt, input);
6079 } else {
6080 /*
6081 * TODO !!!
6082 * handle the extra spaces added before and after
6083 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6084 */
6085 input = xmlNewEntityInputStream(ctxt, entity);
6086 xmlPushInput(ctxt, input);
6087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006089 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006090 xmlParseTextDecl(ctxt);
6091 if (ctxt->errNo ==
6092 XML_ERR_UNSUPPORTED_ENCODING) {
6093 /*
6094 * The XML REC instructs us to stop parsing
6095 * right here
6096 */
6097 ctxt->instate = XML_PARSER_EOF;
6098 return;
6099 }
6100 }
6101 }
6102 }
6103 ctxt->hasPErefs = 1;
6104 } else {
6105 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6106 }
6107 }
Owen Taylor3473f882001-02-23 17:55:21 +00006108 }
6109}
6110
6111/**
6112 * xmlParseStringPEReference:
6113 * @ctxt: an XML parser context
6114 * @str: a pointer to an index in the string
6115 *
6116 * parse PEReference declarations
6117 *
6118 * [69] PEReference ::= '%' Name ';'
6119 *
6120 * [ WFC: No Recursion ]
6121 * A parsed entity must not contain a recursive
6122 * reference to itself, either directly or indirectly.
6123 *
6124 * [ WFC: Entity Declared ]
6125 * In a document without any DTD, a document with only an internal DTD
6126 * subset which contains no parameter entity references, or a document
6127 * with "standalone='yes'", ... ... The declaration of a parameter
6128 * entity must precede any reference to it...
6129 *
6130 * [ VC: Entity Declared ]
6131 * In a document with an external subset or external parameter entities
6132 * with "standalone='no'", ... ... The declaration of a parameter entity
6133 * must precede any reference to it...
6134 *
6135 * [ WFC: In DTD ]
6136 * Parameter-entity references may only appear in the DTD.
6137 * NOTE: misleading but this is handled.
6138 *
6139 * Returns the string of the entity content.
6140 * str is updated to the current value of the index
6141 */
6142xmlEntityPtr
6143xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6144 const xmlChar *ptr;
6145 xmlChar cur;
6146 xmlChar *name;
6147 xmlEntityPtr entity = NULL;
6148
6149 if ((str == NULL) || (*str == NULL)) return(NULL);
6150 ptr = *str;
6151 cur = *ptr;
6152 if (cur == '%') {
6153 ptr++;
6154 cur = *ptr;
6155 name = xmlParseStringName(ctxt, &ptr);
6156 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006157 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6158 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006159 } else {
6160 cur = *ptr;
6161 if (cur == ';') {
6162 ptr++;
6163 cur = *ptr;
6164 if ((ctxt->sax != NULL) &&
6165 (ctxt->sax->getParameterEntity != NULL))
6166 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6167 name);
6168 if (entity == NULL) {
6169 /*
6170 * [ WFC: Entity Declared ]
6171 * In a document without any DTD, a document with only an
6172 * internal DTD subset which contains no parameter entity
6173 * references, or a document with "standalone='yes'", ...
6174 * ... The declaration of a parameter entity must precede
6175 * any reference to it...
6176 */
6177 if ((ctxt->standalone == 1) ||
6178 ((ctxt->hasExternalSubset == 0) &&
6179 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006180 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006181 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006182 } else {
6183 /*
6184 * [ VC: Entity Declared ]
6185 * In a document with an external subset or external
6186 * parameter entities with "standalone='no'", ...
6187 * ... The declaration of a parameter entity must
6188 * precede any reference to it...
6189 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006190 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6191 "PEReference: %%%s; not found\n",
6192 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006193 ctxt->valid = 0;
6194 }
6195 } else {
6196 /*
6197 * Internal checking in case the entity quest barfed
6198 */
6199 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6200 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006201 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6202 "%%%s; is not a parameter entity\n",
6203 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006204 }
6205 }
6206 ctxt->hasPErefs = 1;
6207 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006208 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006209 }
6210 xmlFree(name);
6211 }
6212 }
6213 *str = ptr;
6214 return(entity);
6215}
6216
6217/**
6218 * xmlParseDocTypeDecl:
6219 * @ctxt: an XML parser context
6220 *
6221 * parse a DOCTYPE declaration
6222 *
6223 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6224 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6225 *
6226 * [ VC: Root Element Type ]
6227 * The Name in the document type declaration must match the element
6228 * type of the root element.
6229 */
6230
6231void
6232xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006233 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006234 xmlChar *ExternalID = NULL;
6235 xmlChar *URI = NULL;
6236
6237 /*
6238 * We know that '<!DOCTYPE' has been detected.
6239 */
6240 SKIP(9);
6241
6242 SKIP_BLANKS;
6243
6244 /*
6245 * Parse the DOCTYPE name.
6246 */
6247 name = xmlParseName(ctxt);
6248 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006249 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6250 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006251 }
6252 ctxt->intSubName = name;
6253
6254 SKIP_BLANKS;
6255
6256 /*
6257 * Check for SystemID and ExternalID
6258 */
6259 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6260
6261 if ((URI != NULL) || (ExternalID != NULL)) {
6262 ctxt->hasExternalSubset = 1;
6263 }
6264 ctxt->extSubURI = URI;
6265 ctxt->extSubSystem = ExternalID;
6266
6267 SKIP_BLANKS;
6268
6269 /*
6270 * Create and update the internal subset.
6271 */
6272 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6273 (!ctxt->disableSAX))
6274 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6275
6276 /*
6277 * Is there any internal subset declarations ?
6278 * they are handled separately in xmlParseInternalSubset()
6279 */
6280 if (RAW == '[')
6281 return;
6282
6283 /*
6284 * We should be at the end of the DOCTYPE declaration.
6285 */
6286 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006287 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006288 }
6289 NEXT;
6290}
6291
6292/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006293 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006294 * @ctxt: an XML parser context
6295 *
6296 * parse the internal subset declaration
6297 *
6298 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6299 */
6300
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006301static void
Owen Taylor3473f882001-02-23 17:55:21 +00006302xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6303 /*
6304 * Is there any DTD definition ?
6305 */
6306 if (RAW == '[') {
6307 ctxt->instate = XML_PARSER_DTD;
6308 NEXT;
6309 /*
6310 * Parse the succession of Markup declarations and
6311 * PEReferences.
6312 * Subsequence (markupdecl | PEReference | S)*
6313 */
6314 while (RAW != ']') {
6315 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006316 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006317
6318 SKIP_BLANKS;
6319 xmlParseMarkupDecl(ctxt);
6320 xmlParsePEReference(ctxt);
6321
6322 /*
6323 * Pop-up of finished entities.
6324 */
6325 while ((RAW == 0) && (ctxt->inputNr > 1))
6326 xmlPopInput(ctxt);
6327
6328 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006329 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006330 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006331 break;
6332 }
6333 }
6334 if (RAW == ']') {
6335 NEXT;
6336 SKIP_BLANKS;
6337 }
6338 }
6339
6340 /*
6341 * We should be at the end of the DOCTYPE declaration.
6342 */
6343 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006344 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006345 }
6346 NEXT;
6347}
6348
Daniel Veillard81273902003-09-30 00:43:48 +00006349#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006350/**
6351 * xmlParseAttribute:
6352 * @ctxt: an XML parser context
6353 * @value: a xmlChar ** used to store the value of the attribute
6354 *
6355 * parse an attribute
6356 *
6357 * [41] Attribute ::= Name Eq AttValue
6358 *
6359 * [ WFC: No External Entity References ]
6360 * Attribute values cannot contain direct or indirect entity references
6361 * to external entities.
6362 *
6363 * [ WFC: No < in Attribute Values ]
6364 * The replacement text of any entity referred to directly or indirectly in
6365 * an attribute value (other than "&lt;") must not contain a <.
6366 *
6367 * [ VC: Attribute Value Type ]
6368 * The attribute must have been declared; the value must be of the type
6369 * declared for it.
6370 *
6371 * [25] Eq ::= S? '=' S?
6372 *
6373 * With namespace:
6374 *
6375 * [NS 11] Attribute ::= QName Eq AttValue
6376 *
6377 * Also the case QName == xmlns:??? is handled independently as a namespace
6378 * definition.
6379 *
6380 * Returns the attribute name, and the value in *value.
6381 */
6382
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006383const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006384xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006385 const xmlChar *name;
6386 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006387
6388 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006389 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006390 name = xmlParseName(ctxt);
6391 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006392 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006393 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006394 return(NULL);
6395 }
6396
6397 /*
6398 * read the value
6399 */
6400 SKIP_BLANKS;
6401 if (RAW == '=') {
6402 NEXT;
6403 SKIP_BLANKS;
6404 val = xmlParseAttValue(ctxt);
6405 ctxt->instate = XML_PARSER_CONTENT;
6406 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006407 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006408 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006409 return(NULL);
6410 }
6411
6412 /*
6413 * Check that xml:lang conforms to the specification
6414 * No more registered as an error, just generate a warning now
6415 * since this was deprecated in XML second edition
6416 */
6417 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6418 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006419 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6420 "Malformed value for xml:lang : %s\n",
6421 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 }
6423 }
6424
6425 /*
6426 * Check that xml:space conforms to the specification
6427 */
6428 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6429 if (xmlStrEqual(val, BAD_CAST "default"))
6430 *(ctxt->space) = 0;
6431 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6432 *(ctxt->space) = 1;
6433 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006434 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006435"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006436 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006437 }
6438 }
6439
6440 *value = val;
6441 return(name);
6442}
6443
6444/**
6445 * xmlParseStartTag:
6446 * @ctxt: an XML parser context
6447 *
6448 * parse a start of tag either for rule element or
6449 * EmptyElement. In both case we don't parse the tag closing chars.
6450 *
6451 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6452 *
6453 * [ WFC: Unique Att Spec ]
6454 * No attribute name may appear more than once in the same start-tag or
6455 * empty-element tag.
6456 *
6457 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6458 *
6459 * [ WFC: Unique Att Spec ]
6460 * No attribute name may appear more than once in the same start-tag or
6461 * empty-element tag.
6462 *
6463 * With namespace:
6464 *
6465 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6466 *
6467 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6468 *
6469 * Returns the element name parsed
6470 */
6471
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006472const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006473xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006474 const xmlChar *name;
6475 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006476 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006477 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006478 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006479 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006480 int i;
6481
6482 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006483 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006484
6485 name = xmlParseName(ctxt);
6486 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006487 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006488 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006489 return(NULL);
6490 }
6491
6492 /*
6493 * Now parse the attributes, it ends up with the ending
6494 *
6495 * (S Attribute)* S?
6496 */
6497 SKIP_BLANKS;
6498 GROW;
6499
Daniel Veillard21a0f912001-02-25 19:54:14 +00006500 while ((RAW != '>') &&
6501 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006502 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006503 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006504 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006505
6506 attname = xmlParseAttribute(ctxt, &attvalue);
6507 if ((attname != NULL) && (attvalue != NULL)) {
6508 /*
6509 * [ WFC: Unique Att Spec ]
6510 * No attribute name may appear more than once in the same
6511 * start-tag or empty-element tag.
6512 */
6513 for (i = 0; i < nbatts;i += 2) {
6514 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006515 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006516 xmlFree(attvalue);
6517 goto failed;
6518 }
6519 }
Owen Taylor3473f882001-02-23 17:55:21 +00006520 /*
6521 * Add the pair to atts
6522 */
6523 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006524 maxatts = 22; /* allow for 10 attrs by default */
6525 atts = (const xmlChar **)
6526 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006527 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006528 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006529 if (attvalue != NULL)
6530 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006531 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006532 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006533 ctxt->atts = atts;
6534 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006535 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006536 const xmlChar **n;
6537
Owen Taylor3473f882001-02-23 17:55:21 +00006538 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006539 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006540 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006541 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006542 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006543 if (attvalue != NULL)
6544 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006545 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006546 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006547 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006548 ctxt->atts = atts;
6549 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006550 }
6551 atts[nbatts++] = attname;
6552 atts[nbatts++] = attvalue;
6553 atts[nbatts] = NULL;
6554 atts[nbatts + 1] = NULL;
6555 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006556 if (attvalue != NULL)
6557 xmlFree(attvalue);
6558 }
6559
6560failed:
6561
Daniel Veillard3772de32002-12-17 10:31:45 +00006562 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006563 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6564 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006565 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006566 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6567 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006568 }
6569 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006570 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6571 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006572 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6573 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006574 break;
6575 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006576 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006577 GROW;
6578 }
6579
6580 /*
6581 * SAX: Start of Element !
6582 */
6583 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006584 (!ctxt->disableSAX)) {
6585 if (nbatts > 0)
6586 ctxt->sax->startElement(ctxt->userData, name, atts);
6587 else
6588 ctxt->sax->startElement(ctxt->userData, name, NULL);
6589 }
Owen Taylor3473f882001-02-23 17:55:21 +00006590
6591 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006592 /* Free only the content strings */
6593 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006594 if (atts[i] != NULL)
6595 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006596 }
6597 return(name);
6598}
6599
6600/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006601 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006602 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006603 * @line: line of the start tag
6604 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006605 *
6606 * parse an end of tag
6607 *
6608 * [42] ETag ::= '</' Name S? '>'
6609 *
6610 * With namespace
6611 *
6612 * [NS 9] ETag ::= '</' QName S? '>'
6613 */
6614
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006615static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006616xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006617 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006618
6619 GROW;
6620 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006621 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006622 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006623 return;
6624 }
6625 SKIP(2);
6626
Daniel Veillard46de64e2002-05-29 08:21:33 +00006627 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006628
6629 /*
6630 * We should definitely be at the ending "S? '>'" part
6631 */
6632 GROW;
6633 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006634 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006635 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006636 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006637 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006638
6639 /*
6640 * [ WFC: Element Type Match ]
6641 * The Name in an element's end-tag must match the element type in the
6642 * start-tag.
6643 *
6644 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006645 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006646 if (name == NULL) name = BAD_CAST "unparseable";
6647 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006648 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006649 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006650 }
6651
6652 /*
6653 * SAX: End of Tag
6654 */
6655 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6656 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006657 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006658
Daniel Veillarde57ec792003-09-10 10:50:59 +00006659 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006661 return;
6662}
6663
6664/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006665 * xmlParseEndTag:
6666 * @ctxt: an XML parser context
6667 *
6668 * parse an end of tag
6669 *
6670 * [42] ETag ::= '</' Name S? '>'
6671 *
6672 * With namespace
6673 *
6674 * [NS 9] ETag ::= '</' QName S? '>'
6675 */
6676
6677void
6678xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006679 xmlParseEndTag1(ctxt, 0);
6680}
Daniel Veillard81273902003-09-30 00:43:48 +00006681#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006682
6683/************************************************************************
6684 * *
6685 * SAX 2 specific operations *
6686 * *
6687 ************************************************************************/
6688
6689static const xmlChar *
6690xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6691 int len = 0, l;
6692 int c;
6693 int count = 0;
6694
6695 /*
6696 * Handler for more complex cases
6697 */
6698 GROW;
6699 c = CUR_CHAR(l);
6700 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006701 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006702 return(NULL);
6703 }
6704
6705 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006706 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006707 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006708 (IS_COMBINING(c)) ||
6709 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006710 if (count++ > 100) {
6711 count = 0;
6712 GROW;
6713 }
6714 len += l;
6715 NEXTL(l);
6716 c = CUR_CHAR(l);
6717 }
6718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6719}
6720
6721/*
6722 * xmlGetNamespace:
6723 * @ctxt: an XML parser context
6724 * @prefix: the prefix to lookup
6725 *
6726 * Lookup the namespace name for the @prefix (which ca be NULL)
6727 * The prefix must come from the @ctxt->dict dictionnary
6728 *
6729 * Returns the namespace name or NULL if not bound
6730 */
6731static const xmlChar *
6732xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6733 int i;
6734
Daniel Veillarde57ec792003-09-10 10:50:59 +00006735 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006736 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006737 if (ctxt->nsTab[i] == prefix) {
6738 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6739 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006740 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006741 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006742 return(NULL);
6743}
6744
6745/**
6746 * xmlParseNCName:
6747 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006748 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006749 *
6750 * parse an XML name.
6751 *
6752 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6753 * CombiningChar | Extender
6754 *
6755 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6756 *
6757 * Returns the Name parsed or NULL
6758 */
6759
6760static const xmlChar *
6761xmlParseNCName(xmlParserCtxtPtr ctxt) {
6762 const xmlChar *in;
6763 const xmlChar *ret;
6764 int count = 0;
6765
6766 /*
6767 * Accelerator for simple ASCII names
6768 */
6769 in = ctxt->input->cur;
6770 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6771 ((*in >= 0x41) && (*in <= 0x5A)) ||
6772 (*in == '_')) {
6773 in++;
6774 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6775 ((*in >= 0x41) && (*in <= 0x5A)) ||
6776 ((*in >= 0x30) && (*in <= 0x39)) ||
6777 (*in == '_') || (*in == '-') ||
6778 (*in == '.'))
6779 in++;
6780 if ((*in > 0) && (*in < 0x80)) {
6781 count = in - ctxt->input->cur;
6782 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6783 ctxt->input->cur = in;
6784 ctxt->nbChars += count;
6785 ctxt->input->col += count;
6786 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006787 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006788 }
6789 return(ret);
6790 }
6791 }
6792 return(xmlParseNCNameComplex(ctxt));
6793}
6794
6795/**
6796 * xmlParseQName:
6797 * @ctxt: an XML parser context
6798 * @prefix: pointer to store the prefix part
6799 *
6800 * parse an XML Namespace QName
6801 *
6802 * [6] QName ::= (Prefix ':')? LocalPart
6803 * [7] Prefix ::= NCName
6804 * [8] LocalPart ::= NCName
6805 *
6806 * Returns the Name parsed or NULL
6807 */
6808
6809static const xmlChar *
6810xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6811 const xmlChar *l, *p;
6812
6813 GROW;
6814
6815 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006816 if (l == NULL) {
6817 if (CUR == ':') {
6818 l = xmlParseName(ctxt);
6819 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006820 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6821 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006822 *prefix = NULL;
6823 return(l);
6824 }
6825 }
6826 return(NULL);
6827 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006828 if (CUR == ':') {
6829 NEXT;
6830 p = l;
6831 l = xmlParseNCName(ctxt);
6832 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006833 xmlChar *tmp;
6834
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006835 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6836 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006837 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6838 p = xmlDictLookup(ctxt->dict, tmp, -1);
6839 if (tmp != NULL) xmlFree(tmp);
6840 *prefix = NULL;
6841 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006842 }
6843 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006844 xmlChar *tmp;
6845
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006846 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6847 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006848 NEXT;
6849 tmp = (xmlChar *) xmlParseName(ctxt);
6850 if (tmp != NULL) {
6851 tmp = xmlBuildQName(tmp, l, NULL, 0);
6852 l = xmlDictLookup(ctxt->dict, tmp, -1);
6853 if (tmp != NULL) xmlFree(tmp);
6854 *prefix = p;
6855 return(l);
6856 }
6857 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6858 l = xmlDictLookup(ctxt->dict, tmp, -1);
6859 if (tmp != NULL) xmlFree(tmp);
6860 *prefix = p;
6861 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006862 }
6863 *prefix = p;
6864 } else
6865 *prefix = NULL;
6866 return(l);
6867}
6868
6869/**
6870 * xmlParseQNameAndCompare:
6871 * @ctxt: an XML parser context
6872 * @name: the localname
6873 * @prefix: the prefix, if any.
6874 *
6875 * parse an XML name and compares for match
6876 * (specialized for endtag parsing)
6877 *
6878 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6879 * and the name for mismatch
6880 */
6881
6882static const xmlChar *
6883xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6884 xmlChar const *prefix) {
6885 const xmlChar *cmp = name;
6886 const xmlChar *in;
6887 const xmlChar *ret;
6888 const xmlChar *prefix2;
6889
6890 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6891
6892 GROW;
6893 in = ctxt->input->cur;
6894
6895 cmp = prefix;
6896 while (*in != 0 && *in == *cmp) {
6897 ++in;
6898 ++cmp;
6899 }
6900 if ((*cmp == 0) && (*in == ':')) {
6901 in++;
6902 cmp = name;
6903 while (*in != 0 && *in == *cmp) {
6904 ++in;
6905 ++cmp;
6906 }
William M. Brack76e95df2003-10-18 16:20:14 +00006907 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006908 /* success */
6909 ctxt->input->cur = in;
6910 return((const xmlChar*) 1);
6911 }
6912 }
6913 /*
6914 * all strings coms from the dictionary, equality can be done directly
6915 */
6916 ret = xmlParseQName (ctxt, &prefix2);
6917 if ((ret == name) && (prefix == prefix2))
6918 return((const xmlChar*) 1);
6919 return ret;
6920}
6921
6922/**
6923 * xmlParseAttValueInternal:
6924 * @ctxt: an XML parser context
6925 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006926 * @alloc: whether the attribute was reallocated as a new string
6927 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006928 *
6929 * parse a value for an attribute.
6930 * NOTE: if no normalization is needed, the routine will return pointers
6931 * directly from the data buffer.
6932 *
6933 * 3.3.3 Attribute-Value Normalization:
6934 * Before the value of an attribute is passed to the application or
6935 * checked for validity, the XML processor must normalize it as follows:
6936 * - a character reference is processed by appending the referenced
6937 * character to the attribute value
6938 * - an entity reference is processed by recursively processing the
6939 * replacement text of the entity
6940 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6941 * appending #x20 to the normalized value, except that only a single
6942 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6943 * parsed entity or the literal entity value of an internal parsed entity
6944 * - other characters are processed by appending them to the normalized value
6945 * If the declared value is not CDATA, then the XML processor must further
6946 * process the normalized attribute value by discarding any leading and
6947 * trailing space (#x20) characters, and by replacing sequences of space
6948 * (#x20) characters by a single space (#x20) character.
6949 * All attributes for which no declaration has been read should be treated
6950 * by a non-validating parser as if declared CDATA.
6951 *
6952 * Returns the AttValue parsed or NULL. The value has to be freed by the
6953 * caller if it was copied, this can be detected by val[*len] == 0.
6954 */
6955
6956static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006957xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
6958 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006959{
Daniel Veillard0fb18932003-09-07 09:14:37 +00006960 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006961 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006962 xmlChar *ret = NULL;
6963
6964 GROW;
6965 in = (xmlChar *) CUR_PTR;
6966 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006967 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006968 return (NULL);
6969 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006970 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00006971
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006972 /*
6973 * try to handle in this routine the most common case where no
6974 * allocation of a new string is required and where content is
6975 * pure ASCII.
6976 */
6977 limit = *in++;
6978 end = ctxt->input->end;
6979 start = in;
6980 if (in >= end) {
6981 const xmlChar *oldbase = ctxt->input->base;
6982 GROW;
6983 if (oldbase != ctxt->input->base) {
6984 long delta = ctxt->input->base - oldbase;
6985 start = start + delta;
6986 in = in + delta;
6987 }
6988 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006989 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006990 if (normalize) {
6991 /*
6992 * Skip any leading spaces
6993 */
6994 while ((in < end) && (*in != limit) &&
6995 ((*in == 0x20) || (*in == 0x9) ||
6996 (*in == 0xA) || (*in == 0xD))) {
6997 in++;
6998 start = in;
6999 if (in >= end) {
7000 const xmlChar *oldbase = ctxt->input->base;
7001 GROW;
7002 if (oldbase != ctxt->input->base) {
7003 long delta = ctxt->input->base - oldbase;
7004 start = start + delta;
7005 in = in + delta;
7006 }
7007 end = ctxt->input->end;
7008 }
7009 }
7010 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7011 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7012 if ((*in++ == 0x20) && (*in == 0x20)) break;
7013 if (in >= end) {
7014 const xmlChar *oldbase = ctxt->input->base;
7015 GROW;
7016 if (oldbase != ctxt->input->base) {
7017 long delta = ctxt->input->base - oldbase;
7018 start = start + delta;
7019 in = in + delta;
7020 }
7021 end = ctxt->input->end;
7022 }
7023 }
7024 last = in;
7025 /*
7026 * skip the trailing blanks
7027 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007028 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007029 while ((in < end) && (*in != limit) &&
7030 ((*in == 0x20) || (*in == 0x9) ||
7031 (*in == 0xA) || (*in == 0xD))) {
7032 in++;
7033 if (in >= end) {
7034 const xmlChar *oldbase = ctxt->input->base;
7035 GROW;
7036 if (oldbase != ctxt->input->base) {
7037 long delta = ctxt->input->base - oldbase;
7038 start = start + delta;
7039 in = in + delta;
7040 last = last + delta;
7041 }
7042 end = ctxt->input->end;
7043 }
7044 }
7045 if (*in != limit) goto need_complex;
7046 } else {
7047 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7048 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7049 in++;
7050 if (in >= end) {
7051 const xmlChar *oldbase = ctxt->input->base;
7052 GROW;
7053 if (oldbase != ctxt->input->base) {
7054 long delta = ctxt->input->base - oldbase;
7055 start = start + delta;
7056 in = in + delta;
7057 }
7058 end = ctxt->input->end;
7059 }
7060 }
7061 last = in;
7062 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007063 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007064 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007065 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007066 *len = last - start;
7067 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007068 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007069 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007070 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007071 }
7072 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007073 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007074 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007075need_complex:
7076 if (alloc) *alloc = 1;
7077 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007078}
7079
7080/**
7081 * xmlParseAttribute2:
7082 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007083 * @pref: the element prefix
7084 * @elem: the element name
7085 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007086 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007087 * @len: an int * to save the length of the attribute
7088 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007089 *
7090 * parse an attribute in the new SAX2 framework.
7091 *
7092 * Returns the attribute name, and the value in *value, .
7093 */
7094
7095static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007096xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7097 const xmlChar *pref, const xmlChar *elem,
7098 const xmlChar **prefix, xmlChar **value,
7099 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007100 const xmlChar *name;
7101 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007102 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007103
7104 *value = NULL;
7105 GROW;
7106 name = xmlParseQName(ctxt, prefix);
7107 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7109 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007110 return(NULL);
7111 }
7112
7113 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007114 * get the type if needed
7115 */
7116 if (ctxt->attsSpecial != NULL) {
7117 int type;
7118
7119 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7120 pref, elem, *prefix, name);
7121 if (type != 0) normalize = 1;
7122 }
7123
7124 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007125 * read the value
7126 */
7127 SKIP_BLANKS;
7128 if (RAW == '=') {
7129 NEXT;
7130 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007131 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007132 ctxt->instate = XML_PARSER_CONTENT;
7133 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007134 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007135 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007136 return(NULL);
7137 }
7138
7139 /*
7140 * Check that xml:lang conforms to the specification
7141 * No more registered as an error, just generate a warning now
7142 * since this was deprecated in XML second edition
7143 */
7144 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7145 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007146 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7147 "Malformed value for xml:lang : %s\n",
7148 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007149 }
7150 }
7151
7152 /*
7153 * Check that xml:space conforms to the specification
7154 */
7155 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7156 if (xmlStrEqual(val, BAD_CAST "default"))
7157 *(ctxt->space) = 0;
7158 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7159 *(ctxt->space) = 1;
7160 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007161 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007162"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7163 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007164 }
7165 }
7166
7167 *value = val;
7168 return(name);
7169}
7170
7171/**
7172 * xmlParseStartTag2:
7173 * @ctxt: an XML parser context
7174 *
7175 * parse a start of tag either for rule element or
7176 * EmptyElement. In both case we don't parse the tag closing chars.
7177 * This routine is called when running SAX2 parsing
7178 *
7179 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7180 *
7181 * [ WFC: Unique Att Spec ]
7182 * No attribute name may appear more than once in the same start-tag or
7183 * empty-element tag.
7184 *
7185 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7186 *
7187 * [ WFC: Unique Att Spec ]
7188 * No attribute name may appear more than once in the same start-tag or
7189 * empty-element tag.
7190 *
7191 * With namespace:
7192 *
7193 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7194 *
7195 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7196 *
7197 * Returns the element name parsed
7198 */
7199
7200static const xmlChar *
7201xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007202 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007203 const xmlChar *localname;
7204 const xmlChar *prefix;
7205 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007206 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007207 const xmlChar *nsname;
7208 xmlChar *attvalue;
7209 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007210 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007211 int nratts, nbatts, nbdef;
7212 int i, j, nbNs, attval;
7213 const xmlChar *base;
7214 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007215
7216 if (RAW != '<') return(NULL);
7217 NEXT1;
7218
7219 /*
7220 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7221 * point since the attribute values may be stored as pointers to
7222 * the buffer and calling SHRINK would destroy them !
7223 * The Shrinking is only possible once the full set of attribute
7224 * callbacks have been done.
7225 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007226reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007227 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007228 base = ctxt->input->base;
7229 cur = ctxt->input->cur - ctxt->input->base;
7230 nbatts = 0;
7231 nratts = 0;
7232 nbdef = 0;
7233 nbNs = 0;
7234 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007235
7236 localname = xmlParseQName(ctxt, &prefix);
7237 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007238 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7239 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007240 return(NULL);
7241 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007242 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007243
7244 /*
7245 * Now parse the attributes, it ends up with the ending
7246 *
7247 * (S Attribute)* S?
7248 */
7249 SKIP_BLANKS;
7250 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007251 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252
7253 while ((RAW != '>') &&
7254 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007255 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007256 const xmlChar *q = CUR_PTR;
7257 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007258 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007259
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007260 attname = xmlParseAttribute2(ctxt, prefix, localname,
7261 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007262 if ((attname != NULL) && (attvalue != NULL)) {
7263 if (len < 0) len = xmlStrlen(attvalue);
7264 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007265 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7266 xmlURIPtr uri;
7267
7268 if (*URL != 0) {
7269 uri = xmlParseURI((const char *) URL);
7270 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007271 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7272 "xmlns: %s not a valid URI\n",
7273 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007274 } else {
7275 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007276 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7277 "xmlns: URI %s is not absolute\n",
7278 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007279 }
7280 xmlFreeURI(uri);
7281 }
7282 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007283 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007284 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007285 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007286 for (j = 1;j <= nbNs;j++)
7287 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7288 break;
7289 if (j <= nbNs)
7290 xmlErrAttributeDup(ctxt, NULL, attname);
7291 else
7292 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007293 if (alloc != 0) xmlFree(attvalue);
7294 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007295 continue;
7296 }
7297 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007298 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7299 xmlURIPtr uri;
7300
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007301 if (attname == ctxt->str_xml) {
7302 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007303 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7304 "xml namespace prefix mapped to wrong URI\n",
7305 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007306 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007307 /*
7308 * Do not keep a namespace definition node
7309 */
7310 if (alloc != 0) xmlFree(attvalue);
7311 SKIP_BLANKS;
7312 continue;
7313 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007314 uri = xmlParseURI((const char *) URL);
7315 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007316 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7317 "xmlns:%s: '%s' is not a valid URI\n",
7318 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007319 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007320 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007321 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7322 "xmlns:%s: URI %s is not absolute\n",
7323 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007324 }
7325 xmlFreeURI(uri);
7326 }
7327
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007329 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007330 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007331 for (j = 1;j <= nbNs;j++)
7332 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7333 break;
7334 if (j <= nbNs)
7335 xmlErrAttributeDup(ctxt, aprefix, attname);
7336 else
7337 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007338 if (alloc != 0) xmlFree(attvalue);
7339 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007340 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007341 continue;
7342 }
7343
7344 /*
7345 * Add the pair to atts
7346 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007347 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7348 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007349 if (attvalue[len] == 0)
7350 xmlFree(attvalue);
7351 goto failed;
7352 }
7353 maxatts = ctxt->maxatts;
7354 atts = ctxt->atts;
7355 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007356 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007357 atts[nbatts++] = attname;
7358 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007359 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007360 atts[nbatts++] = attvalue;
7361 attvalue += len;
7362 atts[nbatts++] = attvalue;
7363 /*
7364 * tag if some deallocation is needed
7365 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007366 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007367 } else {
7368 if ((attvalue != NULL) && (attvalue[len] == 0))
7369 xmlFree(attvalue);
7370 }
7371
7372failed:
7373
7374 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007375 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007376 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7377 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007378 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007379 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7380 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007381 }
7382 SKIP_BLANKS;
7383 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7384 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007385 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007386 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 break;
7388 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007390 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007391 }
7392
Daniel Veillard0fb18932003-09-07 09:14:37 +00007393 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007394 * The attributes defaulting
7395 */
7396 if (ctxt->attsDefault != NULL) {
7397 xmlDefAttrsPtr defaults;
7398
7399 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7400 if (defaults != NULL) {
7401 for (i = 0;i < defaults->nbAttrs;i++) {
7402 attname = defaults->values[4 * i];
7403 aprefix = defaults->values[4 * i + 1];
7404
7405 /*
7406 * special work for namespaces defaulted defs
7407 */
7408 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7409 /*
7410 * check that it's not a defined namespace
7411 */
7412 for (j = 1;j <= nbNs;j++)
7413 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7414 break;
7415 if (j <= nbNs) continue;
7416
7417 nsname = xmlGetNamespace(ctxt, NULL);
7418 if (nsname != defaults->values[4 * i + 2]) {
7419 if (nsPush(ctxt, NULL,
7420 defaults->values[4 * i + 2]) > 0)
7421 nbNs++;
7422 }
7423 } else if (aprefix == ctxt->str_xmlns) {
7424 /*
7425 * check that it's not a defined namespace
7426 */
7427 for (j = 1;j <= nbNs;j++)
7428 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7429 break;
7430 if (j <= nbNs) continue;
7431
7432 nsname = xmlGetNamespace(ctxt, attname);
7433 if (nsname != defaults->values[2]) {
7434 if (nsPush(ctxt, attname,
7435 defaults->values[4 * i + 2]) > 0)
7436 nbNs++;
7437 }
7438 } else {
7439 /*
7440 * check that it's not a defined attribute
7441 */
7442 for (j = 0;j < nbatts;j+=5) {
7443 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7444 break;
7445 }
7446 if (j < nbatts) continue;
7447
7448 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7449 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007450 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007451 }
7452 maxatts = ctxt->maxatts;
7453 atts = ctxt->atts;
7454 }
7455 atts[nbatts++] = attname;
7456 atts[nbatts++] = aprefix;
7457 if (aprefix == NULL)
7458 atts[nbatts++] = NULL;
7459 else
7460 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7461 atts[nbatts++] = defaults->values[4 * i + 2];
7462 atts[nbatts++] = defaults->values[4 * i + 3];
7463 nbdef++;
7464 }
7465 }
7466 }
7467 }
7468
Daniel Veillarde70c8772003-11-25 07:21:18 +00007469 /*
7470 * The attributes checkings
7471 */
7472 for (i = 0; i < nbatts;i += 5) {
7473 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7474 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7475 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7476 "Namespace prefix %s for %s on %s is not defined\n",
7477 atts[i + 1], atts[i], localname);
7478 }
7479 atts[i + 2] = nsname;
7480 /*
7481 * [ WFC: Unique Att Spec ]
7482 * No attribute name may appear more than once in the same
7483 * start-tag or empty-element tag.
7484 * As extended by the Namespace in XML REC.
7485 */
7486 for (j = 0; j < i;j += 5) {
7487 if (atts[i] == atts[j]) {
7488 if (atts[i+1] == atts[j+1]) {
7489 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7490 break;
7491 }
7492 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7493 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7494 "Namespaced Attribute %s in '%s' redefined\n",
7495 atts[i], nsname, NULL);
7496 break;
7497 }
7498 }
7499 }
7500 }
7501
Daniel Veillarde57ec792003-09-10 10:50:59 +00007502 nsname = xmlGetNamespace(ctxt, prefix);
7503 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007504 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7505 "Namespace prefix %s on %s is not defined\n",
7506 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007507 }
7508 *pref = prefix;
7509 *URI = nsname;
7510
7511 /*
7512 * SAX: Start of Element !
7513 */
7514 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7515 (!ctxt->disableSAX)) {
7516 if (nbNs > 0)
7517 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7518 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7519 nbatts / 5, nbdef, atts);
7520 else
7521 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7522 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7523 }
7524
7525 /*
7526 * Free up attribute allocated strings if needed
7527 */
7528 if (attval != 0) {
7529 for (i = 3,j = 0; j < nratts;i += 5,j++)
7530 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7531 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007532 }
7533
7534 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007535
7536base_changed:
7537 /*
7538 * the attribute strings are valid iif the base didn't changed
7539 */
7540 if (attval != 0) {
7541 for (i = 3,j = 0; j < nratts;i += 5,j++)
7542 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7543 xmlFree((xmlChar *) atts[i]);
7544 }
7545 ctxt->input->cur = ctxt->input->base + cur;
7546 if (ctxt->wellFormed == 1) {
7547 goto reparse;
7548 }
7549 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007550}
7551
7552/**
7553 * xmlParseEndTag2:
7554 * @ctxt: an XML parser context
7555 * @line: line of the start tag
7556 * @nsNr: number of namespaces on the start tag
7557 *
7558 * parse an end of tag
7559 *
7560 * [42] ETag ::= '</' Name S? '>'
7561 *
7562 * With namespace
7563 *
7564 * [NS 9] ETag ::= '</' QName S? '>'
7565 */
7566
7567static void
7568xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007569 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007570 const xmlChar *name;
7571
7572 GROW;
7573 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007574 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007575 return;
7576 }
7577 SKIP(2);
7578
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007579 if ((tlen > 0) && (memcmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
7580 if (ctxt->input->cur[tlen] == '>') {
7581 ctxt->input->cur += tlen + 1;
7582 goto done;
7583 }
7584 ctxt->input->cur += tlen;
7585 name = (xmlChar*)1;
7586 } else {
7587 if (prefix == NULL)
7588 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7589 else
7590 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7591 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007592
7593 /*
7594 * We should definitely be at the ending "S? '>'" part
7595 */
7596 GROW;
7597 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007598 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007599 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007600 } else
7601 NEXT1;
7602
7603 /*
7604 * [ WFC: Element Type Match ]
7605 * The Name in an element's end-tag must match the element type in the
7606 * start-tag.
7607 *
7608 */
7609 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007610 if (name == NULL) name = BAD_CAST "unparseable";
7611 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007612 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007613 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007614 }
7615
7616 /*
7617 * SAX: End of Tag
7618 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007619done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007620 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7621 (!ctxt->disableSAX))
7622 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7623
Daniel Veillard0fb18932003-09-07 09:14:37 +00007624 spacePop(ctxt);
7625 if (nsNr != 0)
7626 nsPop(ctxt, nsNr);
7627 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007628}
7629
7630/**
Owen Taylor3473f882001-02-23 17:55:21 +00007631 * xmlParseCDSect:
7632 * @ctxt: an XML parser context
7633 *
7634 * Parse escaped pure raw content.
7635 *
7636 * [18] CDSect ::= CDStart CData CDEnd
7637 *
7638 * [19] CDStart ::= '<![CDATA['
7639 *
7640 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7641 *
7642 * [21] CDEnd ::= ']]>'
7643 */
7644void
7645xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7646 xmlChar *buf = NULL;
7647 int len = 0;
7648 int size = XML_PARSER_BUFFER_SIZE;
7649 int r, rl;
7650 int s, sl;
7651 int cur, l;
7652 int count = 0;
7653
Daniel Veillard8f597c32003-10-06 08:19:27 +00007654 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007655 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007656 SKIP(9);
7657 } else
7658 return;
7659
7660 ctxt->instate = XML_PARSER_CDATA_SECTION;
7661 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007662 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007663 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007664 ctxt->instate = XML_PARSER_CONTENT;
7665 return;
7666 }
7667 NEXTL(rl);
7668 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007669 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007670 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007671 ctxt->instate = XML_PARSER_CONTENT;
7672 return;
7673 }
7674 NEXTL(sl);
7675 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007676 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007677 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007678 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007679 return;
7680 }
William M. Brack871611b2003-10-18 04:53:14 +00007681 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007682 ((r != ']') || (s != ']') || (cur != '>'))) {
7683 if (len + 5 >= size) {
7684 size *= 2;
7685 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7686 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007687 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007688 return;
7689 }
7690 }
7691 COPY_BUF(rl,buf,len,r);
7692 r = s;
7693 rl = sl;
7694 s = cur;
7695 sl = l;
7696 count++;
7697 if (count > 50) {
7698 GROW;
7699 count = 0;
7700 }
7701 NEXTL(l);
7702 cur = CUR_CHAR(l);
7703 }
7704 buf[len] = 0;
7705 ctxt->instate = XML_PARSER_CONTENT;
7706 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007707 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007708 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007709 xmlFree(buf);
7710 return;
7711 }
7712 NEXTL(l);
7713
7714 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007715 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007716 */
7717 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7718 if (ctxt->sax->cdataBlock != NULL)
7719 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007720 else if (ctxt->sax->characters != NULL)
7721 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007722 }
7723 xmlFree(buf);
7724}
7725
7726/**
7727 * xmlParseContent:
7728 * @ctxt: an XML parser context
7729 *
7730 * Parse a content:
7731 *
7732 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7733 */
7734
7735void
7736xmlParseContent(xmlParserCtxtPtr ctxt) {
7737 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007738 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007739 ((RAW != '<') || (NXT(1) != '/'))) {
7740 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007741 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007742 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007743
7744 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007745 * First case : a Processing Instruction.
7746 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007747 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007748 xmlParsePI(ctxt);
7749 }
7750
7751 /*
7752 * Second case : a CDSection
7753 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007754 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007755 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007756 xmlParseCDSect(ctxt);
7757 }
7758
7759 /*
7760 * Third case : a comment
7761 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007762 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007763 (NXT(2) == '-') && (NXT(3) == '-')) {
7764 xmlParseComment(ctxt);
7765 ctxt->instate = XML_PARSER_CONTENT;
7766 }
7767
7768 /*
7769 * Fourth case : a sub-element.
7770 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007771 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007772 xmlParseElement(ctxt);
7773 }
7774
7775 /*
7776 * Fifth case : a reference. If if has not been resolved,
7777 * parsing returns it's Name, create the node
7778 */
7779
Daniel Veillard21a0f912001-02-25 19:54:14 +00007780 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007781 xmlParseReference(ctxt);
7782 }
7783
7784 /*
7785 * Last case, text. Note that References are handled directly.
7786 */
7787 else {
7788 xmlParseCharData(ctxt, 0);
7789 }
7790
7791 GROW;
7792 /*
7793 * Pop-up of finished entities.
7794 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007795 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007796 xmlPopInput(ctxt);
7797 SHRINK;
7798
Daniel Veillardfdc91562002-07-01 21:52:03 +00007799 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007800 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7801 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007802 ctxt->instate = XML_PARSER_EOF;
7803 break;
7804 }
7805 }
7806}
7807
7808/**
7809 * xmlParseElement:
7810 * @ctxt: an XML parser context
7811 *
7812 * parse an XML element, this is highly recursive
7813 *
7814 * [39] element ::= EmptyElemTag | STag content ETag
7815 *
7816 * [ WFC: Element Type Match ]
7817 * The Name in an element's end-tag must match the element type in the
7818 * start-tag.
7819 *
Owen Taylor3473f882001-02-23 17:55:21 +00007820 */
7821
7822void
7823xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007824 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007825 const xmlChar *prefix;
7826 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007827 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007828 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007829 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007830 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007831
7832 /* Capture start position */
7833 if (ctxt->record_info) {
7834 node_info.begin_pos = ctxt->input->consumed +
7835 (CUR_PTR - ctxt->input->base);
7836 node_info.begin_line = ctxt->input->line;
7837 }
7838
7839 if (ctxt->spaceNr == 0)
7840 spacePush(ctxt, -1);
7841 else
7842 spacePush(ctxt, *ctxt->space);
7843
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007844 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007845#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007846 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007847#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007848 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007849#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007850 else
7851 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007852#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007853 if (name == NULL) {
7854 spacePop(ctxt);
7855 return;
7856 }
7857 namePush(ctxt, name);
7858 ret = ctxt->node;
7859
Daniel Veillard4432df22003-09-28 18:58:27 +00007860#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007861 /*
7862 * [ VC: Root Element Type ]
7863 * The Name in the document type declaration must match the element
7864 * type of the root element.
7865 */
7866 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7867 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7868 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007869#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007870
7871 /*
7872 * Check for an Empty Element.
7873 */
7874 if ((RAW == '/') && (NXT(1) == '>')) {
7875 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007876 if (ctxt->sax2) {
7877 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7878 (!ctxt->disableSAX))
7879 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007880#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007881 } else {
7882 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7883 (!ctxt->disableSAX))
7884 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007885#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007886 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007887 namePop(ctxt);
7888 spacePop(ctxt);
7889 if (nsNr != ctxt->nsNr)
7890 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007891 if ( ret != NULL && ctxt->record_info ) {
7892 node_info.end_pos = ctxt->input->consumed +
7893 (CUR_PTR - ctxt->input->base);
7894 node_info.end_line = ctxt->input->line;
7895 node_info.node = ret;
7896 xmlParserAddNodeInfo(ctxt, &node_info);
7897 }
7898 return;
7899 }
7900 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007901 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007902 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007903 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7904 "Couldn't find end of Start Tag %s line %d\n",
7905 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007906
7907 /*
7908 * end of parsing of this node.
7909 */
7910 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007911 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007912 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007913 if (nsNr != ctxt->nsNr)
7914 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007915
7916 /*
7917 * Capture end position and add node
7918 */
7919 if ( ret != NULL && ctxt->record_info ) {
7920 node_info.end_pos = ctxt->input->consumed +
7921 (CUR_PTR - ctxt->input->base);
7922 node_info.end_line = ctxt->input->line;
7923 node_info.node = ret;
7924 xmlParserAddNodeInfo(ctxt, &node_info);
7925 }
7926 return;
7927 }
7928
7929 /*
7930 * Parse the content of the element:
7931 */
7932 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007933 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007934 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007935 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007936 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007937
7938 /*
7939 * end of parsing of this node.
7940 */
7941 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007943 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007944 if (nsNr != ctxt->nsNr)
7945 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007946 return;
7947 }
7948
7949 /*
7950 * parse the end of tag: '</' should be here.
7951 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007952 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007953 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007954 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007955 }
7956#ifdef LIBXML_SAX1_ENABLED
7957 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00007958 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00007959#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007960
7961 /*
7962 * Capture end position and add node
7963 */
7964 if ( ret != NULL && ctxt->record_info ) {
7965 node_info.end_pos = ctxt->input->consumed +
7966 (CUR_PTR - ctxt->input->base);
7967 node_info.end_line = ctxt->input->line;
7968 node_info.node = ret;
7969 xmlParserAddNodeInfo(ctxt, &node_info);
7970 }
7971}
7972
7973/**
7974 * xmlParseVersionNum:
7975 * @ctxt: an XML parser context
7976 *
7977 * parse the XML version value.
7978 *
7979 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7980 *
7981 * Returns the string giving the XML version number, or NULL
7982 */
7983xmlChar *
7984xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7985 xmlChar *buf = NULL;
7986 int len = 0;
7987 int size = 10;
7988 xmlChar cur;
7989
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007990 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007991 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007992 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007993 return(NULL);
7994 }
7995 cur = CUR;
7996 while (((cur >= 'a') && (cur <= 'z')) ||
7997 ((cur >= 'A') && (cur <= 'Z')) ||
7998 ((cur >= '0') && (cur <= '9')) ||
7999 (cur == '_') || (cur == '.') ||
8000 (cur == ':') || (cur == '-')) {
8001 if (len + 1 >= size) {
8002 size *= 2;
8003 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8004 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008006 return(NULL);
8007 }
8008 }
8009 buf[len++] = cur;
8010 NEXT;
8011 cur=CUR;
8012 }
8013 buf[len] = 0;
8014 return(buf);
8015}
8016
8017/**
8018 * xmlParseVersionInfo:
8019 * @ctxt: an XML parser context
8020 *
8021 * parse the XML version.
8022 *
8023 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8024 *
8025 * [25] Eq ::= S? '=' S?
8026 *
8027 * Returns the version string, e.g. "1.0"
8028 */
8029
8030xmlChar *
8031xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8032 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008033
Daniel Veillarda07050d2003-10-19 14:46:32 +00008034 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008035 SKIP(7);
8036 SKIP_BLANKS;
8037 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008038 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008039 return(NULL);
8040 }
8041 NEXT;
8042 SKIP_BLANKS;
8043 if (RAW == '"') {
8044 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008045 version = xmlParseVersionNum(ctxt);
8046 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008047 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008048 } else
8049 NEXT;
8050 } else if (RAW == '\''){
8051 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008052 version = xmlParseVersionNum(ctxt);
8053 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008054 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008055 } else
8056 NEXT;
8057 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008058 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008059 }
8060 }
8061 return(version);
8062}
8063
8064/**
8065 * xmlParseEncName:
8066 * @ctxt: an XML parser context
8067 *
8068 * parse the XML encoding name
8069 *
8070 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8071 *
8072 * Returns the encoding name value or NULL
8073 */
8074xmlChar *
8075xmlParseEncName(xmlParserCtxtPtr ctxt) {
8076 xmlChar *buf = NULL;
8077 int len = 0;
8078 int size = 10;
8079 xmlChar cur;
8080
8081 cur = CUR;
8082 if (((cur >= 'a') && (cur <= 'z')) ||
8083 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008084 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008085 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008086 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008087 return(NULL);
8088 }
8089
8090 buf[len++] = cur;
8091 NEXT;
8092 cur = CUR;
8093 while (((cur >= 'a') && (cur <= 'z')) ||
8094 ((cur >= 'A') && (cur <= 'Z')) ||
8095 ((cur >= '0') && (cur <= '9')) ||
8096 (cur == '.') || (cur == '_') ||
8097 (cur == '-')) {
8098 if (len + 1 >= size) {
8099 size *= 2;
8100 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8101 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008102 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008103 return(NULL);
8104 }
8105 }
8106 buf[len++] = cur;
8107 NEXT;
8108 cur = CUR;
8109 if (cur == 0) {
8110 SHRINK;
8111 GROW;
8112 cur = CUR;
8113 }
8114 }
8115 buf[len] = 0;
8116 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008117 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008118 }
8119 return(buf);
8120}
8121
8122/**
8123 * xmlParseEncodingDecl:
8124 * @ctxt: an XML parser context
8125 *
8126 * parse the XML encoding declaration
8127 *
8128 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8129 *
8130 * this setups the conversion filters.
8131 *
8132 * Returns the encoding value or NULL
8133 */
8134
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008135const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008136xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8137 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008138
8139 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008140 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008141 SKIP(8);
8142 SKIP_BLANKS;
8143 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008144 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008145 return(NULL);
8146 }
8147 NEXT;
8148 SKIP_BLANKS;
8149 if (RAW == '"') {
8150 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008151 encoding = xmlParseEncName(ctxt);
8152 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008153 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008154 } else
8155 NEXT;
8156 } else if (RAW == '\''){
8157 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008158 encoding = xmlParseEncName(ctxt);
8159 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008160 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008161 } else
8162 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008163 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008164 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008165 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008166 /*
8167 * UTF-16 encoding stwich has already taken place at this stage,
8168 * more over the little-endian/big-endian selection is already done
8169 */
8170 if ((encoding != NULL) &&
8171 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8172 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008173 if (ctxt->encoding != NULL)
8174 xmlFree((xmlChar *) ctxt->encoding);
8175 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008176 }
8177 /*
8178 * UTF-8 encoding is handled natively
8179 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008180 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008181 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8182 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008183 if (ctxt->encoding != NULL)
8184 xmlFree((xmlChar *) ctxt->encoding);
8185 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008186 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008187 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008188 xmlCharEncodingHandlerPtr handler;
8189
8190 if (ctxt->input->encoding != NULL)
8191 xmlFree((xmlChar *) ctxt->input->encoding);
8192 ctxt->input->encoding = encoding;
8193
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008194 handler = xmlFindCharEncodingHandler((const char *) encoding);
8195 if (handler != NULL) {
8196 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008197 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008198 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008199 "Unsupported encoding %s\n", encoding);
8200 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008201 }
8202 }
8203 }
8204 return(encoding);
8205}
8206
8207/**
8208 * xmlParseSDDecl:
8209 * @ctxt: an XML parser context
8210 *
8211 * parse the XML standalone declaration
8212 *
8213 * [32] SDDecl ::= S 'standalone' Eq
8214 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8215 *
8216 * [ VC: Standalone Document Declaration ]
8217 * TODO The standalone document declaration must have the value "no"
8218 * if any external markup declarations contain declarations of:
8219 * - attributes with default values, if elements to which these
8220 * attributes apply appear in the document without specifications
8221 * of values for these attributes, or
8222 * - entities (other than amp, lt, gt, apos, quot), if references
8223 * to those entities appear in the document, or
8224 * - attributes with values subject to normalization, where the
8225 * attribute appears in the document with a value which will change
8226 * as a result of normalization, or
8227 * - element types with element content, if white space occurs directly
8228 * within any instance of those types.
8229 *
8230 * Returns 1 if standalone, 0 otherwise
8231 */
8232
8233int
8234xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8235 int standalone = -1;
8236
8237 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008238 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008239 SKIP(10);
8240 SKIP_BLANKS;
8241 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008242 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008243 return(standalone);
8244 }
8245 NEXT;
8246 SKIP_BLANKS;
8247 if (RAW == '\''){
8248 NEXT;
8249 if ((RAW == 'n') && (NXT(1) == 'o')) {
8250 standalone = 0;
8251 SKIP(2);
8252 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8253 (NXT(2) == 's')) {
8254 standalone = 1;
8255 SKIP(3);
8256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008257 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008258 }
8259 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008260 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008261 } else
8262 NEXT;
8263 } else if (RAW == '"'){
8264 NEXT;
8265 if ((RAW == 'n') && (NXT(1) == 'o')) {
8266 standalone = 0;
8267 SKIP(2);
8268 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8269 (NXT(2) == 's')) {
8270 standalone = 1;
8271 SKIP(3);
8272 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008273 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008274 }
8275 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008276 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008277 } else
8278 NEXT;
8279 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008280 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008281 }
8282 }
8283 return(standalone);
8284}
8285
8286/**
8287 * xmlParseXMLDecl:
8288 * @ctxt: an XML parser context
8289 *
8290 * parse an XML declaration header
8291 *
8292 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8293 */
8294
8295void
8296xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8297 xmlChar *version;
8298
8299 /*
8300 * We know that '<?xml' is here.
8301 */
8302 SKIP(5);
8303
William M. Brack76e95df2003-10-18 16:20:14 +00008304 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8306 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008307 }
8308 SKIP_BLANKS;
8309
8310 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008311 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008312 */
8313 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008314 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008315 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008316 } else {
8317 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8318 /*
8319 * TODO: Blueberry should be detected here
8320 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008321 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8322 "Unsupported version '%s'\n",
8323 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008324 }
8325 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008326 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008327 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008328 }
Owen Taylor3473f882001-02-23 17:55:21 +00008329
8330 /*
8331 * We may have the encoding declaration
8332 */
William M. Brack76e95df2003-10-18 16:20:14 +00008333 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008334 if ((RAW == '?') && (NXT(1) == '>')) {
8335 SKIP(2);
8336 return;
8337 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008339 }
8340 xmlParseEncodingDecl(ctxt);
8341 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8342 /*
8343 * The XML REC instructs us to stop parsing right here
8344 */
8345 return;
8346 }
8347
8348 /*
8349 * We may have the standalone status.
8350 */
William M. Brack76e95df2003-10-18 16:20:14 +00008351 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008352 if ((RAW == '?') && (NXT(1) == '>')) {
8353 SKIP(2);
8354 return;
8355 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008356 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008357 }
8358 SKIP_BLANKS;
8359 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8360
8361 SKIP_BLANKS;
8362 if ((RAW == '?') && (NXT(1) == '>')) {
8363 SKIP(2);
8364 } else if (RAW == '>') {
8365 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008366 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008367 NEXT;
8368 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008369 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008370 MOVETO_ENDTAG(CUR_PTR);
8371 NEXT;
8372 }
8373}
8374
8375/**
8376 * xmlParseMisc:
8377 * @ctxt: an XML parser context
8378 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008379 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008380 *
8381 * [27] Misc ::= Comment | PI | S
8382 */
8383
8384void
8385xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008386 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008387 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008388 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008389 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008390 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008391 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008392 NEXT;
8393 } else
8394 xmlParseComment(ctxt);
8395 }
8396}
8397
8398/**
8399 * xmlParseDocument:
8400 * @ctxt: an XML parser context
8401 *
8402 * parse an XML document (and build a tree if using the standard SAX
8403 * interface).
8404 *
8405 * [1] document ::= prolog element Misc*
8406 *
8407 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8408 *
8409 * Returns 0, -1 in case of error. the parser context is augmented
8410 * as a result of the parsing.
8411 */
8412
8413int
8414xmlParseDocument(xmlParserCtxtPtr ctxt) {
8415 xmlChar start[4];
8416 xmlCharEncoding enc;
8417
8418 xmlInitParser();
8419
8420 GROW;
8421
8422 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008423 * SAX: detecting the level.
8424 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008425 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008426
8427 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008428 * SAX: beginning of the document processing.
8429 */
8430 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8431 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8432
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008433 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8434 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008435 /*
8436 * Get the 4 first bytes and decode the charset
8437 * if enc != XML_CHAR_ENCODING_NONE
8438 * plug some encoding conversion routines.
8439 */
8440 start[0] = RAW;
8441 start[1] = NXT(1);
8442 start[2] = NXT(2);
8443 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008444 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008445 if (enc != XML_CHAR_ENCODING_NONE) {
8446 xmlSwitchEncoding(ctxt, enc);
8447 }
Owen Taylor3473f882001-02-23 17:55:21 +00008448 }
8449
8450
8451 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008452 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008453 }
8454
8455 /*
8456 * Check for the XMLDecl in the Prolog.
8457 */
8458 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008459 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008460
8461 /*
8462 * Note that we will switch encoding on the fly.
8463 */
8464 xmlParseXMLDecl(ctxt);
8465 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8466 /*
8467 * The XML REC instructs us to stop parsing right here
8468 */
8469 return(-1);
8470 }
8471 ctxt->standalone = ctxt->input->standalone;
8472 SKIP_BLANKS;
8473 } else {
8474 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8475 }
8476 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8477 ctxt->sax->startDocument(ctxt->userData);
8478
8479 /*
8480 * The Misc part of the Prolog
8481 */
8482 GROW;
8483 xmlParseMisc(ctxt);
8484
8485 /*
8486 * Then possibly doc type declaration(s) and more Misc
8487 * (doctypedecl Misc*)?
8488 */
8489 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008490 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008491
8492 ctxt->inSubset = 1;
8493 xmlParseDocTypeDecl(ctxt);
8494 if (RAW == '[') {
8495 ctxt->instate = XML_PARSER_DTD;
8496 xmlParseInternalSubset(ctxt);
8497 }
8498
8499 /*
8500 * Create and update the external subset.
8501 */
8502 ctxt->inSubset = 2;
8503 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8504 (!ctxt->disableSAX))
8505 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8506 ctxt->extSubSystem, ctxt->extSubURI);
8507 ctxt->inSubset = 0;
8508
8509
8510 ctxt->instate = XML_PARSER_PROLOG;
8511 xmlParseMisc(ctxt);
8512 }
8513
8514 /*
8515 * Time to start parsing the tree itself
8516 */
8517 GROW;
8518 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008519 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8520 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008521 } else {
8522 ctxt->instate = XML_PARSER_CONTENT;
8523 xmlParseElement(ctxt);
8524 ctxt->instate = XML_PARSER_EPILOG;
8525
8526
8527 /*
8528 * The Misc part at the end
8529 */
8530 xmlParseMisc(ctxt);
8531
Daniel Veillard561b7f82002-03-20 21:55:57 +00008532 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008533 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008534 }
8535 ctxt->instate = XML_PARSER_EOF;
8536 }
8537
8538 /*
8539 * SAX: end of the document processing.
8540 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008541 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008542 ctxt->sax->endDocument(ctxt->userData);
8543
Daniel Veillard5997aca2002-03-18 18:36:20 +00008544 /*
8545 * Remove locally kept entity definitions if the tree was not built
8546 */
8547 if ((ctxt->myDoc != NULL) &&
8548 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8549 xmlFreeDoc(ctxt->myDoc);
8550 ctxt->myDoc = NULL;
8551 }
8552
Daniel Veillardc7612992002-02-17 22:47:37 +00008553 if (! ctxt->wellFormed) {
8554 ctxt->valid = 0;
8555 return(-1);
8556 }
Owen Taylor3473f882001-02-23 17:55:21 +00008557 return(0);
8558}
8559
8560/**
8561 * xmlParseExtParsedEnt:
8562 * @ctxt: an XML parser context
8563 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008564 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008565 * An external general parsed entity is well-formed if it matches the
8566 * production labeled extParsedEnt.
8567 *
8568 * [78] extParsedEnt ::= TextDecl? content
8569 *
8570 * Returns 0, -1 in case of error. the parser context is augmented
8571 * as a result of the parsing.
8572 */
8573
8574int
8575xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8576 xmlChar start[4];
8577 xmlCharEncoding enc;
8578
8579 xmlDefaultSAXHandlerInit();
8580
Daniel Veillard309f81d2003-09-23 09:02:53 +00008581 xmlDetectSAX2(ctxt);
8582
Owen Taylor3473f882001-02-23 17:55:21 +00008583 GROW;
8584
8585 /*
8586 * SAX: beginning of the document processing.
8587 */
8588 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8589 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8590
8591 /*
8592 * Get the 4 first bytes and decode the charset
8593 * if enc != XML_CHAR_ENCODING_NONE
8594 * plug some encoding conversion routines.
8595 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008596 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8597 start[0] = RAW;
8598 start[1] = NXT(1);
8599 start[2] = NXT(2);
8600 start[3] = NXT(3);
8601 enc = xmlDetectCharEncoding(start, 4);
8602 if (enc != XML_CHAR_ENCODING_NONE) {
8603 xmlSwitchEncoding(ctxt, enc);
8604 }
Owen Taylor3473f882001-02-23 17:55:21 +00008605 }
8606
8607
8608 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008609 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008610 }
8611
8612 /*
8613 * Check for the XMLDecl in the Prolog.
8614 */
8615 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008616 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008617
8618 /*
8619 * Note that we will switch encoding on the fly.
8620 */
8621 xmlParseXMLDecl(ctxt);
8622 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8623 /*
8624 * The XML REC instructs us to stop parsing right here
8625 */
8626 return(-1);
8627 }
8628 SKIP_BLANKS;
8629 } else {
8630 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8631 }
8632 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8633 ctxt->sax->startDocument(ctxt->userData);
8634
8635 /*
8636 * Doing validity checking on chunk doesn't make sense
8637 */
8638 ctxt->instate = XML_PARSER_CONTENT;
8639 ctxt->validate = 0;
8640 ctxt->loadsubset = 0;
8641 ctxt->depth = 0;
8642
8643 xmlParseContent(ctxt);
8644
8645 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008646 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008647 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008648 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008649 }
8650
8651 /*
8652 * SAX: end of the document processing.
8653 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008654 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008655 ctxt->sax->endDocument(ctxt->userData);
8656
8657 if (! ctxt->wellFormed) return(-1);
8658 return(0);
8659}
8660
Daniel Veillard73b013f2003-09-30 12:36:01 +00008661#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008662/************************************************************************
8663 * *
8664 * Progressive parsing interfaces *
8665 * *
8666 ************************************************************************/
8667
8668/**
8669 * xmlParseLookupSequence:
8670 * @ctxt: an XML parser context
8671 * @first: the first char to lookup
8672 * @next: the next char to lookup or zero
8673 * @third: the next char to lookup or zero
8674 *
8675 * Try to find if a sequence (first, next, third) or just (first next) or
8676 * (first) is available in the input stream.
8677 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8678 * to avoid rescanning sequences of bytes, it DOES change the state of the
8679 * parser, do not use liberally.
8680 *
8681 * Returns the index to the current parsing point if the full sequence
8682 * is available, -1 otherwise.
8683 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008684static int
Owen Taylor3473f882001-02-23 17:55:21 +00008685xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8686 xmlChar next, xmlChar third) {
8687 int base, len;
8688 xmlParserInputPtr in;
8689 const xmlChar *buf;
8690
8691 in = ctxt->input;
8692 if (in == NULL) return(-1);
8693 base = in->cur - in->base;
8694 if (base < 0) return(-1);
8695 if (ctxt->checkIndex > base)
8696 base = ctxt->checkIndex;
8697 if (in->buf == NULL) {
8698 buf = in->base;
8699 len = in->length;
8700 } else {
8701 buf = in->buf->buffer->content;
8702 len = in->buf->buffer->use;
8703 }
8704 /* take into account the sequence length */
8705 if (third) len -= 2;
8706 else if (next) len --;
8707 for (;base < len;base++) {
8708 if (buf[base] == first) {
8709 if (third != 0) {
8710 if ((buf[base + 1] != next) ||
8711 (buf[base + 2] != third)) continue;
8712 } else if (next != 0) {
8713 if (buf[base + 1] != next) continue;
8714 }
8715 ctxt->checkIndex = 0;
8716#ifdef DEBUG_PUSH
8717 if (next == 0)
8718 xmlGenericError(xmlGenericErrorContext,
8719 "PP: lookup '%c' found at %d\n",
8720 first, base);
8721 else if (third == 0)
8722 xmlGenericError(xmlGenericErrorContext,
8723 "PP: lookup '%c%c' found at %d\n",
8724 first, next, base);
8725 else
8726 xmlGenericError(xmlGenericErrorContext,
8727 "PP: lookup '%c%c%c' found at %d\n",
8728 first, next, third, base);
8729#endif
8730 return(base - (in->cur - in->base));
8731 }
8732 }
8733 ctxt->checkIndex = base;
8734#ifdef DEBUG_PUSH
8735 if (next == 0)
8736 xmlGenericError(xmlGenericErrorContext,
8737 "PP: lookup '%c' failed\n", first);
8738 else if (third == 0)
8739 xmlGenericError(xmlGenericErrorContext,
8740 "PP: lookup '%c%c' failed\n", first, next);
8741 else
8742 xmlGenericError(xmlGenericErrorContext,
8743 "PP: lookup '%c%c%c' failed\n", first, next, third);
8744#endif
8745 return(-1);
8746}
8747
8748/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008749 * xmlParseGetLasts:
8750 * @ctxt: an XML parser context
8751 * @lastlt: pointer to store the last '<' from the input
8752 * @lastgt: pointer to store the last '>' from the input
8753 *
8754 * Lookup the last < and > in the current chunk
8755 */
8756static void
8757xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8758 const xmlChar **lastgt) {
8759 const xmlChar *tmp;
8760
8761 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8762 xmlGenericError(xmlGenericErrorContext,
8763 "Internal error: xmlParseGetLasts\n");
8764 return;
8765 }
8766 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8767 tmp = ctxt->input->end;
8768 tmp--;
8769 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8770 (*tmp != '>')) tmp--;
8771 if (tmp < ctxt->input->base) {
8772 *lastlt = NULL;
8773 *lastgt = NULL;
8774 } else if (*tmp == '<') {
8775 *lastlt = tmp;
8776 tmp--;
8777 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8778 if (tmp < ctxt->input->base)
8779 *lastgt = NULL;
8780 else
8781 *lastgt = tmp;
8782 } else {
8783 *lastgt = tmp;
8784 tmp--;
8785 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8786 if (tmp < ctxt->input->base)
8787 *lastlt = NULL;
8788 else
8789 *lastlt = tmp;
8790 }
8791
8792 } else {
8793 *lastlt = NULL;
8794 *lastgt = NULL;
8795 }
8796}
8797/**
Owen Taylor3473f882001-02-23 17:55:21 +00008798 * xmlParseTryOrFinish:
8799 * @ctxt: an XML parser context
8800 * @terminate: last chunk indicator
8801 *
8802 * Try to progress on parsing
8803 *
8804 * Returns zero if no parsing was possible
8805 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008806static int
Owen Taylor3473f882001-02-23 17:55:21 +00008807xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8808 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008809 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008810 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008811 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008812
8813#ifdef DEBUG_PUSH
8814 switch (ctxt->instate) {
8815 case XML_PARSER_EOF:
8816 xmlGenericError(xmlGenericErrorContext,
8817 "PP: try EOF\n"); break;
8818 case XML_PARSER_START:
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: try START\n"); break;
8821 case XML_PARSER_MISC:
8822 xmlGenericError(xmlGenericErrorContext,
8823 "PP: try MISC\n");break;
8824 case XML_PARSER_COMMENT:
8825 xmlGenericError(xmlGenericErrorContext,
8826 "PP: try COMMENT\n");break;
8827 case XML_PARSER_PROLOG:
8828 xmlGenericError(xmlGenericErrorContext,
8829 "PP: try PROLOG\n");break;
8830 case XML_PARSER_START_TAG:
8831 xmlGenericError(xmlGenericErrorContext,
8832 "PP: try START_TAG\n");break;
8833 case XML_PARSER_CONTENT:
8834 xmlGenericError(xmlGenericErrorContext,
8835 "PP: try CONTENT\n");break;
8836 case XML_PARSER_CDATA_SECTION:
8837 xmlGenericError(xmlGenericErrorContext,
8838 "PP: try CDATA_SECTION\n");break;
8839 case XML_PARSER_END_TAG:
8840 xmlGenericError(xmlGenericErrorContext,
8841 "PP: try END_TAG\n");break;
8842 case XML_PARSER_ENTITY_DECL:
8843 xmlGenericError(xmlGenericErrorContext,
8844 "PP: try ENTITY_DECL\n");break;
8845 case XML_PARSER_ENTITY_VALUE:
8846 xmlGenericError(xmlGenericErrorContext,
8847 "PP: try ENTITY_VALUE\n");break;
8848 case XML_PARSER_ATTRIBUTE_VALUE:
8849 xmlGenericError(xmlGenericErrorContext,
8850 "PP: try ATTRIBUTE_VALUE\n");break;
8851 case XML_PARSER_DTD:
8852 xmlGenericError(xmlGenericErrorContext,
8853 "PP: try DTD\n");break;
8854 case XML_PARSER_EPILOG:
8855 xmlGenericError(xmlGenericErrorContext,
8856 "PP: try EPILOG\n");break;
8857 case XML_PARSER_PI:
8858 xmlGenericError(xmlGenericErrorContext,
8859 "PP: try PI\n");break;
8860 case XML_PARSER_IGNORE:
8861 xmlGenericError(xmlGenericErrorContext,
8862 "PP: try IGNORE\n");break;
8863 }
8864#endif
8865
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008866 if ((ctxt->input != NULL) &&
8867 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008868 xmlSHRINK(ctxt);
8869 ctxt->checkIndex = 0;
8870 }
8871 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008872
Daniel Veillarda880b122003-04-21 21:36:41 +00008873 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008874 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8875 return(0);
8876
8877
Owen Taylor3473f882001-02-23 17:55:21 +00008878 /*
8879 * Pop-up of finished entities.
8880 */
8881 while ((RAW == 0) && (ctxt->inputNr > 1))
8882 xmlPopInput(ctxt);
8883
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008884 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008885 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008886 avail = ctxt->input->length -
8887 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008888 else {
8889 /*
8890 * If we are operating on converted input, try to flush
8891 * remainng chars to avoid them stalling in the non-converted
8892 * buffer.
8893 */
8894 if ((ctxt->input->buf->raw != NULL) &&
8895 (ctxt->input->buf->raw->use > 0)) {
8896 int base = ctxt->input->base -
8897 ctxt->input->buf->buffer->content;
8898 int current = ctxt->input->cur - ctxt->input->base;
8899
8900 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8901 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8902 ctxt->input->cur = ctxt->input->base + current;
8903 ctxt->input->end =
8904 &ctxt->input->buf->buffer->content[
8905 ctxt->input->buf->buffer->use];
8906 }
8907 avail = ctxt->input->buf->buffer->use -
8908 (ctxt->input->cur - ctxt->input->base);
8909 }
Owen Taylor3473f882001-02-23 17:55:21 +00008910 if (avail < 1)
8911 goto done;
8912 switch (ctxt->instate) {
8913 case XML_PARSER_EOF:
8914 /*
8915 * Document parsing is done !
8916 */
8917 goto done;
8918 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008919 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8920 xmlChar start[4];
8921 xmlCharEncoding enc;
8922
8923 /*
8924 * Very first chars read from the document flow.
8925 */
8926 if (avail < 4)
8927 goto done;
8928
8929 /*
8930 * Get the 4 first bytes and decode the charset
8931 * if enc != XML_CHAR_ENCODING_NONE
8932 * plug some encoding conversion routines.
8933 */
8934 start[0] = RAW;
8935 start[1] = NXT(1);
8936 start[2] = NXT(2);
8937 start[3] = NXT(3);
8938 enc = xmlDetectCharEncoding(start, 4);
8939 if (enc != XML_CHAR_ENCODING_NONE) {
8940 xmlSwitchEncoding(ctxt, enc);
8941 }
8942 break;
8943 }
Owen Taylor3473f882001-02-23 17:55:21 +00008944
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00008945 if (avail < 2)
8946 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00008947 cur = ctxt->input->cur[0];
8948 next = ctxt->input->cur[1];
8949 if (cur == 0) {
8950 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8951 ctxt->sax->setDocumentLocator(ctxt->userData,
8952 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008953 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008954 ctxt->instate = XML_PARSER_EOF;
8955#ifdef DEBUG_PUSH
8956 xmlGenericError(xmlGenericErrorContext,
8957 "PP: entering EOF\n");
8958#endif
8959 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8960 ctxt->sax->endDocument(ctxt->userData);
8961 goto done;
8962 }
8963 if ((cur == '<') && (next == '?')) {
8964 /* PI or XML decl */
8965 if (avail < 5) return(ret);
8966 if ((!terminate) &&
8967 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8968 return(ret);
8969 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8970 ctxt->sax->setDocumentLocator(ctxt->userData,
8971 &xmlDefaultSAXLocator);
8972 if ((ctxt->input->cur[2] == 'x') &&
8973 (ctxt->input->cur[3] == 'm') &&
8974 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00008975 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008976 ret += 5;
8977#ifdef DEBUG_PUSH
8978 xmlGenericError(xmlGenericErrorContext,
8979 "PP: Parsing XML Decl\n");
8980#endif
8981 xmlParseXMLDecl(ctxt);
8982 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8983 /*
8984 * The XML REC instructs us to stop parsing right
8985 * here
8986 */
8987 ctxt->instate = XML_PARSER_EOF;
8988 return(0);
8989 }
8990 ctxt->standalone = ctxt->input->standalone;
8991 if ((ctxt->encoding == NULL) &&
8992 (ctxt->input->encoding != NULL))
8993 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8994 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8995 (!ctxt->disableSAX))
8996 ctxt->sax->startDocument(ctxt->userData);
8997 ctxt->instate = XML_PARSER_MISC;
8998#ifdef DEBUG_PUSH
8999 xmlGenericError(xmlGenericErrorContext,
9000 "PP: entering MISC\n");
9001#endif
9002 } else {
9003 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9004 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9005 (!ctxt->disableSAX))
9006 ctxt->sax->startDocument(ctxt->userData);
9007 ctxt->instate = XML_PARSER_MISC;
9008#ifdef DEBUG_PUSH
9009 xmlGenericError(xmlGenericErrorContext,
9010 "PP: entering MISC\n");
9011#endif
9012 }
9013 } else {
9014 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9015 ctxt->sax->setDocumentLocator(ctxt->userData,
9016 &xmlDefaultSAXLocator);
9017 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9018 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9019 (!ctxt->disableSAX))
9020 ctxt->sax->startDocument(ctxt->userData);
9021 ctxt->instate = XML_PARSER_MISC;
9022#ifdef DEBUG_PUSH
9023 xmlGenericError(xmlGenericErrorContext,
9024 "PP: entering MISC\n");
9025#endif
9026 }
9027 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009028 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009029 const xmlChar *name;
9030 const xmlChar *prefix;
9031 const xmlChar *URI;
9032 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009033
9034 if ((avail < 2) && (ctxt->inputNr == 1))
9035 goto done;
9036 cur = ctxt->input->cur[0];
9037 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009038 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009039 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009040 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9041 ctxt->sax->endDocument(ctxt->userData);
9042 goto done;
9043 }
9044 if (!terminate) {
9045 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009046 /* > can be found unescaped in attribute values */
9047 if ((lastlt == NULL) || (ctxt->input->cur >= lastlt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009048 goto done;
9049 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9050 goto done;
9051 }
9052 }
9053 if (ctxt->spaceNr == 0)
9054 spacePush(ctxt, -1);
9055 else
9056 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009057#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009058 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009059#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009060 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009061#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009062 else
9063 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009064#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009065 if (name == NULL) {
9066 spacePop(ctxt);
9067 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009068 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9069 ctxt->sax->endDocument(ctxt->userData);
9070 goto done;
9071 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009072#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009073 /*
9074 * [ VC: Root Element Type ]
9075 * The Name in the document type declaration must match
9076 * the element type of the root element.
9077 */
9078 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9079 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9080 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009081#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009082
9083 /*
9084 * Check for an Empty Element.
9085 */
9086 if ((RAW == '/') && (NXT(1) == '>')) {
9087 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009088
9089 if (ctxt->sax2) {
9090 if ((ctxt->sax != NULL) &&
9091 (ctxt->sax->endElementNs != NULL) &&
9092 (!ctxt->disableSAX))
9093 ctxt->sax->endElementNs(ctxt->userData, name,
9094 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009095#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 } else {
9097 if ((ctxt->sax != NULL) &&
9098 (ctxt->sax->endElement != NULL) &&
9099 (!ctxt->disableSAX))
9100 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009101#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009102 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009103 spacePop(ctxt);
9104 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009105 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009106 } else {
9107 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009108 }
9109 break;
9110 }
9111 if (RAW == '>') {
9112 NEXT;
9113 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009114 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009115 "Couldn't find end of Start Tag %s\n",
9116 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009117 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009118 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009119 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009120 if (ctxt->sax2)
9121 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009122#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009123 else
9124 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009125#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009126
Daniel Veillarda880b122003-04-21 21:36:41 +00009127 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009128 break;
9129 }
9130 case XML_PARSER_CONTENT: {
9131 const xmlChar *test;
9132 unsigned int cons;
9133 if ((avail < 2) && (ctxt->inputNr == 1))
9134 goto done;
9135 cur = ctxt->input->cur[0];
9136 next = ctxt->input->cur[1];
9137
9138 test = CUR_PTR;
9139 cons = ctxt->input->consumed;
9140 if ((cur == '<') && (next == '/')) {
9141 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009142 break;
9143 } else if ((cur == '<') && (next == '?')) {
9144 if ((!terminate) &&
9145 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9146 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009147 xmlParsePI(ctxt);
9148 } else if ((cur == '<') && (next != '!')) {
9149 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009150 break;
9151 } else if ((cur == '<') && (next == '!') &&
9152 (ctxt->input->cur[2] == '-') &&
9153 (ctxt->input->cur[3] == '-')) {
9154 if ((!terminate) &&
9155 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9156 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009157 xmlParseComment(ctxt);
9158 ctxt->instate = XML_PARSER_CONTENT;
9159 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9160 (ctxt->input->cur[2] == '[') &&
9161 (ctxt->input->cur[3] == 'C') &&
9162 (ctxt->input->cur[4] == 'D') &&
9163 (ctxt->input->cur[5] == 'A') &&
9164 (ctxt->input->cur[6] == 'T') &&
9165 (ctxt->input->cur[7] == 'A') &&
9166 (ctxt->input->cur[8] == '[')) {
9167 SKIP(9);
9168 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009169 break;
9170 } else if ((cur == '<') && (next == '!') &&
9171 (avail < 9)) {
9172 goto done;
9173 } else if (cur == '&') {
9174 if ((!terminate) &&
9175 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9176 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009177 xmlParseReference(ctxt);
9178 } else {
9179 /* TODO Avoid the extra copy, handle directly !!! */
9180 /*
9181 * Goal of the following test is:
9182 * - minimize calls to the SAX 'character' callback
9183 * when they are mergeable
9184 * - handle an problem for isBlank when we only parse
9185 * a sequence of blank chars and the next one is
9186 * not available to check against '<' presence.
9187 * - tries to homogenize the differences in SAX
9188 * callbacks between the push and pull versions
9189 * of the parser.
9190 */
9191 if ((ctxt->inputNr == 1) &&
9192 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9193 if (!terminate) {
9194 if (ctxt->progressive) {
9195 if ((lastlt == NULL) ||
9196 (ctxt->input->cur > lastlt))
9197 goto done;
9198 } else if (xmlParseLookupSequence(ctxt,
9199 '<', 0, 0) < 0) {
9200 goto done;
9201 }
9202 }
9203 }
9204 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009205 xmlParseCharData(ctxt, 0);
9206 }
9207 /*
9208 * Pop-up of finished entities.
9209 */
9210 while ((RAW == 0) && (ctxt->inputNr > 1))
9211 xmlPopInput(ctxt);
9212 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009213 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9214 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009215 ctxt->instate = XML_PARSER_EOF;
9216 break;
9217 }
9218 break;
9219 }
9220 case XML_PARSER_END_TAG:
9221 if (avail < 2)
9222 goto done;
9223 if (!terminate) {
9224 if (ctxt->progressive) {
9225 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9226 goto done;
9227 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9228 goto done;
9229 }
9230 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009231 if (ctxt->sax2) {
9232 xmlParseEndTag2(ctxt,
9233 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9234 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009235 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009236 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009237 }
9238#ifdef LIBXML_SAX1_ENABLED
9239 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009240 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009241#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009242 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009243 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009244 } else {
9245 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009246 }
9247 break;
9248 case XML_PARSER_CDATA_SECTION: {
9249 /*
9250 * The Push mode need to have the SAX callback for
9251 * cdataBlock merge back contiguous callbacks.
9252 */
9253 int base;
9254
9255 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9256 if (base < 0) {
9257 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9258 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9259 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009260 ctxt->sax->cdataBlock(ctxt->userData,
9261 ctxt->input->cur,
9262 XML_PARSER_BIG_BUFFER_SIZE);
9263 else if (ctxt->sax->characters != NULL)
9264 ctxt->sax->characters(ctxt->userData,
9265 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009266 XML_PARSER_BIG_BUFFER_SIZE);
9267 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009268 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009269 ctxt->checkIndex = 0;
9270 }
9271 goto done;
9272 } else {
9273 if ((ctxt->sax != NULL) && (base > 0) &&
9274 (!ctxt->disableSAX)) {
9275 if (ctxt->sax->cdataBlock != NULL)
9276 ctxt->sax->cdataBlock(ctxt->userData,
9277 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009278 else if (ctxt->sax->characters != NULL)
9279 ctxt->sax->characters(ctxt->userData,
9280 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009281 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009282 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009283 ctxt->checkIndex = 0;
9284 ctxt->instate = XML_PARSER_CONTENT;
9285#ifdef DEBUG_PUSH
9286 xmlGenericError(xmlGenericErrorContext,
9287 "PP: entering CONTENT\n");
9288#endif
9289 }
9290 break;
9291 }
Owen Taylor3473f882001-02-23 17:55:21 +00009292 case XML_PARSER_MISC:
9293 SKIP_BLANKS;
9294 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009295 avail = ctxt->input->length -
9296 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009297 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009298 avail = ctxt->input->buf->buffer->use -
9299 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009300 if (avail < 2)
9301 goto done;
9302 cur = ctxt->input->cur[0];
9303 next = ctxt->input->cur[1];
9304 if ((cur == '<') && (next == '?')) {
9305 if ((!terminate) &&
9306 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9307 goto done;
9308#ifdef DEBUG_PUSH
9309 xmlGenericError(xmlGenericErrorContext,
9310 "PP: Parsing PI\n");
9311#endif
9312 xmlParsePI(ctxt);
9313 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009314 (ctxt->input->cur[2] == '-') &&
9315 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009316 if ((!terminate) &&
9317 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9318 goto done;
9319#ifdef DEBUG_PUSH
9320 xmlGenericError(xmlGenericErrorContext,
9321 "PP: Parsing Comment\n");
9322#endif
9323 xmlParseComment(ctxt);
9324 ctxt->instate = XML_PARSER_MISC;
9325 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009326 (ctxt->input->cur[2] == 'D') &&
9327 (ctxt->input->cur[3] == 'O') &&
9328 (ctxt->input->cur[4] == 'C') &&
9329 (ctxt->input->cur[5] == 'T') &&
9330 (ctxt->input->cur[6] == 'Y') &&
9331 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009332 (ctxt->input->cur[8] == 'E')) {
9333 if ((!terminate) &&
9334 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9335 goto done;
9336#ifdef DEBUG_PUSH
9337 xmlGenericError(xmlGenericErrorContext,
9338 "PP: Parsing internal subset\n");
9339#endif
9340 ctxt->inSubset = 1;
9341 xmlParseDocTypeDecl(ctxt);
9342 if (RAW == '[') {
9343 ctxt->instate = XML_PARSER_DTD;
9344#ifdef DEBUG_PUSH
9345 xmlGenericError(xmlGenericErrorContext,
9346 "PP: entering DTD\n");
9347#endif
9348 } else {
9349 /*
9350 * Create and update the external subset.
9351 */
9352 ctxt->inSubset = 2;
9353 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9354 (ctxt->sax->externalSubset != NULL))
9355 ctxt->sax->externalSubset(ctxt->userData,
9356 ctxt->intSubName, ctxt->extSubSystem,
9357 ctxt->extSubURI);
9358 ctxt->inSubset = 0;
9359 ctxt->instate = XML_PARSER_PROLOG;
9360#ifdef DEBUG_PUSH
9361 xmlGenericError(xmlGenericErrorContext,
9362 "PP: entering PROLOG\n");
9363#endif
9364 }
9365 } else if ((cur == '<') && (next == '!') &&
9366 (avail < 9)) {
9367 goto done;
9368 } else {
9369 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009370 ctxt->progressive = 1;
9371 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009372#ifdef DEBUG_PUSH
9373 xmlGenericError(xmlGenericErrorContext,
9374 "PP: entering START_TAG\n");
9375#endif
9376 }
9377 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009378 case XML_PARSER_PROLOG:
9379 SKIP_BLANKS;
9380 if (ctxt->input->buf == NULL)
9381 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9382 else
9383 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9384 if (avail < 2)
9385 goto done;
9386 cur = ctxt->input->cur[0];
9387 next = ctxt->input->cur[1];
9388 if ((cur == '<') && (next == '?')) {
9389 if ((!terminate) &&
9390 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9391 goto done;
9392#ifdef DEBUG_PUSH
9393 xmlGenericError(xmlGenericErrorContext,
9394 "PP: Parsing PI\n");
9395#endif
9396 xmlParsePI(ctxt);
9397 } else if ((cur == '<') && (next == '!') &&
9398 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9399 if ((!terminate) &&
9400 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9401 goto done;
9402#ifdef DEBUG_PUSH
9403 xmlGenericError(xmlGenericErrorContext,
9404 "PP: Parsing Comment\n");
9405#endif
9406 xmlParseComment(ctxt);
9407 ctxt->instate = XML_PARSER_PROLOG;
9408 } else if ((cur == '<') && (next == '!') &&
9409 (avail < 4)) {
9410 goto done;
9411 } else {
9412 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009413 ctxt->progressive = 1;
9414 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009415#ifdef DEBUG_PUSH
9416 xmlGenericError(xmlGenericErrorContext,
9417 "PP: entering START_TAG\n");
9418#endif
9419 }
9420 break;
9421 case XML_PARSER_EPILOG:
9422 SKIP_BLANKS;
9423 if (ctxt->input->buf == NULL)
9424 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9425 else
9426 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9427 if (avail < 2)
9428 goto done;
9429 cur = ctxt->input->cur[0];
9430 next = ctxt->input->cur[1];
9431 if ((cur == '<') && (next == '?')) {
9432 if ((!terminate) &&
9433 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9434 goto done;
9435#ifdef DEBUG_PUSH
9436 xmlGenericError(xmlGenericErrorContext,
9437 "PP: Parsing PI\n");
9438#endif
9439 xmlParsePI(ctxt);
9440 ctxt->instate = XML_PARSER_EPILOG;
9441 } else if ((cur == '<') && (next == '!') &&
9442 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9443 if ((!terminate) &&
9444 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9445 goto done;
9446#ifdef DEBUG_PUSH
9447 xmlGenericError(xmlGenericErrorContext,
9448 "PP: Parsing Comment\n");
9449#endif
9450 xmlParseComment(ctxt);
9451 ctxt->instate = XML_PARSER_EPILOG;
9452 } else if ((cur == '<') && (next == '!') &&
9453 (avail < 4)) {
9454 goto done;
9455 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009456 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009457 ctxt->instate = XML_PARSER_EOF;
9458#ifdef DEBUG_PUSH
9459 xmlGenericError(xmlGenericErrorContext,
9460 "PP: entering EOF\n");
9461#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009462 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009463 ctxt->sax->endDocument(ctxt->userData);
9464 goto done;
9465 }
9466 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009467 case XML_PARSER_DTD: {
9468 /*
9469 * Sorry but progressive parsing of the internal subset
9470 * is not expected to be supported. We first check that
9471 * the full content of the internal subset is available and
9472 * the parsing is launched only at that point.
9473 * Internal subset ends up with "']' S? '>'" in an unescaped
9474 * section and not in a ']]>' sequence which are conditional
9475 * sections (whoever argued to keep that crap in XML deserve
9476 * a place in hell !).
9477 */
9478 int base, i;
9479 xmlChar *buf;
9480 xmlChar quote = 0;
9481
9482 base = ctxt->input->cur - ctxt->input->base;
9483 if (base < 0) return(0);
9484 if (ctxt->checkIndex > base)
9485 base = ctxt->checkIndex;
9486 buf = ctxt->input->buf->buffer->content;
9487 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9488 base++) {
9489 if (quote != 0) {
9490 if (buf[base] == quote)
9491 quote = 0;
9492 continue;
9493 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009494 if ((quote == 0) && (buf[base] == '<')) {
9495 int found = 0;
9496 /* special handling of comments */
9497 if (((unsigned int) base + 4 <
9498 ctxt->input->buf->buffer->use) &&
9499 (buf[base + 1] == '!') &&
9500 (buf[base + 2] == '-') &&
9501 (buf[base + 3] == '-')) {
9502 for (;(unsigned int) base + 3 <
9503 ctxt->input->buf->buffer->use; base++) {
9504 if ((buf[base] == '-') &&
9505 (buf[base + 1] == '-') &&
9506 (buf[base + 2] == '>')) {
9507 found = 1;
9508 base += 2;
9509 break;
9510 }
9511 }
9512 if (!found)
9513 break;
9514 continue;
9515 }
9516 }
Owen Taylor3473f882001-02-23 17:55:21 +00009517 if (buf[base] == '"') {
9518 quote = '"';
9519 continue;
9520 }
9521 if (buf[base] == '\'') {
9522 quote = '\'';
9523 continue;
9524 }
9525 if (buf[base] == ']') {
9526 if ((unsigned int) base +1 >=
9527 ctxt->input->buf->buffer->use)
9528 break;
9529 if (buf[base + 1] == ']') {
9530 /* conditional crap, skip both ']' ! */
9531 base++;
9532 continue;
9533 }
9534 for (i = 0;
9535 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9536 i++) {
9537 if (buf[base + i] == '>')
9538 goto found_end_int_subset;
9539 }
9540 break;
9541 }
9542 }
9543 /*
9544 * We didn't found the end of the Internal subset
9545 */
9546 if (quote == 0)
9547 ctxt->checkIndex = base;
9548#ifdef DEBUG_PUSH
9549 if (next == 0)
9550 xmlGenericError(xmlGenericErrorContext,
9551 "PP: lookup of int subset end filed\n");
9552#endif
9553 goto done;
9554
9555found_end_int_subset:
9556 xmlParseInternalSubset(ctxt);
9557 ctxt->inSubset = 2;
9558 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9559 (ctxt->sax->externalSubset != NULL))
9560 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9561 ctxt->extSubSystem, ctxt->extSubURI);
9562 ctxt->inSubset = 0;
9563 ctxt->instate = XML_PARSER_PROLOG;
9564 ctxt->checkIndex = 0;
9565#ifdef DEBUG_PUSH
9566 xmlGenericError(xmlGenericErrorContext,
9567 "PP: entering PROLOG\n");
9568#endif
9569 break;
9570 }
9571 case XML_PARSER_COMMENT:
9572 xmlGenericError(xmlGenericErrorContext,
9573 "PP: internal error, state == COMMENT\n");
9574 ctxt->instate = XML_PARSER_CONTENT;
9575#ifdef DEBUG_PUSH
9576 xmlGenericError(xmlGenericErrorContext,
9577 "PP: entering CONTENT\n");
9578#endif
9579 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009580 case XML_PARSER_IGNORE:
9581 xmlGenericError(xmlGenericErrorContext,
9582 "PP: internal error, state == IGNORE");
9583 ctxt->instate = XML_PARSER_DTD;
9584#ifdef DEBUG_PUSH
9585 xmlGenericError(xmlGenericErrorContext,
9586 "PP: entering DTD\n");
9587#endif
9588 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009589 case XML_PARSER_PI:
9590 xmlGenericError(xmlGenericErrorContext,
9591 "PP: internal error, state == PI\n");
9592 ctxt->instate = XML_PARSER_CONTENT;
9593#ifdef DEBUG_PUSH
9594 xmlGenericError(xmlGenericErrorContext,
9595 "PP: entering CONTENT\n");
9596#endif
9597 break;
9598 case XML_PARSER_ENTITY_DECL:
9599 xmlGenericError(xmlGenericErrorContext,
9600 "PP: internal error, state == ENTITY_DECL\n");
9601 ctxt->instate = XML_PARSER_DTD;
9602#ifdef DEBUG_PUSH
9603 xmlGenericError(xmlGenericErrorContext,
9604 "PP: entering DTD\n");
9605#endif
9606 break;
9607 case XML_PARSER_ENTITY_VALUE:
9608 xmlGenericError(xmlGenericErrorContext,
9609 "PP: internal error, state == ENTITY_VALUE\n");
9610 ctxt->instate = XML_PARSER_CONTENT;
9611#ifdef DEBUG_PUSH
9612 xmlGenericError(xmlGenericErrorContext,
9613 "PP: entering DTD\n");
9614#endif
9615 break;
9616 case XML_PARSER_ATTRIBUTE_VALUE:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9619 ctxt->instate = XML_PARSER_START_TAG;
9620#ifdef DEBUG_PUSH
9621 xmlGenericError(xmlGenericErrorContext,
9622 "PP: entering START_TAG\n");
9623#endif
9624 break;
9625 case XML_PARSER_SYSTEM_LITERAL:
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: internal error, state == SYSTEM_LITERAL\n");
9628 ctxt->instate = XML_PARSER_START_TAG;
9629#ifdef DEBUG_PUSH
9630 xmlGenericError(xmlGenericErrorContext,
9631 "PP: entering START_TAG\n");
9632#endif
9633 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009634 case XML_PARSER_PUBLIC_LITERAL:
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: internal error, state == PUBLIC_LITERAL\n");
9637 ctxt->instate = XML_PARSER_START_TAG;
9638#ifdef DEBUG_PUSH
9639 xmlGenericError(xmlGenericErrorContext,
9640 "PP: entering START_TAG\n");
9641#endif
9642 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009643 }
9644 }
9645done:
9646#ifdef DEBUG_PUSH
9647 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9648#endif
9649 return(ret);
9650}
9651
9652/**
Owen Taylor3473f882001-02-23 17:55:21 +00009653 * xmlParseChunk:
9654 * @ctxt: an XML parser context
9655 * @chunk: an char array
9656 * @size: the size in byte of the chunk
9657 * @terminate: last chunk indicator
9658 *
9659 * Parse a Chunk of memory
9660 *
9661 * Returns zero if no error, the xmlParserErrors otherwise.
9662 */
9663int
9664xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9665 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009666 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9667 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009668 if (ctxt->instate == XML_PARSER_START)
9669 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009670 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9671 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9672 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9673 int cur = ctxt->input->cur - ctxt->input->base;
9674
9675 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9676 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9677 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009678 ctxt->input->end =
9679 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009680#ifdef DEBUG_PUSH
9681 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9682#endif
9683
Owen Taylor3473f882001-02-23 17:55:21 +00009684 } else if (ctxt->instate != XML_PARSER_EOF) {
9685 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9686 xmlParserInputBufferPtr in = ctxt->input->buf;
9687 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9688 (in->raw != NULL)) {
9689 int nbchars;
9690
9691 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9692 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009693 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009694 xmlGenericError(xmlGenericErrorContext,
9695 "xmlParseChunk: encoder error\n");
9696 return(XML_ERR_INVALID_ENCODING);
9697 }
9698 }
9699 }
9700 }
9701 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009702 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9703 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009704 if (terminate) {
9705 /*
9706 * Check for termination
9707 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009708 int avail = 0;
9709 if (ctxt->input->buf == NULL)
9710 avail = ctxt->input->length -
9711 (ctxt->input->cur - ctxt->input->base);
9712 else
9713 avail = ctxt->input->buf->buffer->use -
9714 (ctxt->input->cur - ctxt->input->base);
9715
Owen Taylor3473f882001-02-23 17:55:21 +00009716 if ((ctxt->instate != XML_PARSER_EOF) &&
9717 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009718 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009719 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009720 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009721 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009722 }
Owen Taylor3473f882001-02-23 17:55:21 +00009723 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009724 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009725 ctxt->sax->endDocument(ctxt->userData);
9726 }
9727 ctxt->instate = XML_PARSER_EOF;
9728 }
9729 return((xmlParserErrors) ctxt->errNo);
9730}
9731
9732/************************************************************************
9733 * *
9734 * I/O front end functions to the parser *
9735 * *
9736 ************************************************************************/
9737
9738/**
9739 * xmlStopParser:
9740 * @ctxt: an XML parser context
9741 *
9742 * Blocks further parser processing
9743 */
9744void
9745xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009746 if (ctxt == NULL)
9747 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009749 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009750 if (ctxt->input != NULL)
9751 ctxt->input->cur = BAD_CAST"";
9752}
9753
9754/**
9755 * xmlCreatePushParserCtxt:
9756 * @sax: a SAX handler
9757 * @user_data: The user data returned on SAX callbacks
9758 * @chunk: a pointer to an array of chars
9759 * @size: number of chars in the array
9760 * @filename: an optional file name or URI
9761 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009762 * Create a parser context for using the XML parser in push mode.
9763 * If @buffer and @size are non-NULL, the data is used to detect
9764 * the encoding. The remaining characters will be parsed so they
9765 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009766 * To allow content encoding detection, @size should be >= 4
9767 * The value of @filename is used for fetching external entities
9768 * and error/warning reports.
9769 *
9770 * Returns the new parser context or NULL
9771 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009772
Owen Taylor3473f882001-02-23 17:55:21 +00009773xmlParserCtxtPtr
9774xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9775 const char *chunk, int size, const char *filename) {
9776 xmlParserCtxtPtr ctxt;
9777 xmlParserInputPtr inputStream;
9778 xmlParserInputBufferPtr buf;
9779 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9780
9781 /*
9782 * plug some encoding conversion routines
9783 */
9784 if ((chunk != NULL) && (size >= 4))
9785 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9786
9787 buf = xmlAllocParserInputBuffer(enc);
9788 if (buf == NULL) return(NULL);
9789
9790 ctxt = xmlNewParserCtxt();
9791 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009792 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009793 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009794 return(NULL);
9795 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009796 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9797 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009798 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009799 xmlFreeParserInputBuffer(buf);
9800 xmlFreeParserCtxt(ctxt);
9801 return(NULL);
9802 }
Owen Taylor3473f882001-02-23 17:55:21 +00009803 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009804#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009805 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009806#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009807 xmlFree(ctxt->sax);
9808 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9809 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009810 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009811 xmlFreeParserInputBuffer(buf);
9812 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009813 return(NULL);
9814 }
9815 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9816 if (user_data != NULL)
9817 ctxt->userData = user_data;
9818 }
9819 if (filename == NULL) {
9820 ctxt->directory = NULL;
9821 } else {
9822 ctxt->directory = xmlParserGetDirectory(filename);
9823 }
9824
9825 inputStream = xmlNewInputStream(ctxt);
9826 if (inputStream == NULL) {
9827 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009828 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009829 return(NULL);
9830 }
9831
9832 if (filename == NULL)
9833 inputStream->filename = NULL;
9834 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009835 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009836 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009837 inputStream->buf = buf;
9838 inputStream->base = inputStream->buf->buffer->content;
9839 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009840 inputStream->end =
9841 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009842
9843 inputPush(ctxt, inputStream);
9844
9845 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9846 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009847 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9848 int cur = ctxt->input->cur - ctxt->input->base;
9849
Owen Taylor3473f882001-02-23 17:55:21 +00009850 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009851
9852 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9853 ctxt->input->cur = ctxt->input->base + cur;
9854 ctxt->input->end =
9855 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009856#ifdef DEBUG_PUSH
9857 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9858#endif
9859 }
9860
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009861 if (enc != XML_CHAR_ENCODING_NONE) {
9862 xmlSwitchEncoding(ctxt, enc);
9863 }
9864
Owen Taylor3473f882001-02-23 17:55:21 +00009865 return(ctxt);
9866}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009867#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009868
9869/**
9870 * xmlCreateIOParserCtxt:
9871 * @sax: a SAX handler
9872 * @user_data: The user data returned on SAX callbacks
9873 * @ioread: an I/O read function
9874 * @ioclose: an I/O close function
9875 * @ioctx: an I/O handler
9876 * @enc: the charset encoding if known
9877 *
9878 * Create a parser context for using the XML parser with an existing
9879 * I/O stream
9880 *
9881 * Returns the new parser context or NULL
9882 */
9883xmlParserCtxtPtr
9884xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9885 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9886 void *ioctx, xmlCharEncoding enc) {
9887 xmlParserCtxtPtr ctxt;
9888 xmlParserInputPtr inputStream;
9889 xmlParserInputBufferPtr buf;
9890
9891 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9892 if (buf == NULL) return(NULL);
9893
9894 ctxt = xmlNewParserCtxt();
9895 if (ctxt == NULL) {
9896 xmlFree(buf);
9897 return(NULL);
9898 }
9899 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009900#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009901 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009902#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009903 xmlFree(ctxt->sax);
9904 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9905 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009906 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009907 xmlFree(ctxt);
9908 return(NULL);
9909 }
9910 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9911 if (user_data != NULL)
9912 ctxt->userData = user_data;
9913 }
9914
9915 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9916 if (inputStream == NULL) {
9917 xmlFreeParserCtxt(ctxt);
9918 return(NULL);
9919 }
9920 inputPush(ctxt, inputStream);
9921
9922 return(ctxt);
9923}
9924
Daniel Veillard4432df22003-09-28 18:58:27 +00009925#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009926/************************************************************************
9927 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009928 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009929 * *
9930 ************************************************************************/
9931
9932/**
9933 * xmlIOParseDTD:
9934 * @sax: the SAX handler block or NULL
9935 * @input: an Input Buffer
9936 * @enc: the charset encoding if known
9937 *
9938 * Load and parse a DTD
9939 *
9940 * Returns the resulting xmlDtdPtr or NULL in case of error.
9941 * @input will be freed at parsing end.
9942 */
9943
9944xmlDtdPtr
9945xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9946 xmlCharEncoding enc) {
9947 xmlDtdPtr ret = NULL;
9948 xmlParserCtxtPtr ctxt;
9949 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009950 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009951
9952 if (input == NULL)
9953 return(NULL);
9954
9955 ctxt = xmlNewParserCtxt();
9956 if (ctxt == NULL) {
9957 return(NULL);
9958 }
9959
9960 /*
9961 * Set-up the SAX context
9962 */
9963 if (sax != NULL) {
9964 if (ctxt->sax != NULL)
9965 xmlFree(ctxt->sax);
9966 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +00009967 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +00009968 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009969 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009970
9971 /*
9972 * generate a parser input from the I/O handler
9973 */
9974
Daniel Veillard43caefb2003-12-07 19:32:22 +00009975 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +00009976 if (pinput == NULL) {
9977 if (sax != NULL) ctxt->sax = NULL;
9978 xmlFreeParserCtxt(ctxt);
9979 return(NULL);
9980 }
9981
9982 /*
9983 * plug some encoding conversion routines here.
9984 */
9985 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +00009986 if (enc != XML_CHAR_ENCODING_NONE) {
9987 xmlSwitchEncoding(ctxt, enc);
9988 }
Owen Taylor3473f882001-02-23 17:55:21 +00009989
9990 pinput->filename = NULL;
9991 pinput->line = 1;
9992 pinput->col = 1;
9993 pinput->base = ctxt->input->cur;
9994 pinput->cur = ctxt->input->cur;
9995 pinput->free = NULL;
9996
9997 /*
9998 * let's parse that entity knowing it's an external subset.
9999 */
10000 ctxt->inSubset = 2;
10001 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10002 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10003 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010004
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010005 if ((enc == XML_CHAR_ENCODING_NONE) &&
10006 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010007 /*
10008 * Get the 4 first bytes and decode the charset
10009 * if enc != XML_CHAR_ENCODING_NONE
10010 * plug some encoding conversion routines.
10011 */
10012 start[0] = RAW;
10013 start[1] = NXT(1);
10014 start[2] = NXT(2);
10015 start[3] = NXT(3);
10016 enc = xmlDetectCharEncoding(start, 4);
10017 if (enc != XML_CHAR_ENCODING_NONE) {
10018 xmlSwitchEncoding(ctxt, enc);
10019 }
10020 }
10021
Owen Taylor3473f882001-02-23 17:55:21 +000010022 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10023
10024 if (ctxt->myDoc != NULL) {
10025 if (ctxt->wellFormed) {
10026 ret = ctxt->myDoc->extSubset;
10027 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010028 if (ret != NULL) {
10029 xmlNodePtr tmp;
10030
10031 ret->doc = NULL;
10032 tmp = ret->children;
10033 while (tmp != NULL) {
10034 tmp->doc = NULL;
10035 tmp = tmp->next;
10036 }
10037 }
Owen Taylor3473f882001-02-23 17:55:21 +000010038 } else {
10039 ret = NULL;
10040 }
10041 xmlFreeDoc(ctxt->myDoc);
10042 ctxt->myDoc = NULL;
10043 }
10044 if (sax != NULL) ctxt->sax = NULL;
10045 xmlFreeParserCtxt(ctxt);
10046
10047 return(ret);
10048}
10049
10050/**
10051 * xmlSAXParseDTD:
10052 * @sax: the SAX handler block
10053 * @ExternalID: a NAME* containing the External ID of the DTD
10054 * @SystemID: a NAME* containing the URL to the DTD
10055 *
10056 * Load and parse an external subset.
10057 *
10058 * Returns the resulting xmlDtdPtr or NULL in case of error.
10059 */
10060
10061xmlDtdPtr
10062xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10063 const xmlChar *SystemID) {
10064 xmlDtdPtr ret = NULL;
10065 xmlParserCtxtPtr ctxt;
10066 xmlParserInputPtr input = NULL;
10067 xmlCharEncoding enc;
10068
10069 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10070
10071 ctxt = xmlNewParserCtxt();
10072 if (ctxt == NULL) {
10073 return(NULL);
10074 }
10075
10076 /*
10077 * Set-up the SAX context
10078 */
10079 if (sax != NULL) {
10080 if (ctxt->sax != NULL)
10081 xmlFree(ctxt->sax);
10082 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010083 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010084 }
10085
10086 /*
10087 * Ask the Entity resolver to load the damn thing
10088 */
10089
10090 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010091 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010092 if (input == NULL) {
10093 if (sax != NULL) ctxt->sax = NULL;
10094 xmlFreeParserCtxt(ctxt);
10095 return(NULL);
10096 }
10097
10098 /*
10099 * plug some encoding conversion routines here.
10100 */
10101 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010102 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10103 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10104 xmlSwitchEncoding(ctxt, enc);
10105 }
Owen Taylor3473f882001-02-23 17:55:21 +000010106
10107 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010108 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010109 input->line = 1;
10110 input->col = 1;
10111 input->base = ctxt->input->cur;
10112 input->cur = ctxt->input->cur;
10113 input->free = NULL;
10114
10115 /*
10116 * let's parse that entity knowing it's an external subset.
10117 */
10118 ctxt->inSubset = 2;
10119 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10120 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10121 ExternalID, SystemID);
10122 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10123
10124 if (ctxt->myDoc != NULL) {
10125 if (ctxt->wellFormed) {
10126 ret = ctxt->myDoc->extSubset;
10127 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010128 if (ret != NULL) {
10129 xmlNodePtr tmp;
10130
10131 ret->doc = NULL;
10132 tmp = ret->children;
10133 while (tmp != NULL) {
10134 tmp->doc = NULL;
10135 tmp = tmp->next;
10136 }
10137 }
Owen Taylor3473f882001-02-23 17:55:21 +000010138 } else {
10139 ret = NULL;
10140 }
10141 xmlFreeDoc(ctxt->myDoc);
10142 ctxt->myDoc = NULL;
10143 }
10144 if (sax != NULL) ctxt->sax = NULL;
10145 xmlFreeParserCtxt(ctxt);
10146
10147 return(ret);
10148}
10149
Daniel Veillard4432df22003-09-28 18:58:27 +000010150
Owen Taylor3473f882001-02-23 17:55:21 +000010151/**
10152 * xmlParseDTD:
10153 * @ExternalID: a NAME* containing the External ID of the DTD
10154 * @SystemID: a NAME* containing the URL to the DTD
10155 *
10156 * Load and parse an external subset.
10157 *
10158 * Returns the resulting xmlDtdPtr or NULL in case of error.
10159 */
10160
10161xmlDtdPtr
10162xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10163 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10164}
Daniel Veillard4432df22003-09-28 18:58:27 +000010165#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010166
10167/************************************************************************
10168 * *
10169 * Front ends when parsing an Entity *
10170 * *
10171 ************************************************************************/
10172
10173/**
Owen Taylor3473f882001-02-23 17:55:21 +000010174 * xmlParseCtxtExternalEntity:
10175 * @ctx: the existing parsing context
10176 * @URL: the URL for the entity to load
10177 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010178 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010179 *
10180 * Parse an external general entity within an existing parsing context
10181 * An external general parsed entity is well-formed if it matches the
10182 * production labeled extParsedEnt.
10183 *
10184 * [78] extParsedEnt ::= TextDecl? content
10185 *
10186 * Returns 0 if the entity is well formed, -1 in case of args problem and
10187 * the parser error code otherwise
10188 */
10189
10190int
10191xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010192 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010193 xmlParserCtxtPtr ctxt;
10194 xmlDocPtr newDoc;
10195 xmlSAXHandlerPtr oldsax = NULL;
10196 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010197 xmlChar start[4];
10198 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010199
10200 if (ctx->depth > 40) {
10201 return(XML_ERR_ENTITY_LOOP);
10202 }
10203
Daniel Veillardcda96922001-08-21 10:56:31 +000010204 if (lst != NULL)
10205 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010206 if ((URL == NULL) && (ID == NULL))
10207 return(-1);
10208 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10209 return(-1);
10210
10211
10212 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10213 if (ctxt == NULL) return(-1);
10214 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010215 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010216 oldsax = ctxt->sax;
10217 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010218 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010219 newDoc = xmlNewDoc(BAD_CAST "1.0");
10220 if (newDoc == NULL) {
10221 xmlFreeParserCtxt(ctxt);
10222 return(-1);
10223 }
10224 if (ctx->myDoc != NULL) {
10225 newDoc->intSubset = ctx->myDoc->intSubset;
10226 newDoc->extSubset = ctx->myDoc->extSubset;
10227 }
10228 if (ctx->myDoc->URL != NULL) {
10229 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10230 }
10231 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10232 if (newDoc->children == NULL) {
10233 ctxt->sax = oldsax;
10234 xmlFreeParserCtxt(ctxt);
10235 newDoc->intSubset = NULL;
10236 newDoc->extSubset = NULL;
10237 xmlFreeDoc(newDoc);
10238 return(-1);
10239 }
10240 nodePush(ctxt, newDoc->children);
10241 if (ctx->myDoc == NULL) {
10242 ctxt->myDoc = newDoc;
10243 } else {
10244 ctxt->myDoc = ctx->myDoc;
10245 newDoc->children->doc = ctx->myDoc;
10246 }
10247
Daniel Veillard87a764e2001-06-20 17:41:10 +000010248 /*
10249 * Get the 4 first bytes and decode the charset
10250 * if enc != XML_CHAR_ENCODING_NONE
10251 * plug some encoding conversion routines.
10252 */
10253 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010254 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10255 start[0] = RAW;
10256 start[1] = NXT(1);
10257 start[2] = NXT(2);
10258 start[3] = NXT(3);
10259 enc = xmlDetectCharEncoding(start, 4);
10260 if (enc != XML_CHAR_ENCODING_NONE) {
10261 xmlSwitchEncoding(ctxt, enc);
10262 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010263 }
10264
Owen Taylor3473f882001-02-23 17:55:21 +000010265 /*
10266 * Parse a possible text declaration first
10267 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010268 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010269 xmlParseTextDecl(ctxt);
10270 }
10271
10272 /*
10273 * Doing validity checking on chunk doesn't make sense
10274 */
10275 ctxt->instate = XML_PARSER_CONTENT;
10276 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010277 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010278 ctxt->loadsubset = ctx->loadsubset;
10279 ctxt->depth = ctx->depth + 1;
10280 ctxt->replaceEntities = ctx->replaceEntities;
10281 if (ctxt->validate) {
10282 ctxt->vctxt.error = ctx->vctxt.error;
10283 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010284 } else {
10285 ctxt->vctxt.error = NULL;
10286 ctxt->vctxt.warning = NULL;
10287 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010288 ctxt->vctxt.nodeTab = NULL;
10289 ctxt->vctxt.nodeNr = 0;
10290 ctxt->vctxt.nodeMax = 0;
10291 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010292 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10293 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010294 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10295 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10296 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010297 ctxt->dictNames = ctx->dictNames;
10298 ctxt->attsDefault = ctx->attsDefault;
10299 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010300
10301 xmlParseContent(ctxt);
10302
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010303 ctx->validate = ctxt->validate;
10304 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010305 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010306 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010307 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010308 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010309 }
10310 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010311 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010312 }
10313
10314 if (!ctxt->wellFormed) {
10315 if (ctxt->errNo == 0)
10316 ret = 1;
10317 else
10318 ret = ctxt->errNo;
10319 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010320 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010321 xmlNodePtr cur;
10322
10323 /*
10324 * Return the newly created nodeset after unlinking it from
10325 * they pseudo parent.
10326 */
10327 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010328 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010329 while (cur != NULL) {
10330 cur->parent = NULL;
10331 cur = cur->next;
10332 }
10333 newDoc->children->children = NULL;
10334 }
10335 ret = 0;
10336 }
10337 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010338 ctxt->dict = NULL;
10339 ctxt->attsDefault = NULL;
10340 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010341 xmlFreeParserCtxt(ctxt);
10342 newDoc->intSubset = NULL;
10343 newDoc->extSubset = NULL;
10344 xmlFreeDoc(newDoc);
10345
10346 return(ret);
10347}
10348
10349/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010350 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010351 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010352 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010353 * @sax: the SAX handler bloc (possibly NULL)
10354 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10355 * @depth: Used for loop detection, use 0
10356 * @URL: the URL for the entity to load
10357 * @ID: the System ID for the entity to load
10358 * @list: the return value for the set of parsed nodes
10359 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010360 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010361 *
10362 * Returns 0 if the entity is well formed, -1 in case of args problem and
10363 * the parser error code otherwise
10364 */
10365
Daniel Veillard7d515752003-09-26 19:12:37 +000010366static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010367xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10368 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010369 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010370 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010371 xmlParserCtxtPtr ctxt;
10372 xmlDocPtr newDoc;
10373 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010374 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010375 xmlChar start[4];
10376 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010377
10378 if (depth > 40) {
10379 return(XML_ERR_ENTITY_LOOP);
10380 }
10381
10382
10383
10384 if (list != NULL)
10385 *list = NULL;
10386 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010387 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010388 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010389 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010390
10391
10392 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010393 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010394 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010395 if (oldctxt != NULL) {
10396 ctxt->_private = oldctxt->_private;
10397 ctxt->loadsubset = oldctxt->loadsubset;
10398 ctxt->validate = oldctxt->validate;
10399 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010400 ctxt->record_info = oldctxt->record_info;
10401 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10402 ctxt->node_seq.length = oldctxt->node_seq.length;
10403 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010404 } else {
10405 /*
10406 * Doing validity checking on chunk without context
10407 * doesn't make sense
10408 */
10409 ctxt->_private = NULL;
10410 ctxt->validate = 0;
10411 ctxt->external = 2;
10412 ctxt->loadsubset = 0;
10413 }
Owen Taylor3473f882001-02-23 17:55:21 +000010414 if (sax != NULL) {
10415 oldsax = ctxt->sax;
10416 ctxt->sax = sax;
10417 if (user_data != NULL)
10418 ctxt->userData = user_data;
10419 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010420 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010421 newDoc = xmlNewDoc(BAD_CAST "1.0");
10422 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010423 ctxt->node_seq.maximum = 0;
10424 ctxt->node_seq.length = 0;
10425 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010426 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010427 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010428 }
10429 if (doc != NULL) {
10430 newDoc->intSubset = doc->intSubset;
10431 newDoc->extSubset = doc->extSubset;
10432 }
10433 if (doc->URL != NULL) {
10434 newDoc->URL = xmlStrdup(doc->URL);
10435 }
10436 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10437 if (newDoc->children == NULL) {
10438 if (sax != NULL)
10439 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010440 ctxt->node_seq.maximum = 0;
10441 ctxt->node_seq.length = 0;
10442 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010443 xmlFreeParserCtxt(ctxt);
10444 newDoc->intSubset = NULL;
10445 newDoc->extSubset = NULL;
10446 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010447 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010448 }
10449 nodePush(ctxt, newDoc->children);
10450 if (doc == NULL) {
10451 ctxt->myDoc = newDoc;
10452 } else {
10453 ctxt->myDoc = doc;
10454 newDoc->children->doc = doc;
10455 }
10456
Daniel Veillard87a764e2001-06-20 17:41:10 +000010457 /*
10458 * Get the 4 first bytes and decode the charset
10459 * if enc != XML_CHAR_ENCODING_NONE
10460 * plug some encoding conversion routines.
10461 */
10462 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010463 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10464 start[0] = RAW;
10465 start[1] = NXT(1);
10466 start[2] = NXT(2);
10467 start[3] = NXT(3);
10468 enc = xmlDetectCharEncoding(start, 4);
10469 if (enc != XML_CHAR_ENCODING_NONE) {
10470 xmlSwitchEncoding(ctxt, enc);
10471 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010472 }
10473
Owen Taylor3473f882001-02-23 17:55:21 +000010474 /*
10475 * Parse a possible text declaration first
10476 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010477 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010478 xmlParseTextDecl(ctxt);
10479 }
10480
Owen Taylor3473f882001-02-23 17:55:21 +000010481 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010482 ctxt->depth = depth;
10483
10484 xmlParseContent(ctxt);
10485
Daniel Veillard561b7f82002-03-20 21:55:57 +000010486 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010487 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010488 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010489 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010490 }
10491 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010492 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010493 }
10494
10495 if (!ctxt->wellFormed) {
10496 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010497 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010498 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010499 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010500 } else {
10501 if (list != NULL) {
10502 xmlNodePtr cur;
10503
10504 /*
10505 * Return the newly created nodeset after unlinking it from
10506 * they pseudo parent.
10507 */
10508 cur = newDoc->children->children;
10509 *list = cur;
10510 while (cur != NULL) {
10511 cur->parent = NULL;
10512 cur = cur->next;
10513 }
10514 newDoc->children->children = NULL;
10515 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010516 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010517 }
10518 if (sax != NULL)
10519 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010520 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10521 oldctxt->node_seq.length = ctxt->node_seq.length;
10522 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010523 ctxt->node_seq.maximum = 0;
10524 ctxt->node_seq.length = 0;
10525 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010526 xmlFreeParserCtxt(ctxt);
10527 newDoc->intSubset = NULL;
10528 newDoc->extSubset = NULL;
10529 xmlFreeDoc(newDoc);
10530
10531 return(ret);
10532}
10533
Daniel Veillard81273902003-09-30 00:43:48 +000010534#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010535/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010536 * xmlParseExternalEntity:
10537 * @doc: the document the chunk pertains to
10538 * @sax: the SAX handler bloc (possibly NULL)
10539 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10540 * @depth: Used for loop detection, use 0
10541 * @URL: the URL for the entity to load
10542 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010543 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010544 *
10545 * Parse an external general entity
10546 * An external general parsed entity is well-formed if it matches the
10547 * production labeled extParsedEnt.
10548 *
10549 * [78] extParsedEnt ::= TextDecl? content
10550 *
10551 * Returns 0 if the entity is well formed, -1 in case of args problem and
10552 * the parser error code otherwise
10553 */
10554
10555int
10556xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010557 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010558 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010559 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010560}
10561
10562/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010563 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010564 * @doc: the document the chunk pertains to
10565 * @sax: the SAX handler bloc (possibly NULL)
10566 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10567 * @depth: Used for loop detection, use 0
10568 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010569 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010570 *
10571 * Parse a well-balanced chunk of an XML document
10572 * called by the parser
10573 * The allowed sequence for the Well Balanced Chunk is the one defined by
10574 * the content production in the XML grammar:
10575 *
10576 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10577 *
10578 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10579 * the parser error code otherwise
10580 */
10581
10582int
10583xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010584 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010585 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10586 depth, string, lst, 0 );
10587}
Daniel Veillard81273902003-09-30 00:43:48 +000010588#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010589
10590/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010591 * xmlParseBalancedChunkMemoryInternal:
10592 * @oldctxt: the existing parsing context
10593 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10594 * @user_data: the user data field for the parser context
10595 * @lst: the return value for the set of parsed nodes
10596 *
10597 *
10598 * Parse a well-balanced chunk of an XML document
10599 * called by the parser
10600 * The allowed sequence for the Well Balanced Chunk is the one defined by
10601 * the content production in the XML grammar:
10602 *
10603 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10604 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010605 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10606 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010607 *
10608 * In case recover is set to 1, the nodelist will not be empty even if
10609 * the parsed chunk is not well balanced.
10610 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010611static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010612xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10613 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10614 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010615 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010616 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010617 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010618 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010619 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010620
10621 if (oldctxt->depth > 40) {
10622 return(XML_ERR_ENTITY_LOOP);
10623 }
10624
10625
10626 if (lst != NULL)
10627 *lst = NULL;
10628 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010629 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010630
10631 size = xmlStrlen(string);
10632
10633 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010634 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010635 if (user_data != NULL)
10636 ctxt->userData = user_data;
10637 else
10638 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010639 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10640 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010641 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10642 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10643 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010644
10645 oldsax = ctxt->sax;
10646 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010647 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010648 ctxt->replaceEntities = oldctxt->replaceEntities;
10649 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010650
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010651 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010652 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010653 newDoc = xmlNewDoc(BAD_CAST "1.0");
10654 if (newDoc == NULL) {
10655 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010656 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010657 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010658 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010659 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010660 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010661 } else {
10662 ctxt->myDoc = oldctxt->myDoc;
10663 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010664 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010665 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010666 BAD_CAST "pseudoroot", NULL);
10667 if (ctxt->myDoc->children == NULL) {
10668 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010669 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010670 xmlFreeParserCtxt(ctxt);
10671 if (newDoc != NULL)
10672 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010673 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010674 }
10675 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010676 ctxt->instate = XML_PARSER_CONTENT;
10677 ctxt->depth = oldctxt->depth + 1;
10678
Daniel Veillard328f48c2002-11-15 15:24:34 +000010679 ctxt->validate = 0;
10680 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010681 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10682 /*
10683 * ID/IDREF registration will be done in xmlValidateElement below
10684 */
10685 ctxt->loadsubset |= XML_SKIP_IDS;
10686 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010687 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010688 ctxt->attsDefault = oldctxt->attsDefault;
10689 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010690
Daniel Veillard68e9e742002-11-16 15:35:11 +000010691 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010692 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010693 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010694 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010695 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010696 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010697 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010698 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010699 }
10700
10701 if (!ctxt->wellFormed) {
10702 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010703 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010704 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010705 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010706 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010707 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010708 }
10709
William M. Brack7b9154b2003-09-27 19:23:50 +000010710 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010711 xmlNodePtr cur;
10712
10713 /*
10714 * Return the newly created nodeset after unlinking it from
10715 * they pseudo parent.
10716 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010717 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010718 *lst = cur;
10719 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010720#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010721 if (oldctxt->validate && oldctxt->wellFormed &&
10722 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10723 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10724 oldctxt->myDoc, cur);
10725 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010726#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010727 cur->parent = NULL;
10728 cur = cur->next;
10729 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010730 ctxt->myDoc->children->children = NULL;
10731 }
10732 if (ctxt->myDoc != NULL) {
10733 xmlFreeNode(ctxt->myDoc->children);
10734 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010735 }
10736
10737 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010738 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010739 ctxt->attsDefault = NULL;
10740 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010741 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010742 if (newDoc != NULL)
10743 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010744
10745 return(ret);
10746}
10747
Daniel Veillard81273902003-09-30 00:43:48 +000010748#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010749/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010750 * xmlParseBalancedChunkMemoryRecover:
10751 * @doc: the document the chunk pertains to
10752 * @sax: the SAX handler bloc (possibly NULL)
10753 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10754 * @depth: Used for loop detection, use 0
10755 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10756 * @lst: the return value for the set of parsed nodes
10757 * @recover: return nodes even if the data is broken (use 0)
10758 *
10759 *
10760 * Parse a well-balanced chunk of an XML document
10761 * called by the parser
10762 * The allowed sequence for the Well Balanced Chunk is the one defined by
10763 * the content production in the XML grammar:
10764 *
10765 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10766 *
10767 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10768 * the parser error code otherwise
10769 *
10770 * In case recover is set to 1, the nodelist will not be empty even if
10771 * the parsed chunk is not well balanced.
10772 */
10773int
10774xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10775 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10776 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010777 xmlParserCtxtPtr ctxt;
10778 xmlDocPtr newDoc;
10779 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010780 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010781 int size;
10782 int ret = 0;
10783
10784 if (depth > 40) {
10785 return(XML_ERR_ENTITY_LOOP);
10786 }
10787
10788
Daniel Veillardcda96922001-08-21 10:56:31 +000010789 if (lst != NULL)
10790 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010791 if (string == NULL)
10792 return(-1);
10793
10794 size = xmlStrlen(string);
10795
10796 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10797 if (ctxt == NULL) return(-1);
10798 ctxt->userData = ctxt;
10799 if (sax != NULL) {
10800 oldsax = ctxt->sax;
10801 ctxt->sax = sax;
10802 if (user_data != NULL)
10803 ctxt->userData = user_data;
10804 }
10805 newDoc = xmlNewDoc(BAD_CAST "1.0");
10806 if (newDoc == NULL) {
10807 xmlFreeParserCtxt(ctxt);
10808 return(-1);
10809 }
10810 if (doc != NULL) {
10811 newDoc->intSubset = doc->intSubset;
10812 newDoc->extSubset = doc->extSubset;
10813 }
10814 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10815 if (newDoc->children == NULL) {
10816 if (sax != NULL)
10817 ctxt->sax = oldsax;
10818 xmlFreeParserCtxt(ctxt);
10819 newDoc->intSubset = NULL;
10820 newDoc->extSubset = NULL;
10821 xmlFreeDoc(newDoc);
10822 return(-1);
10823 }
10824 nodePush(ctxt, newDoc->children);
10825 if (doc == NULL) {
10826 ctxt->myDoc = newDoc;
10827 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010828 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010829 newDoc->children->doc = doc;
10830 }
10831 ctxt->instate = XML_PARSER_CONTENT;
10832 ctxt->depth = depth;
10833
10834 /*
10835 * Doing validity checking on chunk doesn't make sense
10836 */
10837 ctxt->validate = 0;
10838 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010839 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010840
Daniel Veillardb39bc392002-10-26 19:29:51 +000010841 if ( doc != NULL ){
10842 content = doc->children;
10843 doc->children = NULL;
10844 xmlParseContent(ctxt);
10845 doc->children = content;
10846 }
10847 else {
10848 xmlParseContent(ctxt);
10849 }
Owen Taylor3473f882001-02-23 17:55:21 +000010850 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010851 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010852 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010853 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010854 }
10855 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010856 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010857 }
10858
10859 if (!ctxt->wellFormed) {
10860 if (ctxt->errNo == 0)
10861 ret = 1;
10862 else
10863 ret = ctxt->errNo;
10864 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010865 ret = 0;
10866 }
10867
10868 if (lst != NULL && (ret == 0 || recover == 1)) {
10869 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010870
10871 /*
10872 * Return the newly created nodeset after unlinking it from
10873 * they pseudo parent.
10874 */
10875 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010876 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010877 while (cur != NULL) {
10878 cur->parent = NULL;
10879 cur = cur->next;
10880 }
10881 newDoc->children->children = NULL;
10882 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010883
Owen Taylor3473f882001-02-23 17:55:21 +000010884 if (sax != NULL)
10885 ctxt->sax = oldsax;
10886 xmlFreeParserCtxt(ctxt);
10887 newDoc->intSubset = NULL;
10888 newDoc->extSubset = NULL;
10889 xmlFreeDoc(newDoc);
10890
10891 return(ret);
10892}
10893
10894/**
10895 * xmlSAXParseEntity:
10896 * @sax: the SAX handler block
10897 * @filename: the filename
10898 *
10899 * parse an XML external entity out of context and build a tree.
10900 * It use the given SAX function block to handle the parsing callback.
10901 * If sax is NULL, fallback to the default DOM tree building routines.
10902 *
10903 * [78] extParsedEnt ::= TextDecl? content
10904 *
10905 * This correspond to a "Well Balanced" chunk
10906 *
10907 * Returns the resulting document tree
10908 */
10909
10910xmlDocPtr
10911xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10912 xmlDocPtr ret;
10913 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010914
10915 ctxt = xmlCreateFileParserCtxt(filename);
10916 if (ctxt == NULL) {
10917 return(NULL);
10918 }
10919 if (sax != NULL) {
10920 if (ctxt->sax != NULL)
10921 xmlFree(ctxt->sax);
10922 ctxt->sax = sax;
10923 ctxt->userData = NULL;
10924 }
10925
Owen Taylor3473f882001-02-23 17:55:21 +000010926 xmlParseExtParsedEnt(ctxt);
10927
10928 if (ctxt->wellFormed)
10929 ret = ctxt->myDoc;
10930 else {
10931 ret = NULL;
10932 xmlFreeDoc(ctxt->myDoc);
10933 ctxt->myDoc = NULL;
10934 }
10935 if (sax != NULL)
10936 ctxt->sax = NULL;
10937 xmlFreeParserCtxt(ctxt);
10938
10939 return(ret);
10940}
10941
10942/**
10943 * xmlParseEntity:
10944 * @filename: the filename
10945 *
10946 * parse an XML external entity out of context and build a tree.
10947 *
10948 * [78] extParsedEnt ::= TextDecl? content
10949 *
10950 * This correspond to a "Well Balanced" chunk
10951 *
10952 * Returns the resulting document tree
10953 */
10954
10955xmlDocPtr
10956xmlParseEntity(const char *filename) {
10957 return(xmlSAXParseEntity(NULL, filename));
10958}
Daniel Veillard81273902003-09-30 00:43:48 +000010959#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010960
10961/**
10962 * xmlCreateEntityParserCtxt:
10963 * @URL: the entity URL
10964 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010965 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010966 *
10967 * Create a parser context for an external entity
10968 * Automatic support for ZLIB/Compress compressed document is provided
10969 * by default if found at compile-time.
10970 *
10971 * Returns the new parser context or NULL
10972 */
10973xmlParserCtxtPtr
10974xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10975 const xmlChar *base) {
10976 xmlParserCtxtPtr ctxt;
10977 xmlParserInputPtr inputStream;
10978 char *directory = NULL;
10979 xmlChar *uri;
10980
10981 ctxt = xmlNewParserCtxt();
10982 if (ctxt == NULL) {
10983 return(NULL);
10984 }
10985
10986 uri = xmlBuildURI(URL, base);
10987
10988 if (uri == NULL) {
10989 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10990 if (inputStream == NULL) {
10991 xmlFreeParserCtxt(ctxt);
10992 return(NULL);
10993 }
10994
10995 inputPush(ctxt, inputStream);
10996
10997 if ((ctxt->directory == NULL) && (directory == NULL))
10998 directory = xmlParserGetDirectory((char *)URL);
10999 if ((ctxt->directory == NULL) && (directory != NULL))
11000 ctxt->directory = directory;
11001 } else {
11002 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11003 if (inputStream == NULL) {
11004 xmlFree(uri);
11005 xmlFreeParserCtxt(ctxt);
11006 return(NULL);
11007 }
11008
11009 inputPush(ctxt, inputStream);
11010
11011 if ((ctxt->directory == NULL) && (directory == NULL))
11012 directory = xmlParserGetDirectory((char *)uri);
11013 if ((ctxt->directory == NULL) && (directory != NULL))
11014 ctxt->directory = directory;
11015 xmlFree(uri);
11016 }
Owen Taylor3473f882001-02-23 17:55:21 +000011017 return(ctxt);
11018}
11019
11020/************************************************************************
11021 * *
11022 * Front ends when parsing from a file *
11023 * *
11024 ************************************************************************/
11025
11026/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011027 * xmlCreateURLParserCtxt:
11028 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011029 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011030 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011031 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011032 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011033 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011034 *
11035 * Returns the new parser context or NULL
11036 */
11037xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011038xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011039{
11040 xmlParserCtxtPtr ctxt;
11041 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011042 char *directory = NULL;
11043
Owen Taylor3473f882001-02-23 17:55:21 +000011044 ctxt = xmlNewParserCtxt();
11045 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011046 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011047 return(NULL);
11048 }
11049
Daniel Veillard61b93382003-11-03 14:28:31 +000011050 if (options != 0)
11051 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011052
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011053 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011054 if (inputStream == NULL) {
11055 xmlFreeParserCtxt(ctxt);
11056 return(NULL);
11057 }
11058
Owen Taylor3473f882001-02-23 17:55:21 +000011059 inputPush(ctxt, inputStream);
11060 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011061 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011062 if ((ctxt->directory == NULL) && (directory != NULL))
11063 ctxt->directory = directory;
11064
11065 return(ctxt);
11066}
11067
Daniel Veillard61b93382003-11-03 14:28:31 +000011068/**
11069 * xmlCreateFileParserCtxt:
11070 * @filename: the filename
11071 *
11072 * Create a parser context for a file content.
11073 * Automatic support for ZLIB/Compress compressed document is provided
11074 * by default if found at compile-time.
11075 *
11076 * Returns the new parser context or NULL
11077 */
11078xmlParserCtxtPtr
11079xmlCreateFileParserCtxt(const char *filename)
11080{
11081 return(xmlCreateURLParserCtxt(filename, 0));
11082}
11083
Daniel Veillard81273902003-09-30 00:43:48 +000011084#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011085/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011086 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011087 * @sax: the SAX handler block
11088 * @filename: the filename
11089 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11090 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011091 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011092 *
11093 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11094 * compressed document is provided by default if found at compile-time.
11095 * It use the given SAX function block to handle the parsing callback.
11096 * If sax is NULL, fallback to the default DOM tree building routines.
11097 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011098 * User data (void *) is stored within the parser context in the
11099 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011100 *
Owen Taylor3473f882001-02-23 17:55:21 +000011101 * Returns the resulting document tree
11102 */
11103
11104xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011105xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11106 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011107 xmlDocPtr ret;
11108 xmlParserCtxtPtr ctxt;
11109 char *directory = NULL;
11110
Daniel Veillard635ef722001-10-29 11:48:19 +000011111 xmlInitParser();
11112
Owen Taylor3473f882001-02-23 17:55:21 +000011113 ctxt = xmlCreateFileParserCtxt(filename);
11114 if (ctxt == NULL) {
11115 return(NULL);
11116 }
11117 if (sax != NULL) {
11118 if (ctxt->sax != NULL)
11119 xmlFree(ctxt->sax);
11120 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011121 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011122 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011123 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011124 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011125 }
Owen Taylor3473f882001-02-23 17:55:21 +000011126
11127 if ((ctxt->directory == NULL) && (directory == NULL))
11128 directory = xmlParserGetDirectory(filename);
11129 if ((ctxt->directory == NULL) && (directory != NULL))
11130 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11131
Daniel Veillarddad3f682002-11-17 16:47:27 +000011132 ctxt->recovery = recovery;
11133
Owen Taylor3473f882001-02-23 17:55:21 +000011134 xmlParseDocument(ctxt);
11135
William M. Brackc07329e2003-09-08 01:57:30 +000011136 if ((ctxt->wellFormed) || recovery) {
11137 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011138 if (ret != NULL) {
11139 if (ctxt->input->buf->compressed > 0)
11140 ret->compression = 9;
11141 else
11142 ret->compression = ctxt->input->buf->compressed;
11143 }
William M. Brackc07329e2003-09-08 01:57:30 +000011144 }
Owen Taylor3473f882001-02-23 17:55:21 +000011145 else {
11146 ret = NULL;
11147 xmlFreeDoc(ctxt->myDoc);
11148 ctxt->myDoc = NULL;
11149 }
11150 if (sax != NULL)
11151 ctxt->sax = NULL;
11152 xmlFreeParserCtxt(ctxt);
11153
11154 return(ret);
11155}
11156
11157/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011158 * xmlSAXParseFile:
11159 * @sax: the SAX handler block
11160 * @filename: the filename
11161 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11162 * documents
11163 *
11164 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11165 * compressed document is provided by default if found at compile-time.
11166 * It use the given SAX function block to handle the parsing callback.
11167 * If sax is NULL, fallback to the default DOM tree building routines.
11168 *
11169 * Returns the resulting document tree
11170 */
11171
11172xmlDocPtr
11173xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11174 int recovery) {
11175 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11176}
11177
11178/**
Owen Taylor3473f882001-02-23 17:55:21 +000011179 * xmlRecoverDoc:
11180 * @cur: a pointer to an array of xmlChar
11181 *
11182 * parse an XML in-memory document and build a tree.
11183 * In the case the document is not Well Formed, a tree is built anyway
11184 *
11185 * Returns the resulting document tree
11186 */
11187
11188xmlDocPtr
11189xmlRecoverDoc(xmlChar *cur) {
11190 return(xmlSAXParseDoc(NULL, cur, 1));
11191}
11192
11193/**
11194 * xmlParseFile:
11195 * @filename: the filename
11196 *
11197 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11198 * compressed document is provided by default if found at compile-time.
11199 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011200 * Returns the resulting document tree if the file was wellformed,
11201 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011202 */
11203
11204xmlDocPtr
11205xmlParseFile(const char *filename) {
11206 return(xmlSAXParseFile(NULL, filename, 0));
11207}
11208
11209/**
11210 * xmlRecoverFile:
11211 * @filename: the filename
11212 *
11213 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11214 * compressed document is provided by default if found at compile-time.
11215 * In the case the document is not Well Formed, a tree is built anyway
11216 *
11217 * Returns the resulting document tree
11218 */
11219
11220xmlDocPtr
11221xmlRecoverFile(const char *filename) {
11222 return(xmlSAXParseFile(NULL, filename, 1));
11223}
11224
11225
11226/**
11227 * xmlSetupParserForBuffer:
11228 * @ctxt: an XML parser context
11229 * @buffer: a xmlChar * buffer
11230 * @filename: a file name
11231 *
11232 * Setup the parser context to parse a new buffer; Clears any prior
11233 * contents from the parser context. The buffer parameter must not be
11234 * NULL, but the filename parameter can be
11235 */
11236void
11237xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11238 const char* filename)
11239{
11240 xmlParserInputPtr input;
11241
11242 input = xmlNewInputStream(ctxt);
11243 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011244 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011245 xmlFree(ctxt);
11246 return;
11247 }
11248
11249 xmlClearParserCtxt(ctxt);
11250 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011251 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011252 input->base = buffer;
11253 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011254 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011255 inputPush(ctxt, input);
11256}
11257
11258/**
11259 * xmlSAXUserParseFile:
11260 * @sax: a SAX handler
11261 * @user_data: The user data returned on SAX callbacks
11262 * @filename: a file name
11263 *
11264 * parse an XML file and call the given SAX handler routines.
11265 * Automatic support for ZLIB/Compress compressed document is provided
11266 *
11267 * Returns 0 in case of success or a error number otherwise
11268 */
11269int
11270xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11271 const char *filename) {
11272 int ret = 0;
11273 xmlParserCtxtPtr ctxt;
11274
11275 ctxt = xmlCreateFileParserCtxt(filename);
11276 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011277#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011278 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011279#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011280 xmlFree(ctxt->sax);
11281 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011282 xmlDetectSAX2(ctxt);
11283
Owen Taylor3473f882001-02-23 17:55:21 +000011284 if (user_data != NULL)
11285 ctxt->userData = user_data;
11286
11287 xmlParseDocument(ctxt);
11288
11289 if (ctxt->wellFormed)
11290 ret = 0;
11291 else {
11292 if (ctxt->errNo != 0)
11293 ret = ctxt->errNo;
11294 else
11295 ret = -1;
11296 }
11297 if (sax != NULL)
11298 ctxt->sax = NULL;
11299 xmlFreeParserCtxt(ctxt);
11300
11301 return ret;
11302}
Daniel Veillard81273902003-09-30 00:43:48 +000011303#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011304
11305/************************************************************************
11306 * *
11307 * Front ends when parsing from memory *
11308 * *
11309 ************************************************************************/
11310
11311/**
11312 * xmlCreateMemoryParserCtxt:
11313 * @buffer: a pointer to a char array
11314 * @size: the size of the array
11315 *
11316 * Create a parser context for an XML in-memory document.
11317 *
11318 * Returns the new parser context or NULL
11319 */
11320xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011321xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011322 xmlParserCtxtPtr ctxt;
11323 xmlParserInputPtr input;
11324 xmlParserInputBufferPtr buf;
11325
11326 if (buffer == NULL)
11327 return(NULL);
11328 if (size <= 0)
11329 return(NULL);
11330
11331 ctxt = xmlNewParserCtxt();
11332 if (ctxt == NULL)
11333 return(NULL);
11334
Daniel Veillard53350552003-09-18 13:35:51 +000011335 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011336 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011337 if (buf == NULL) {
11338 xmlFreeParserCtxt(ctxt);
11339 return(NULL);
11340 }
Owen Taylor3473f882001-02-23 17:55:21 +000011341
11342 input = xmlNewInputStream(ctxt);
11343 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011344 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011345 xmlFreeParserCtxt(ctxt);
11346 return(NULL);
11347 }
11348
11349 input->filename = NULL;
11350 input->buf = buf;
11351 input->base = input->buf->buffer->content;
11352 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011353 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011354
11355 inputPush(ctxt, input);
11356 return(ctxt);
11357}
11358
Daniel Veillard81273902003-09-30 00:43:48 +000011359#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011360/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011361 * xmlSAXParseMemoryWithData:
11362 * @sax: the SAX handler block
11363 * @buffer: an pointer to a char array
11364 * @size: the size of the array
11365 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11366 * documents
11367 * @data: the userdata
11368 *
11369 * parse an XML in-memory block and use the given SAX function block
11370 * to handle the parsing callback. If sax is NULL, fallback to the default
11371 * DOM tree building routines.
11372 *
11373 * User data (void *) is stored within the parser context in the
11374 * context's _private member, so it is available nearly everywhere in libxml
11375 *
11376 * Returns the resulting document tree
11377 */
11378
11379xmlDocPtr
11380xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11381 int size, int recovery, void *data) {
11382 xmlDocPtr ret;
11383 xmlParserCtxtPtr ctxt;
11384
11385 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11386 if (ctxt == NULL) return(NULL);
11387 if (sax != NULL) {
11388 if (ctxt->sax != NULL)
11389 xmlFree(ctxt->sax);
11390 ctxt->sax = sax;
11391 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011392 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011393 if (data!=NULL) {
11394 ctxt->_private=data;
11395 }
11396
Daniel Veillardadba5f12003-04-04 16:09:01 +000011397 ctxt->recovery = recovery;
11398
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011399 xmlParseDocument(ctxt);
11400
11401 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11402 else {
11403 ret = NULL;
11404 xmlFreeDoc(ctxt->myDoc);
11405 ctxt->myDoc = NULL;
11406 }
11407 if (sax != NULL)
11408 ctxt->sax = NULL;
11409 xmlFreeParserCtxt(ctxt);
11410
11411 return(ret);
11412}
11413
11414/**
Owen Taylor3473f882001-02-23 17:55:21 +000011415 * xmlSAXParseMemory:
11416 * @sax: the SAX handler block
11417 * @buffer: an pointer to a char array
11418 * @size: the size of the array
11419 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11420 * documents
11421 *
11422 * parse an XML in-memory block and use the given SAX function block
11423 * to handle the parsing callback. If sax is NULL, fallback to the default
11424 * DOM tree building routines.
11425 *
11426 * Returns the resulting document tree
11427 */
11428xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011429xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11430 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011431 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011432}
11433
11434/**
11435 * xmlParseMemory:
11436 * @buffer: an pointer to a char array
11437 * @size: the size of the array
11438 *
11439 * parse an XML in-memory block and build a tree.
11440 *
11441 * Returns the resulting document tree
11442 */
11443
Daniel Veillard50822cb2001-07-26 20:05:51 +000011444xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011445 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11446}
11447
11448/**
11449 * xmlRecoverMemory:
11450 * @buffer: an pointer to a char array
11451 * @size: the size of the array
11452 *
11453 * parse an XML in-memory block and build a tree.
11454 * In the case the document is not Well Formed, a tree is built anyway
11455 *
11456 * Returns the resulting document tree
11457 */
11458
Daniel Veillard50822cb2001-07-26 20:05:51 +000011459xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011460 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11461}
11462
11463/**
11464 * xmlSAXUserParseMemory:
11465 * @sax: a SAX handler
11466 * @user_data: The user data returned on SAX callbacks
11467 * @buffer: an in-memory XML document input
11468 * @size: the length of the XML document in bytes
11469 *
11470 * A better SAX parsing routine.
11471 * parse an XML in-memory buffer and call the given SAX handler routines.
11472 *
11473 * Returns 0 in case of success or a error number otherwise
11474 */
11475int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011476 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011477 int ret = 0;
11478 xmlParserCtxtPtr ctxt;
11479 xmlSAXHandlerPtr oldsax = NULL;
11480
Daniel Veillard9e923512002-08-14 08:48:52 +000011481 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011482 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11483 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011484 oldsax = ctxt->sax;
11485 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011486 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011487 if (user_data != NULL)
11488 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011489
11490 xmlParseDocument(ctxt);
11491
11492 if (ctxt->wellFormed)
11493 ret = 0;
11494 else {
11495 if (ctxt->errNo != 0)
11496 ret = ctxt->errNo;
11497 else
11498 ret = -1;
11499 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011500 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011501 xmlFreeParserCtxt(ctxt);
11502
11503 return ret;
11504}
Daniel Veillard81273902003-09-30 00:43:48 +000011505#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011506
11507/**
11508 * xmlCreateDocParserCtxt:
11509 * @cur: a pointer to an array of xmlChar
11510 *
11511 * Creates a parser context for an XML in-memory document.
11512 *
11513 * Returns the new parser context or NULL
11514 */
11515xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011516xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011517 int len;
11518
11519 if (cur == NULL)
11520 return(NULL);
11521 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011522 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011523}
11524
Daniel Veillard81273902003-09-30 00:43:48 +000011525#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011526/**
11527 * xmlSAXParseDoc:
11528 * @sax: the SAX handler block
11529 * @cur: a pointer to an array of xmlChar
11530 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11531 * documents
11532 *
11533 * parse an XML in-memory document and build a tree.
11534 * It use the given SAX function block to handle the parsing callback.
11535 * If sax is NULL, fallback to the default DOM tree building routines.
11536 *
11537 * Returns the resulting document tree
11538 */
11539
11540xmlDocPtr
11541xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11542 xmlDocPtr ret;
11543 xmlParserCtxtPtr ctxt;
11544
11545 if (cur == NULL) return(NULL);
11546
11547
11548 ctxt = xmlCreateDocParserCtxt(cur);
11549 if (ctxt == NULL) return(NULL);
11550 if (sax != NULL) {
11551 ctxt->sax = sax;
11552 ctxt->userData = NULL;
11553 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011554 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011555
11556 xmlParseDocument(ctxt);
11557 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11558 else {
11559 ret = NULL;
11560 xmlFreeDoc(ctxt->myDoc);
11561 ctxt->myDoc = NULL;
11562 }
11563 if (sax != NULL)
11564 ctxt->sax = NULL;
11565 xmlFreeParserCtxt(ctxt);
11566
11567 return(ret);
11568}
11569
11570/**
11571 * xmlParseDoc:
11572 * @cur: a pointer to an array of xmlChar
11573 *
11574 * parse an XML in-memory document and build a tree.
11575 *
11576 * Returns the resulting document tree
11577 */
11578
11579xmlDocPtr
11580xmlParseDoc(xmlChar *cur) {
11581 return(xmlSAXParseDoc(NULL, cur, 0));
11582}
Daniel Veillard81273902003-09-30 00:43:48 +000011583#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011584
Daniel Veillard81273902003-09-30 00:43:48 +000011585#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011586/************************************************************************
11587 * *
11588 * Specific function to keep track of entities references *
11589 * and used by the XSLT debugger *
11590 * *
11591 ************************************************************************/
11592
11593static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11594
11595/**
11596 * xmlAddEntityReference:
11597 * @ent : A valid entity
11598 * @firstNode : A valid first node for children of entity
11599 * @lastNode : A valid last node of children entity
11600 *
11601 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11602 */
11603static void
11604xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11605 xmlNodePtr lastNode)
11606{
11607 if (xmlEntityRefFunc != NULL) {
11608 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11609 }
11610}
11611
11612
11613/**
11614 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011615 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011616 *
11617 * Set the function to call call back when a xml reference has been made
11618 */
11619void
11620xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11621{
11622 xmlEntityRefFunc = func;
11623}
Daniel Veillard81273902003-09-30 00:43:48 +000011624#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011625
11626/************************************************************************
11627 * *
11628 * Miscellaneous *
11629 * *
11630 ************************************************************************/
11631
11632#ifdef LIBXML_XPATH_ENABLED
11633#include <libxml/xpath.h>
11634#endif
11635
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011636extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011637static int xmlParserInitialized = 0;
11638
11639/**
11640 * xmlInitParser:
11641 *
11642 * Initialization function for the XML parser.
11643 * This is not reentrant. Call once before processing in case of
11644 * use in multithreaded programs.
11645 */
11646
11647void
11648xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011649 if (xmlParserInitialized != 0)
11650 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011651
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011652 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11653 (xmlGenericError == NULL))
11654 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011655 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011656 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011657 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011658 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011659 xmlDefaultSAXHandlerInit();
11660 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011661#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011662 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011663#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011664#ifdef LIBXML_HTML_ENABLED
11665 htmlInitAutoClose();
11666 htmlDefaultSAXHandlerInit();
11667#endif
11668#ifdef LIBXML_XPATH_ENABLED
11669 xmlXPathInit();
11670#endif
11671 xmlParserInitialized = 1;
11672}
11673
11674/**
11675 * xmlCleanupParser:
11676 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011677 * Cleanup function for the XML library. It tries to reclaim all
11678 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011679 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011680 * function should not prevent reusing the library but one should
11681 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011682 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011683 */
11684
11685void
11686xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011687 if (!xmlParserInitialized)
11688 return;
11689
Owen Taylor3473f882001-02-23 17:55:21 +000011690 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011691#ifdef LIBXML_CATALOG_ENABLED
11692 xmlCatalogCleanup();
11693#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011694 xmlCleanupInputCallbacks();
11695#ifdef LIBXML_OUTPUT_ENABLED
11696 xmlCleanupOutputCallbacks();
11697#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011698 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011699 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011700 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011701 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011702 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011703}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011704
11705/************************************************************************
11706 * *
11707 * New set (2.6.0) of simpler and more flexible APIs *
11708 * *
11709 ************************************************************************/
11710
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can safely be used as the branch of an if/else.
 * Invoke it followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
11722
11723/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011724 * xmlCtxtReset:
11725 * @ctxt: an XML parser context
11726 *
11727 * Reset a parser context
11728 */
11729void
11730xmlCtxtReset(xmlParserCtxtPtr ctxt)
11731{
11732 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011733 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011734
11735 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11736 xmlFreeInputStream(input);
11737 }
11738 ctxt->inputNr = 0;
11739 ctxt->input = NULL;
11740
11741 ctxt->spaceNr = 0;
11742 ctxt->spaceTab[0] = -1;
11743 ctxt->space = &ctxt->spaceTab[0];
11744
11745
11746 ctxt->nodeNr = 0;
11747 ctxt->node = NULL;
11748
11749 ctxt->nameNr = 0;
11750 ctxt->name = NULL;
11751
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011752 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011753 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011754 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011755 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011756 DICT_FREE(ctxt->directory);
11757 ctxt->directory = NULL;
11758 DICT_FREE(ctxt->extSubURI);
11759 ctxt->extSubURI = NULL;
11760 DICT_FREE(ctxt->extSubSystem);
11761 ctxt->extSubSystem = NULL;
11762 if (ctxt->myDoc != NULL)
11763 xmlFreeDoc(ctxt->myDoc);
11764 ctxt->myDoc = NULL;
11765
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011766 ctxt->standalone = -1;
11767 ctxt->hasExternalSubset = 0;
11768 ctxt->hasPErefs = 0;
11769 ctxt->html = 0;
11770 ctxt->external = 0;
11771 ctxt->instate = XML_PARSER_START;
11772 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011773
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011774 ctxt->wellFormed = 1;
11775 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000011776 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011777 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011778#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011779 ctxt->vctxt.userData = ctxt;
11780 ctxt->vctxt.error = xmlParserValidityError;
11781 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011782#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011783 ctxt->record_info = 0;
11784 ctxt->nbChars = 0;
11785 ctxt->checkIndex = 0;
11786 ctxt->inSubset = 0;
11787 ctxt->errNo = XML_ERR_OK;
11788 ctxt->depth = 0;
11789 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11790 ctxt->catalogs = NULL;
11791 xmlInitNodeInfoSeq(&ctxt->node_seq);
11792
11793 if (ctxt->attsDefault != NULL) {
11794 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11795 ctxt->attsDefault = NULL;
11796 }
11797 if (ctxt->attsSpecial != NULL) {
11798 xmlHashFree(ctxt->attsSpecial, NULL);
11799 ctxt->attsSpecial = NULL;
11800 }
11801
Daniel Veillard4432df22003-09-28 18:58:27 +000011802#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011803 if (ctxt->catalogs != NULL)
11804 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011805#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011806 if (ctxt->lastError.code != XML_ERR_OK)
11807 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011808}
11809
11810/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011811 * xmlCtxtResetPush:
11812 * @ctxt: an XML parser context
11813 * @chunk: a pointer to an array of chars
11814 * @size: number of chars in the array
11815 * @filename: an optional file name or URI
11816 * @encoding: the document encoding, or NULL
11817 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011818 * Reset a push parser context
11819 *
11820 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011821 */
11822int
11823xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11824 int size, const char *filename, const char *encoding)
11825{
11826 xmlParserInputPtr inputStream;
11827 xmlParserInputBufferPtr buf;
11828 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11829
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011830 if (ctxt == NULL)
11831 return(1);
11832
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011833 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11834 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11835
11836 buf = xmlAllocParserInputBuffer(enc);
11837 if (buf == NULL)
11838 return(1);
11839
11840 if (ctxt == NULL) {
11841 xmlFreeParserInputBuffer(buf);
11842 return(1);
11843 }
11844
11845 xmlCtxtReset(ctxt);
11846
11847 if (ctxt->pushTab == NULL) {
11848 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11849 sizeof(xmlChar *));
11850 if (ctxt->pushTab == NULL) {
11851 xmlErrMemory(ctxt, NULL);
11852 xmlFreeParserInputBuffer(buf);
11853 return(1);
11854 }
11855 }
11856
11857 if (filename == NULL) {
11858 ctxt->directory = NULL;
11859 } else {
11860 ctxt->directory = xmlParserGetDirectory(filename);
11861 }
11862
11863 inputStream = xmlNewInputStream(ctxt);
11864 if (inputStream == NULL) {
11865 xmlFreeParserInputBuffer(buf);
11866 return(1);
11867 }
11868
11869 if (filename == NULL)
11870 inputStream->filename = NULL;
11871 else
11872 inputStream->filename = (char *)
11873 xmlCanonicPath((const xmlChar *) filename);
11874 inputStream->buf = buf;
11875 inputStream->base = inputStream->buf->buffer->content;
11876 inputStream->cur = inputStream->buf->buffer->content;
11877 inputStream->end =
11878 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11879
11880 inputPush(ctxt, inputStream);
11881
11882 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11883 (ctxt->input->buf != NULL)) {
11884 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11885 int cur = ctxt->input->cur - ctxt->input->base;
11886
11887 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11888
11889 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11890 ctxt->input->cur = ctxt->input->base + cur;
11891 ctxt->input->end =
11892 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11893 use];
11894#ifdef DEBUG_PUSH
11895 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11896#endif
11897 }
11898
11899 if (encoding != NULL) {
11900 xmlCharEncodingHandlerPtr hdlr;
11901
11902 hdlr = xmlFindCharEncodingHandler(encoding);
11903 if (hdlr != NULL) {
11904 xmlSwitchToEncoding(ctxt, hdlr);
11905 } else {
11906 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11907 "Unsupported encoding %s\n", BAD_CAST encoding);
11908 }
11909 } else if (enc != XML_CHAR_ENCODING_NONE) {
11910 xmlSwitchEncoding(ctxt, enc);
11911 }
11912
11913 return(0);
11914}
11915
11916/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011917 * xmlCtxtUseOptions:
11918 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011919 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011920 *
11921 * Applies the options to the parser context
11922 *
11923 * Returns 0 in case of success, the set of unknown or unimplemented options
11924 * in case of error.
11925 */
11926int
11927xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11928{
11929 if (options & XML_PARSE_RECOVER) {
11930 ctxt->recovery = 1;
11931 options -= XML_PARSE_RECOVER;
11932 } else
11933 ctxt->recovery = 0;
11934 if (options & XML_PARSE_DTDLOAD) {
11935 ctxt->loadsubset = XML_DETECT_IDS;
11936 options -= XML_PARSE_DTDLOAD;
11937 } else
11938 ctxt->loadsubset = 0;
11939 if (options & XML_PARSE_DTDATTR) {
11940 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
11941 options -= XML_PARSE_DTDATTR;
11942 }
11943 if (options & XML_PARSE_NOENT) {
11944 ctxt->replaceEntities = 1;
11945 /* ctxt->loadsubset |= XML_DETECT_IDS; */
11946 options -= XML_PARSE_NOENT;
11947 } else
11948 ctxt->replaceEntities = 0;
11949 if (options & XML_PARSE_NOWARNING) {
11950 ctxt->sax->warning = NULL;
11951 options -= XML_PARSE_NOWARNING;
11952 }
11953 if (options & XML_PARSE_NOERROR) {
11954 ctxt->sax->error = NULL;
11955 ctxt->sax->fatalError = NULL;
11956 options -= XML_PARSE_NOERROR;
11957 }
11958 if (options & XML_PARSE_PEDANTIC) {
11959 ctxt->pedantic = 1;
11960 options -= XML_PARSE_PEDANTIC;
11961 } else
11962 ctxt->pedantic = 0;
11963 if (options & XML_PARSE_NOBLANKS) {
11964 ctxt->keepBlanks = 0;
11965 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
11966 options -= XML_PARSE_NOBLANKS;
11967 } else
11968 ctxt->keepBlanks = 1;
11969 if (options & XML_PARSE_DTDVALID) {
11970 ctxt->validate = 1;
11971 if (options & XML_PARSE_NOWARNING)
11972 ctxt->vctxt.warning = NULL;
11973 if (options & XML_PARSE_NOERROR)
11974 ctxt->vctxt.error = NULL;
11975 options -= XML_PARSE_DTDVALID;
11976 } else
11977 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000011978#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011979 if (options & XML_PARSE_SAX1) {
11980 ctxt->sax->startElement = xmlSAX2StartElement;
11981 ctxt->sax->endElement = xmlSAX2EndElement;
11982 ctxt->sax->startElementNs = NULL;
11983 ctxt->sax->endElementNs = NULL;
11984 ctxt->sax->initialized = 1;
11985 options -= XML_PARSE_SAX1;
11986 }
Daniel Veillard81273902003-09-30 00:43:48 +000011987#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011988 if (options & XML_PARSE_NODICT) {
11989 ctxt->dictNames = 0;
11990 options -= XML_PARSE_NODICT;
11991 } else {
11992 ctxt->dictNames = 1;
11993 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000011994 if (options & XML_PARSE_NOCDATA) {
11995 ctxt->sax->cdataBlock = NULL;
11996 options -= XML_PARSE_NOCDATA;
11997 }
11998 if (options & XML_PARSE_NSCLEAN) {
11999 ctxt->options |= XML_PARSE_NSCLEAN;
12000 options -= XML_PARSE_NSCLEAN;
12001 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012002 if (options & XML_PARSE_NONET) {
12003 ctxt->options |= XML_PARSE_NONET;
12004 options -= XML_PARSE_NONET;
12005 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012006 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012007 return (options);
12008}
12009
12010/**
12011 * xmlDoRead:
12012 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012013 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012014 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012015 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012016 * @reuse: keep the context for reuse
12017 *
12018 * Common front-end for the xmlRead functions
12019 *
12020 * Returns the resulting document tree or NULL
12021 */
12022static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012023xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12024 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012025{
12026 xmlDocPtr ret;
12027
12028 xmlCtxtUseOptions(ctxt, options);
12029 if (encoding != NULL) {
12030 xmlCharEncodingHandlerPtr hdlr;
12031
12032 hdlr = xmlFindCharEncodingHandler(encoding);
12033 if (hdlr != NULL)
12034 xmlSwitchToEncoding(ctxt, hdlr);
12035 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012036 if ((URL != NULL) && (ctxt->input != NULL) &&
12037 (ctxt->input->filename == NULL))
12038 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012039 xmlParseDocument(ctxt);
12040 if ((ctxt->wellFormed) || ctxt->recovery)
12041 ret = ctxt->myDoc;
12042 else {
12043 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012044 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012045 xmlFreeDoc(ctxt->myDoc);
12046 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012047 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012048 ctxt->myDoc = NULL;
12049 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012050 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012051 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012052
12053 return (ret);
12054}
12055
12056/**
12057 * xmlReadDoc:
12058 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012059 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012060 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012061 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012062 *
12063 * parse an XML in-memory document and build a tree.
12064 *
12065 * Returns the resulting document tree
12066 */
12067xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012068xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012069{
12070 xmlParserCtxtPtr ctxt;
12071
12072 if (cur == NULL)
12073 return (NULL);
12074
12075 ctxt = xmlCreateDocParserCtxt(cur);
12076 if (ctxt == NULL)
12077 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012078 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012079}
12080
12081/**
12082 * xmlReadFile:
12083 * @filename: a file or URL
12084 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012085 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012086 *
12087 * parse an XML file from the filesystem or the network.
12088 *
12089 * Returns the resulting document tree
12090 */
12091xmlDocPtr
12092xmlReadFile(const char *filename, const char *encoding, int options)
12093{
12094 xmlParserCtxtPtr ctxt;
12095
Daniel Veillard61b93382003-11-03 14:28:31 +000012096 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012097 if (ctxt == NULL)
12098 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012099 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012100}
12101
12102/**
12103 * xmlReadMemory:
12104 * @buffer: a pointer to a char array
12105 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012106 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012107 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012108 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012109 *
12110 * parse an XML in-memory document and build a tree.
12111 *
12112 * Returns the resulting document tree
12113 */
12114xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012115xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012116{
12117 xmlParserCtxtPtr ctxt;
12118
12119 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12120 if (ctxt == NULL)
12121 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012122 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012123}
12124
12125/**
12126 * xmlReadFd:
12127 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012128 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012129 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012130 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012131 *
12132 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012133 * NOTE that the file descriptor will not be closed when the
12134 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012135 *
12136 * Returns the resulting document tree
12137 */
12138xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012139xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012140{
12141 xmlParserCtxtPtr ctxt;
12142 xmlParserInputBufferPtr input;
12143 xmlParserInputPtr stream;
12144
12145 if (fd < 0)
12146 return (NULL);
12147
12148 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12149 if (input == NULL)
12150 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012151 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012152 ctxt = xmlNewParserCtxt();
12153 if (ctxt == NULL) {
12154 xmlFreeParserInputBuffer(input);
12155 return (NULL);
12156 }
12157 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12158 if (stream == NULL) {
12159 xmlFreeParserInputBuffer(input);
12160 xmlFreeParserCtxt(ctxt);
12161 return (NULL);
12162 }
12163 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012164 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012165}
12166
12167/**
12168 * xmlReadIO:
12169 * @ioread: an I/O read function
12170 * @ioclose: an I/O close function
12171 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012172 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012173 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012174 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012175 *
12176 * parse an XML document from I/O functions and source and build a tree.
12177 *
12178 * Returns the resulting document tree
12179 */
12180xmlDocPtr
12181xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012182 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012183{
12184 xmlParserCtxtPtr ctxt;
12185 xmlParserInputBufferPtr input;
12186 xmlParserInputPtr stream;
12187
12188 if (ioread == NULL)
12189 return (NULL);
12190
12191 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12192 XML_CHAR_ENCODING_NONE);
12193 if (input == NULL)
12194 return (NULL);
12195 ctxt = xmlNewParserCtxt();
12196 if (ctxt == NULL) {
12197 xmlFreeParserInputBuffer(input);
12198 return (NULL);
12199 }
12200 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12201 if (stream == NULL) {
12202 xmlFreeParserInputBuffer(input);
12203 xmlFreeParserCtxt(ctxt);
12204 return (NULL);
12205 }
12206 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012207 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012208}
12209
12210/**
12211 * xmlCtxtReadDoc:
12212 * @ctxt: an XML parser context
12213 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012214 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012215 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012216 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012217 *
12218 * parse an XML in-memory document and build a tree.
12219 * This reuses the existing @ctxt parser context
12220 *
12221 * Returns the resulting document tree
12222 */
12223xmlDocPtr
12224xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012225 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012226{
12227 xmlParserInputPtr stream;
12228
12229 if (cur == NULL)
12230 return (NULL);
12231 if (ctxt == NULL)
12232 return (NULL);
12233
12234 xmlCtxtReset(ctxt);
12235
12236 stream = xmlNewStringInputStream(ctxt, cur);
12237 if (stream == NULL) {
12238 return (NULL);
12239 }
12240 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012241 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012242}
12243
12244/**
12245 * xmlCtxtReadFile:
12246 * @ctxt: an XML parser context
12247 * @filename: a file or URL
12248 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012249 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012250 *
12251 * parse an XML file from the filesystem or the network.
12252 * This reuses the existing @ctxt parser context
12253 *
12254 * Returns the resulting document tree
12255 */
12256xmlDocPtr
12257xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12258 const char *encoding, int options)
12259{
12260 xmlParserInputPtr stream;
12261
12262 if (filename == NULL)
12263 return (NULL);
12264 if (ctxt == NULL)
12265 return (NULL);
12266
12267 xmlCtxtReset(ctxt);
12268
12269 stream = xmlNewInputFromFile(ctxt, filename);
12270 if (stream == NULL) {
12271 return (NULL);
12272 }
12273 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012274 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012275}
12276
12277/**
12278 * xmlCtxtReadMemory:
12279 * @ctxt: an XML parser context
12280 * @buffer: a pointer to a char array
12281 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012282 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012283 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012284 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012285 *
12286 * parse an XML in-memory document and build a tree.
12287 * This reuses the existing @ctxt parser context
12288 *
12289 * Returns the resulting document tree
12290 */
12291xmlDocPtr
12292xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012293 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012294{
12295 xmlParserInputBufferPtr input;
12296 xmlParserInputPtr stream;
12297
12298 if (ctxt == NULL)
12299 return (NULL);
12300 if (buffer == NULL)
12301 return (NULL);
12302
12303 xmlCtxtReset(ctxt);
12304
12305 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12306 if (input == NULL) {
12307 return(NULL);
12308 }
12309
12310 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12311 if (stream == NULL) {
12312 xmlFreeParserInputBuffer(input);
12313 return(NULL);
12314 }
12315
12316 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012317 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012318}
12319
12320/**
12321 * xmlCtxtReadFd:
12322 * @ctxt: an XML parser context
12323 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012324 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012325 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012326 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012327 *
12328 * parse an XML from a file descriptor and build a tree.
12329 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012330 * NOTE that the file descriptor will not be closed when the
12331 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012332 *
12333 * Returns the resulting document tree
12334 */
12335xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012336xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12337 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012338{
12339 xmlParserInputBufferPtr input;
12340 xmlParserInputPtr stream;
12341
12342 if (fd < 0)
12343 return (NULL);
12344 if (ctxt == NULL)
12345 return (NULL);
12346
12347 xmlCtxtReset(ctxt);
12348
12349
12350 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12351 if (input == NULL)
12352 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012353 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012354 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12355 if (stream == NULL) {
12356 xmlFreeParserInputBuffer(input);
12357 return (NULL);
12358 }
12359 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012360 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012361}
12362
12363/**
12364 * xmlCtxtReadIO:
12365 * @ctxt: an XML parser context
12366 * @ioread: an I/O read function
12367 * @ioclose: an I/O close function
12368 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012369 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012370 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012371 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012372 *
12373 * parse an XML document from I/O functions and source and build a tree.
12374 * This reuses the existing @ctxt parser context
12375 *
12376 * Returns the resulting document tree
12377 */
12378xmlDocPtr
12379xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12380 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012381 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012382 const char *encoding, int options)
12383{
12384 xmlParserInputBufferPtr input;
12385 xmlParserInputPtr stream;
12386
12387 if (ioread == NULL)
12388 return (NULL);
12389 if (ctxt == NULL)
12390 return (NULL);
12391
12392 xmlCtxtReset(ctxt);
12393
12394 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12395 XML_CHAR_ENCODING_NONE);
12396 if (input == NULL)
12397 return (NULL);
12398 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12399 if (stream == NULL) {
12400 xmlFreeParserInputBuffer(input);
12401 return (NULL);
12402 }
12403 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012404 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012405}