blob: 77a12f5003f400e5eabc44bfcd666ce84e21568a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit applied to the XML documents the parser agrees
 * to process.  The value is a safety boundary, not an intrinsic limit
 * of the parser itself.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the XML prefixed PI targets allowed by the
 * W3C specs.  The table is read-only: both the strings and the array
 * slots are const so it cannot be modified by accident.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
/*
 * Forward declaration of xmlParseStringEntityRef(); its definition is
 * not in this part of the file.
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000857 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
Daniel Veillarda2351322004-06-27 12:08:10 +00001042
1043#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001044/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001045 * nameNsPush:
1046 * @ctxt: an XML parser context
1047 * @value: the element name
1048 * @prefix: the element prefix
1049 * @URI: the element namespace name
1050 *
1051 * Pushes a new element name/prefix/URL on top of the name stack
1052 *
1053 * Returns -1 in case of error, the index in the stack otherwise
1054 */
1055static int
1056nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1057 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1058{
1059 if (ctxt->nameNr >= ctxt->nameMax) {
1060 const xmlChar * *tmp;
1061 void **tmp2;
1062 ctxt->nameMax *= 2;
1063 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1064 ctxt->nameMax *
1065 sizeof(ctxt->nameTab[0]));
1066 if (tmp == NULL) {
1067 ctxt->nameMax /= 2;
1068 goto mem_error;
1069 }
1070 ctxt->nameTab = tmp;
1071 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1072 ctxt->nameMax * 3 *
1073 sizeof(ctxt->pushTab[0]));
1074 if (tmp2 == NULL) {
1075 ctxt->nameMax /= 2;
1076 goto mem_error;
1077 }
1078 ctxt->pushTab = tmp2;
1079 }
1080 ctxt->nameTab[ctxt->nameNr] = value;
1081 ctxt->name = value;
1082 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1083 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001084 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085 return (ctxt->nameNr++);
1086mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001087 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001088 return (-1);
1089}
1090/**
1091 * nameNsPop:
1092 * @ctxt: an XML parser context
1093 *
1094 * Pops the top element/prefix/URI name from the name stack
1095 *
1096 * Returns the name just removed
1097 */
1098static const xmlChar *
1099nameNsPop(xmlParserCtxtPtr ctxt)
1100{
1101 const xmlChar *ret;
1102
1103 if (ctxt->nameNr <= 0)
1104 return (0);
1105 ctxt->nameNr--;
1106 if (ctxt->nameNr > 0)
1107 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1108 else
1109 ctxt->name = NULL;
1110 ret = ctxt->nameTab[ctxt->nameNr];
1111 ctxt->nameTab[ctxt->nameNr] = NULL;
1112 return (ret);
1113}
Daniel Veillarda2351322004-06-27 12:08:10 +00001114#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001115
1116/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001117 * namePush:
1118 * @ctxt: an XML parser context
1119 * @value: the element name
1120 *
1121 * Pushes a new element name on top of the name stack
1122 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001123 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124 */
1125extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001126namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127{
1128 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001129 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001130 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001132 ctxt->nameMax *
1133 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (tmp == NULL) {
1135 ctxt->nameMax /= 2;
1136 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001137 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001138 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001139 }
1140 ctxt->nameTab[ctxt->nameNr] = value;
1141 ctxt->name = value;
1142 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001146}
1147/**
1148 * namePop:
1149 * @ctxt: an XML parser context
1150 *
1151 * Pops the top element name from the name stack
1152 *
1153 * Returns the name just removed
1154 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156namePop(xmlParserCtxtPtr ctxt)
1157{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001158 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001159
1160 if (ctxt->nameNr <= 0)
1161 return (0);
1162 ctxt->nameNr--;
1163 if (ctxt->nameNr > 0)
1164 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1165 else
1166 ctxt->name = NULL;
1167 ret = ctxt->nameTab[ctxt->nameNr];
1168 ctxt->nameTab[ctxt->nameNr] = 0;
1169 return (ret);
1170}
Owen Taylor3473f882001-02-23 17:55:21 +00001171
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001172static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001173 if (ctxt->spaceNr >= ctxt->spaceMax) {
1174 ctxt->spaceMax *= 2;
1175 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1176 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1177 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001178 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001179 return(0);
1180 }
1181 }
1182 ctxt->spaceTab[ctxt->spaceNr] = val;
1183 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1184 return(ctxt->spaceNr++);
1185}
1186
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001187static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001188 int ret;
1189 if (ctxt->spaceNr <= 0) return(0);
1190 ctxt->spaceNr--;
1191 if (ctxt->spaceNr > 0)
1192 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1193 else
1194 ctxt->space = NULL;
1195 ret = ctxt->spaceTab[ctxt->spaceNr];
1196 ctxt->spaceTab[ctxt->spaceNr] = -1;
1197 return(ret);
1198}
1199
1200/*
1201 * Macros for accessing the content. Those should be used only by the parser,
1202 * and not exported.
1203 *
1204 * Dirty macros, i.e. one often need to make assumption on the context to
1205 * use them
1206 *
1207 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1208 * To be used with extreme caution since operations consuming
1209 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1214 * RAW same as CUR but in the input buffer, bypass any token
1215 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1218 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001219 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001222 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1223 *
1224 * NEXT Skip to the next character, this does the proper decoding
1225 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001226 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001227 * CUR_CHAR(l) returns the current unicode character (int), set l
1228 * to the number of xmlChars used for the encoding [0-5].
1229 * CUR_SCHAR same but operate on a string instead of the context
1230 * COPY_BUF copy the current unicode char to the target buffer, increment
1231 * the index
1232 * GROW, SHRINK handling of input buffers
1233 */
1234
/* Direct accessors on the current input; all of them expect a local `ctxt` */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s are c1..cn.
 * Evaluation stops at the first mismatch. @s is evaluated several times
 * and must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance by @val xmlChars without looking at them (the column
 * is advanced by @val, the line is never changed), then handle a PE
 * reference or an exhausted input at the new position.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the @val xmlChars one by one so that
 * line and column stay correct when newlines are skipped.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {					\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

Daniel Veillarda880b122003-04-21 21:36:41 +00001281#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001282 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1283 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001284 xmlSHRINK (ctxt);
1285
1286static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1287 xmlParserInputShrink(ctxt->input);
1288 if ((*ctxt->input->cur == 0) &&
1289 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1290 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001291 }
Owen Taylor3473f882001-02-23 17:55:21 +00001292
Daniel Veillarda880b122003-04-21 21:36:41 +00001293#define GROW if ((ctxt->progressive == 0) && \
1294 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001295 xmlGROW (ctxt);
1296
1297static void xmlGROW (xmlParserCtxtPtr ctxt) {
1298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1299 if ((*ctxt->input->cur == 0) &&
1300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1301 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001302}
Owen Taylor3473f882001-02-23 17:55:21 +00001303
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, counting it as one column, and
 * ask for more input if the cursor lands on a 0 byte. Expands to a
 * braced block, not a do/while.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is @l xmlChars long,
 * updating line/col from its first byte, then handle a PE reference at
 * the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* @l must be an lvalue: it receives the length of the character */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character @v at @b[@i] and advance @i; a
 * single byte is stored when @l is 1, xmlCopyCharMultiByte() is used
 * otherwise. Expands to an unbraced if/else without trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001330
1331/**
1332 * xmlSkipBlankChars:
1333 * @ctxt: the XML parser context
1334 *
1335 * skip all blanks character found at that point in the input streams.
1336 * It pops up finished entities in the process if allowable at that point.
1337 *
1338 * Returns the number of space chars skipped
1339 */
1340
1341int
1342xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001343 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001344
1345 /*
1346 * It's Okay to use CUR/NEXT here since all the blanks are on
1347 * the ASCII range.
1348 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001349 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1350 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001351 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001352 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001353 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001354 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001355 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001356 if (*cur == '\n') {
1357 ctxt->input->line++; ctxt->input->col = 1;
1358 }
1359 cur++;
1360 res++;
1361 if (*cur == 0) {
1362 ctxt->input->cur = cur;
1363 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1364 cur = ctxt->input->cur;
1365 }
1366 }
1367 ctxt->input->cur = cur;
1368 } else {
1369 int cur;
1370 do {
1371 cur = CUR;
1372 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1373 NEXT;
1374 cur = CUR;
1375 res++;
1376 }
1377 while ((cur == 0) && (ctxt->inputNr > 1) &&
1378 (ctxt->instate != XML_PARSER_COMMENT)) {
1379 xmlPopInput(ctxt);
1380 cur = CUR;
1381 }
1382 /*
1383 * Need to handle support of entities branching here
1384 */
1385 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1386 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1387 }
Owen Taylor3473f882001-02-23 17:55:21 +00001388 return(res);
1389}
1390
1391/************************************************************************
1392 * *
1393 * Commodity functions to handle entities *
1394 * *
1395 ************************************************************************/
1396
1397/**
1398 * xmlPopInput:
1399 * @ctxt: an XML parser context
1400 *
1401 * xmlPopInput: the current input pointed by ctxt->input came to an end
1402 * pop it and return the next char.
1403 *
1404 * Returns the current xmlChar in the parser context
1405 */
1406xmlChar
1407xmlPopInput(xmlParserCtxtPtr ctxt) {
1408 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1409 if (xmlParserDebugEntities)
1410 xmlGenericError(xmlGenericErrorContext,
1411 "Popping input %d\n", ctxt->inputNr);
1412 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001413 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001414 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1415 return(xmlPopInput(ctxt));
1416 return(CUR);
1417}
1418
1419/**
1420 * xmlPushInput:
1421 * @ctxt: an XML parser context
1422 * @input: an XML parser input fragment (entity, XML fragment ...).
1423 *
1424 * xmlPushInput: switch to a new input stream which is stacked on top
1425 * of the previous one(s).
1426 */
1427void
1428xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1429 if (input == NULL) return;
1430
1431 if (xmlParserDebugEntities) {
1432 if ((ctxt->input != NULL) && (ctxt->input->filename))
1433 xmlGenericError(xmlGenericErrorContext,
1434 "%s(%d): ", ctxt->input->filename,
1435 ctxt->input->line);
1436 xmlGenericError(xmlGenericErrorContext,
1437 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1438 }
1439 inputPush(ctxt, input);
1440 GROW;
1441}
1442
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * parse Reference declarations
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * The input must be positioned on the '&' of the reference. On success
 * the whole reference, including the ';', has been consumed.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;		/* value accumulated so far */
    int count = 0;			/* paces the calls to GROW */
    unsigned int outofrange = 0;	/* non-zero once val went above 0x10FFFF */

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
	/* hexadecimal form: &#x...; */
	SKIP(3);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    /* count is bumped here and at the end of each iteration */
	    if (count++ > 20) {
		count = 0;
		GROW;
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 16 + (CUR - '0');
	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
	        val = val * 16 + (CUR - 'a') + 10;
	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
	        val = val * 16 + (CUR - 'A') + 10;
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
		val = 0;
		break;
	    }
	    /* remember the overflow: it is never reset afterwards */
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
	/* decimal form: &#...; */
	SKIP(2);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    /* count is bumped here and at the end of each iteration */
	    if (count++ > 20) {
		count = 0;
		GROW;
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 10 + (CUR - '0');
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
		val = 0;
		break;
	    }
	    /* remember the overflow: it is never reset afterwards */
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else {
	/* not positioned on "&#": nothing is consumed */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
	/* also reached with val == 0 after any of the errors above */
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
	                  val);
    }
    return(0);
}
1544
1545/**
1546 * xmlParseStringCharRef:
1547 * @ctxt: an XML parser context
1548 * @str: a pointer to an index in the string
1549 *
1550 * parse Reference declarations, variant parsing from a string rather
1551 * than an an input flow.
1552 *
1553 * [66] CharRef ::= '&#' [0-9]+ ';' |
1554 * '&#x' [0-9a-fA-F]+ ';'
1555 *
1556 * [ WFC: Legal Character ]
1557 * Characters referred to using character references must match the
1558 * production for Char.
1559 *
1560 * Returns the value parsed (as an int), 0 in case of error, str will be
1561 * updated to the current value of the index
1562 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001563static int
Owen Taylor3473f882001-02-23 17:55:21 +00001564xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1565 const xmlChar *ptr;
1566 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001567 unsigned int val = 0;
1568 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 if ((str == NULL) || (*str == NULL)) return(0);
1571 ptr = *str;
1572 cur = *ptr;
1573 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1574 ptr += 3;
1575 cur = *ptr;
1576 while (cur != ';') { /* Non input consuming loop */
1577 if ((cur >= '0') && (cur <= '9'))
1578 val = val * 16 + (cur - '0');
1579 else if ((cur >= 'a') && (cur <= 'f'))
1580 val = val * 16 + (cur - 'a') + 10;
1581 else if ((cur >= 'A') && (cur <= 'F'))
1582 val = val * 16 + (cur - 'A') + 10;
1583 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001584 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001585 val = 0;
1586 break;
1587 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001588 if (val > 0x10FFFF)
1589 outofrange = val;
1590
Owen Taylor3473f882001-02-23 17:55:21 +00001591 ptr++;
1592 cur = *ptr;
1593 }
1594 if (cur == ';')
1595 ptr++;
1596 } else if ((cur == '&') && (ptr[1] == '#')){
1597 ptr += 2;
1598 cur = *ptr;
1599 while (cur != ';') { /* Non input consuming loops */
1600 if ((cur >= '0') && (cur <= '9'))
1601 val = val * 10 + (cur - '0');
1602 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001604 val = 0;
1605 break;
1606 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001607 if (val > 0x10FFFF)
1608 outofrange = val;
1609
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ptr++;
1611 cur = *ptr;
1612 }
1613 if (cur == ';')
1614 ptr++;
1615 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001616 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001617 return(0);
1618 }
1619 *str = ptr;
1620
1621 /*
1622 * [ WFC: Legal Character ]
1623 * Characters referred to using character references must match the
1624 * production for Char.
1625 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001626 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001627 return(val);
1628 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001629 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1630 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1631 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001632 }
1633 return(0);
1634}
1635
1636/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001637 * xmlNewBlanksWrapperInputStream:
1638 * @ctxt: an XML parser context
1639 * @entity: an Entity pointer
1640 *
1641 * Create a new input stream for wrapping
1642 * blanks around a PEReference
1643 *
1644 * Returns the new input stream or NULL
1645 */
1646
1647static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1648
Daniel Veillardf4862f02002-09-10 11:13:43 +00001649static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001650xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1651 xmlParserInputPtr input;
1652 xmlChar *buffer;
1653 size_t length;
1654 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1656 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001657 return(NULL);
1658 }
1659 if (xmlParserDebugEntities)
1660 xmlGenericError(xmlGenericErrorContext,
1661 "new blanks wrapper for entity: %s\n", entity->name);
1662 input = xmlNewInputStream(ctxt);
1663 if (input == NULL) {
1664 return(NULL);
1665 }
1666 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001667 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001668 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001669 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001670 return(NULL);
1671 }
1672 buffer [0] = ' ';
1673 buffer [1] = '%';
1674 buffer [length-3] = ';';
1675 buffer [length-2] = ' ';
1676 buffer [length-1] = 0;
1677 memcpy(buffer + 2, entity->name, length - 5);
1678 input->free = deallocblankswrapper;
1679 input->base = buffer;
1680 input->cur = buffer;
1681 input->length = length;
1682 input->end = &buffer[length];
1683 return(input);
1684}
1685
1686/**
Owen Taylor3473f882001-02-23 17:55:21 +00001687 * xmlParserHandlePEReference:
1688 * @ctxt: the parser context
1689 *
1690 * [69] PEReference ::= '%' Name ';'
1691 *
1692 * [ WFC: No Recursion ]
1693 * A parsed entity must not contain a recursive
1694 * reference to itself, either directly or indirectly.
1695 *
1696 * [ WFC: Entity Declared ]
1697 * In a document without any DTD, a document with only an internal DTD
1698 * subset which contains no parameter entity references, or a document
1699 * with "standalone='yes'", ... ... The declaration of a parameter
1700 * entity must precede any reference to it...
1701 *
1702 * [ VC: Entity Declared ]
1703 * In a document with an external subset or external parameter entities
1704 * with "standalone='no'", ... ... The declaration of a parameter entity
1705 * must precede any reference to it...
1706 *
1707 * [ WFC: In DTD ]
1708 * Parameter-entity references may only appear in the DTD.
1709 * NOTE: misleading but this is handled.
1710 *
1711 * A PEReference may have been detected in the current input stream
1712 * the handling is done accordingly to
1713 * http://www.w3.org/TR/REC-xml#entproc
1714 * i.e.
1715 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001716 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001717 */
1718void
1719xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001720 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 xmlEntityPtr entity = NULL;
1722 xmlParserInputPtr input;
1723
Owen Taylor3473f882001-02-23 17:55:21 +00001724 if (RAW != '%') return;
1725 switch(ctxt->instate) {
1726 case XML_PARSER_CDATA_SECTION:
1727 return;
1728 case XML_PARSER_COMMENT:
1729 return;
1730 case XML_PARSER_START_TAG:
1731 return;
1732 case XML_PARSER_END_TAG:
1733 return;
1734 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 return;
1737 case XML_PARSER_PROLOG:
1738 case XML_PARSER_START:
1739 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 return;
1742 case XML_PARSER_ENTITY_DECL:
1743 case XML_PARSER_CONTENT:
1744 case XML_PARSER_ATTRIBUTE_VALUE:
1745 case XML_PARSER_PI:
1746 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001747 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001748 /* we just ignore it there */
1749 return;
1750 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001751 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 return;
1753 case XML_PARSER_ENTITY_VALUE:
1754 /*
1755 * NOTE: in the case of entity values, we don't do the
1756 * substitution here since we need the literal
1757 * entity value to be able to save the internal
1758 * subset of the document.
1759 * This will be handled by xmlStringDecodeEntities
1760 */
1761 return;
1762 case XML_PARSER_DTD:
1763 /*
1764 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1765 * In the internal DTD subset, parameter-entity references
1766 * can occur only where markup declarations can occur, not
1767 * within markup declarations.
1768 * In that case this is handled in xmlParseMarkupDecl
1769 */
1770 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1771 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001772 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001773 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001774 break;
1775 case XML_PARSER_IGNORE:
1776 return;
1777 }
1778
1779 NEXT;
1780 name = xmlParseName(ctxt);
1781 if (xmlParserDebugEntities)
1782 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001783 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001784 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001785 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001786 } else {
1787 if (RAW == ';') {
1788 NEXT;
1789 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1790 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1791 if (entity == NULL) {
1792
1793 /*
1794 * [ WFC: Entity Declared ]
1795 * In a document without any DTD, a document with only an
1796 * internal DTD subset which contains no parameter entity
1797 * references, or a document with "standalone='yes'", ...
1798 * ... The declaration of a parameter entity must precede
1799 * any reference to it...
1800 */
1801 if ((ctxt->standalone == 1) ||
1802 ((ctxt->hasExternalSubset == 0) &&
1803 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001804 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001805 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001806 } else {
1807 /*
1808 * [ VC: Entity Declared ]
1809 * In a document with an external subset or external
1810 * parameter entities with "standalone='no'", ...
1811 * ... The declaration of a parameter entity must precede
1812 * any reference to it...
1813 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001814 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1815 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1816 "PEReference: %%%s; not found\n",
1817 name);
1818 } else
1819 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1820 "PEReference: %%%s; not found\n",
1821 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001822 ctxt->valid = 0;
1823 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001824 } else if (ctxt->input->free != deallocblankswrapper) {
1825 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1826 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001827 } else {
1828 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1829 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001830 xmlChar start[4];
1831 xmlCharEncoding enc;
1832
Owen Taylor3473f882001-02-23 17:55:21 +00001833 /*
1834 * handle the extra spaces added before and after
1835 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001836 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001837 */
1838 input = xmlNewEntityInputStream(ctxt, entity);
1839 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001840
1841 /*
1842 * Get the 4 first bytes and decode the charset
1843 * if enc != XML_CHAR_ENCODING_NONE
1844 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001845 * Note that, since we may have some non-UTF8
1846 * encoding (like UTF16, bug 135229), the 'length'
1847 * is not known, but we can calculate based upon
1848 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001849 */
1850 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001851 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001852 start[0] = RAW;
1853 start[1] = NXT(1);
1854 start[2] = NXT(2);
1855 start[3] = NXT(3);
1856 enc = xmlDetectCharEncoding(start, 4);
1857 if (enc != XML_CHAR_ENCODING_NONE) {
1858 xmlSwitchEncoding(ctxt, enc);
1859 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001860 }
1861
Owen Taylor3473f882001-02-23 17:55:21 +00001862 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001863 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1864 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001865 xmlParseTextDecl(ctxt);
1866 }
Owen Taylor3473f882001-02-23 17:55:21 +00001867 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001868 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1869 "PEReference: %s is not a parameter entity\n",
1870 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001871 }
1872 }
1873 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001874 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001875 }
Owen Taylor3473f882001-02-23 17:55:21 +00001876 }
1877}
1878
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly. If the
 * reallocation fails, control jumps to the mem_error label of the
 * enclosing function and <buffer> still points to the old block.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size = buffer##_size * 2;					\
    grown = (xmlChar *) xmlRealloc(buffer,				\
                                   buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL)							\
        goto mem_error;							\
    buffer = grown;							\
}
1890
1891/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001892 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001893 * @ctxt: the parser context
1894 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001895 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001896 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1897 * @end: an end marker xmlChar, 0 if none
1898 * @end2: an end marker xmlChar, 0 if none
1899 * @end3: an end marker xmlChar, 0 if none
1900 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001901 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001902 *
1903 * [67] Reference ::= EntityRef | CharRef
1904 *
1905 * [69] PEReference ::= '%' Name ';'
1906 *
1907 * Returns A newly allocated string with the substitution done. The caller
1908 * must deallocate it !
1909 */
1910xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1912 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 xmlChar *buffer = NULL;
1914 int buffer_size = 0;
1915
1916 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001917 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001918 xmlEntityPtr ent;
1919 int c,l;
1920 int nbchars = 0;
1921
Daniel Veillarde57ec792003-09-10 10:50:59 +00001922 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001923 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001924 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001925
1926 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001927 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001928 return(NULL);
1929 }
1930
1931 /*
1932 * allocate a translation buffer.
1933 */
1934 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001935 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001936 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001937
1938 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001939 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001940 * we are operating on already parsed values.
1941 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001942 if (str < last)
1943 c = CUR_SCHAR(str, l);
1944 else
1945 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 while ((c != 0) && (c != end) && /* non input consuming loop */
1947 (c != end2) && (c != end3)) {
1948
1949 if (c == 0) break;
1950 if ((c == '&') && (str[1] == '#')) {
1951 int val = xmlParseStringCharRef(ctxt, &str);
1952 if (val != 0) {
1953 COPY_BUF(0,buffer,nbchars,val);
1954 }
1955 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1956 if (xmlParserDebugEntities)
1957 xmlGenericError(xmlGenericErrorContext,
1958 "String decoding Entity Reference: %.30s\n",
1959 str);
1960 ent = xmlParseStringEntityRef(ctxt, &str);
1961 if ((ent != NULL) &&
1962 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1963 if (ent->content != NULL) {
1964 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1965 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001966 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1967 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001968 }
1969 } else if ((ent != NULL) && (ent->content != NULL)) {
1970 xmlChar *rep;
1971
1972 ctxt->depth++;
1973 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1974 0, 0, 0);
1975 ctxt->depth--;
1976 if (rep != NULL) {
1977 current = rep;
1978 while (*current != 0) { /* non input consuming loop */
1979 buffer[nbchars++] = *current++;
1980 if (nbchars >
1981 buffer_size - XML_PARSER_BUFFER_SIZE) {
1982 growBuffer(buffer);
1983 }
1984 }
1985 xmlFree(rep);
1986 }
1987 } else if (ent != NULL) {
1988 int i = xmlStrlen(ent->name);
1989 const xmlChar *cur = ent->name;
1990
1991 buffer[nbchars++] = '&';
1992 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1993 growBuffer(buffer);
1994 }
1995 for (;i > 0;i--)
1996 buffer[nbchars++] = *cur++;
1997 buffer[nbchars++] = ';';
1998 }
1999 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2000 if (xmlParserDebugEntities)
2001 xmlGenericError(xmlGenericErrorContext,
2002 "String decoding PE Reference: %.30s\n", str);
2003 ent = xmlParseStringPEReference(ctxt, &str);
2004 if (ent != NULL) {
2005 xmlChar *rep;
2006
2007 ctxt->depth++;
2008 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2009 0, 0, 0);
2010 ctxt->depth--;
2011 if (rep != NULL) {
2012 current = rep;
2013 while (*current != 0) { /* non input consuming loop */
2014 buffer[nbchars++] = *current++;
2015 if (nbchars >
2016 buffer_size - XML_PARSER_BUFFER_SIZE) {
2017 growBuffer(buffer);
2018 }
2019 }
2020 xmlFree(rep);
2021 }
2022 }
2023 } else {
2024 COPY_BUF(l,buffer,nbchars,c);
2025 str += l;
2026 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2027 growBuffer(buffer);
2028 }
2029 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002030 if (str < last)
2031 c = CUR_SCHAR(str, l);
2032 else
2033 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002034 }
2035 buffer[nbchars++] = 0;
2036 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002037
2038mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002040 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002041}
2042
Daniel Veillarde57ec792003-09-10 10:50:59 +00002043/**
2044 * xmlStringDecodeEntities:
2045 * @ctxt: the parser context
2046 * @str: the input string
2047 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2048 * @end: an end marker xmlChar, 0 if none
2049 * @end2: an end marker xmlChar, 0 if none
2050 * @end3: an end marker xmlChar, 0 if none
2051 *
2052 * Takes a entity string content and process to do the adequate substitutions.
2053 *
2054 * [67] Reference ::= EntityRef | CharRef
2055 *
2056 * [69] PEReference ::= '%' Name ';'
2057 *
2058 * Returns A newly allocated string with the substitution done. The caller
2059 * must deallocate it !
2060 */
2061xmlChar *
2062xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2063 xmlChar end, xmlChar end2, xmlChar end3) {
2064 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2065 end, end2, end3));
2066}
Owen Taylor3473f882001-02-23 17:55:21 +00002067
2068/************************************************************************
2069 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002070 * Commodity functions, cleanup needed ? *
2071 * *
2072 ************************************************************************/
2073
2074/**
2075 * areBlanks:
2076 * @ctxt: an XML parser context
2077 * @str: a xmlChar *
2078 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002079 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002080 *
2081 * Is this a sequence of blank chars that one can ignore ?
2082 *
2083 * Returns 1 if ignorable 0 otherwise.
2084 */
2085
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002086static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2087 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002088 int i, ret;
2089 xmlNodePtr lastChild;
2090
Daniel Veillard05c13a22001-09-09 08:38:09 +00002091 /*
2092 * Don't spend time trying to differentiate them, the same callback is
2093 * used !
2094 */
2095 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002096 return(0);
2097
Owen Taylor3473f882001-02-23 17:55:21 +00002098 /*
2099 * Check for xml:space value.
2100 */
2101 if (*(ctxt->space) == 1)
2102 return(0);
2103
2104 /*
2105 * Check that the string is made of blanks
2106 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002107 if (blank_chars == 0) {
2108 for (i = 0;i < len;i++)
2109 if (!(IS_BLANK_CH(str[i]))) return(0);
2110 }
Owen Taylor3473f882001-02-23 17:55:21 +00002111
2112 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002113 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002114 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002115 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002116 if (ctxt->myDoc != NULL) {
2117 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2118 if (ret == 0) return(1);
2119 if (ret == 1) return(0);
2120 }
2121
2122 /*
2123 * Otherwise, heuristic :-\
2124 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002125 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002126 if ((ctxt->node->children == NULL) &&
2127 (RAW == '<') && (NXT(1) == '/')) return(0);
2128
2129 lastChild = xmlGetLastChild(ctxt->node);
2130 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002131 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2132 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002133 } else if (xmlNodeIsText(lastChild))
2134 return(0);
2135 else if ((ctxt->node->children != NULL) &&
2136 (xmlNodeIsText(ctxt->node->children)))
2137 return(0);
2138 return(1);
2139}
2140
Owen Taylor3473f882001-02-23 17:55:21 +00002141/************************************************************************
2142 * *
2143 * Extra stuff for namespace support *
2144 * Relates to http://www.w3.org/TR/WD-xml-names *
2145 * *
2146 ************************************************************************/
2147
2148/**
2149 * xmlSplitQName:
2150 * @ctxt: an XML parser context
2151 * @name: an XML parser context
2152 * @prefix: a xmlChar **
2153 *
2154 * parse an UTF8 encoded XML qualified name string
2155 *
2156 * [NS 5] QName ::= (Prefix ':')? LocalPart
2157 *
2158 * [NS 6] Prefix ::= NCName
2159 *
2160 * [NS 7] LocalPart ::= NCName
2161 *
2162 * Returns the local part, and prefix is updated
2163 * to get the Prefix if any.
2164 */
2165
2166xmlChar *
2167xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2168 xmlChar buf[XML_MAX_NAMELEN + 5];
2169 xmlChar *buffer = NULL;
2170 int len = 0;
2171 int max = XML_MAX_NAMELEN;
2172 xmlChar *ret = NULL;
2173 const xmlChar *cur = name;
2174 int c;
2175
2176 *prefix = NULL;
2177
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002178 if (cur == NULL) return(NULL);
2179
Owen Taylor3473f882001-02-23 17:55:21 +00002180#ifndef XML_XML_NAMESPACE
2181 /* xml: prefix is not really a namespace */
2182 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2183 (cur[2] == 'l') && (cur[3] == ':'))
2184 return(xmlStrdup(name));
2185#endif
2186
Daniel Veillard597bc482003-07-24 16:08:28 +00002187 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002188 if (cur[0] == ':')
2189 return(xmlStrdup(name));
2190
2191 c = *cur++;
2192 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2193 buf[len++] = c;
2194 c = *cur++;
2195 }
2196 if (len >= max) {
2197 /*
2198 * Okay someone managed to make a huge name, so he's ready to pay
2199 * for the processing speed.
2200 */
2201 max = len * 2;
2202
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002203 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002204 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002205 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002206 return(NULL);
2207 }
2208 memcpy(buffer, buf, len);
2209 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2210 if (len + 10 > max) {
2211 max *= 2;
2212 buffer = (xmlChar *) xmlRealloc(buffer,
2213 max * sizeof(xmlChar));
2214 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002215 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002216 return(NULL);
2217 }
2218 }
2219 buffer[len++] = c;
2220 c = *cur++;
2221 }
2222 buffer[len] = 0;
2223 }
2224
Daniel Veillard597bc482003-07-24 16:08:28 +00002225 /* nasty but well=formed
2226 if ((c == ':') && (*cur == 0)) {
2227 return(xmlStrdup(name));
2228 } */
2229
Owen Taylor3473f882001-02-23 17:55:21 +00002230 if (buffer == NULL)
2231 ret = xmlStrndup(buf, len);
2232 else {
2233 ret = buffer;
2234 buffer = NULL;
2235 max = XML_MAX_NAMELEN;
2236 }
2237
2238
2239 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002240 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002241 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002242 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002243 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002244 }
Owen Taylor3473f882001-02-23 17:55:21 +00002245 len = 0;
2246
Daniel Veillardbb284f42002-10-16 18:02:47 +00002247 /*
2248 * Check that the first character is proper to start
2249 * a new name
2250 */
2251 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2252 ((c >= 0x41) && (c <= 0x5A)) ||
2253 (c == '_') || (c == ':'))) {
2254 int l;
2255 int first = CUR_SCHAR(cur, l);
2256
2257 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002258 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002259 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002260 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002261 }
2262 }
2263 cur++;
2264
Owen Taylor3473f882001-02-23 17:55:21 +00002265 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2266 buf[len++] = c;
2267 c = *cur++;
2268 }
2269 if (len >= max) {
2270 /*
2271 * Okay someone managed to make a huge name, so he's ready to pay
2272 * for the processing speed.
2273 */
2274 max = len * 2;
2275
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002276 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002277 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002278 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002279 return(NULL);
2280 }
2281 memcpy(buffer, buf, len);
2282 while (c != 0) { /* tested bigname2.xml */
2283 if (len + 10 > max) {
2284 max *= 2;
2285 buffer = (xmlChar *) xmlRealloc(buffer,
2286 max * sizeof(xmlChar));
2287 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002288 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002289 return(NULL);
2290 }
2291 }
2292 buffer[len++] = c;
2293 c = *cur++;
2294 }
2295 buffer[len] = 0;
2296 }
2297
2298 if (buffer == NULL)
2299 ret = xmlStrndup(buf, len);
2300 else {
2301 ret = buffer;
2302 }
2303 }
2304
2305 return(ret);
2306}
2307
2308/************************************************************************
2309 * *
2310 * The parser itself *
2311 * Relates to http://www.w3.org/TR/REC-xml *
2312 * *
2313 ************************************************************************/
2314
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002315static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002316static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002317 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002318
Owen Taylor3473f882001-02-23 17:55:21 +00002319/**
2320 * xmlParseName:
2321 * @ctxt: an XML parser context
2322 *
2323 * parse an XML name.
2324 *
2325 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2326 * CombiningChar | Extender
2327 *
2328 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2329 *
2330 * [6] Names ::= Name (S Name)*
2331 *
2332 * Returns the Name parsed or NULL
2333 */
2334
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002335const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002336xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002337 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002338 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002339 int count = 0;
2340
2341 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002342
2343 /*
2344 * Accelerator for simple ASCII names
2345 */
2346 in = ctxt->input->cur;
2347 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2348 ((*in >= 0x41) && (*in <= 0x5A)) ||
2349 (*in == '_') || (*in == ':')) {
2350 in++;
2351 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2352 ((*in >= 0x41) && (*in <= 0x5A)) ||
2353 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002354 (*in == '_') || (*in == '-') ||
2355 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002356 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002357 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002358 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002359 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002360 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002361 ctxt->nbChars += count;
2362 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002363 if (ret == NULL)
2364 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002365 return(ret);
2366 }
2367 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002368 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002369}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002370
Daniel Veillard46de64e2002-05-29 08:21:33 +00002371/**
2372 * xmlParseNameAndCompare:
2373 * @ctxt: an XML parser context
2374 *
2375 * parse an XML name and compares for match
2376 * (specialized for endtag parsing)
2377 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002378 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2379 * and the name for mismatch
2380 */
2381
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002382static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002383xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002384 register const xmlChar *cmp = other;
2385 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002386 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002387
2388 GROW;
2389
2390 in = ctxt->input->cur;
2391 while (*in != 0 && *in == *cmp) {
2392 ++in;
2393 ++cmp;
2394 }
William M. Brack76e95df2003-10-18 16:20:14 +00002395 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002396 /* success */
2397 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002398 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002399 }
2400 /* failure (or end of input buffer), check with full function */
2401 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002402 /* strings coming from the dictionnary direct compare possible */
2403 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002404 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002405 }
2406 return ret;
2407}
2408
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002409static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002410xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002411 int len = 0, l;
2412 int c;
2413 int count = 0;
2414
2415 /*
2416 * Handler for more complex cases
2417 */
2418 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002419 c = CUR_CHAR(l);
2420 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2421 (!IS_LETTER(c) && (c != '_') &&
2422 (c != ':'))) {
2423 return(NULL);
2424 }
2425
2426 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002427 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002428 (c == '.') || (c == '-') ||
2429 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002430 (IS_COMBINING(c)) ||
2431 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002432 if (count++ > 100) {
2433 count = 0;
2434 GROW;
2435 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002436 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002437 NEXTL(l);
2438 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002439 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002440 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002441}
2442
2443/**
2444 * xmlParseStringName:
2445 * @ctxt: an XML parser context
2446 * @str: a pointer to the string pointer (IN/OUT)
2447 *
2448 * parse an XML name.
2449 *
2450 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2451 * CombiningChar | Extender
2452 *
2453 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2454 *
2455 * [6] Names ::= Name (S Name)*
2456 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002457 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002458 * is updated to the current location in the string.
2459 */
2460
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002461static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002462xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2463 xmlChar buf[XML_MAX_NAMELEN + 5];
2464 const xmlChar *cur = *str;
2465 int len = 0, l;
2466 int c;
2467
2468 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002469 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002470 (c != ':')) {
2471 return(NULL);
2472 }
2473
William M. Brack871611b2003-10-18 04:53:14 +00002474 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002475 (c == '.') || (c == '-') ||
2476 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002477 (IS_COMBINING(c)) ||
2478 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002479 COPY_BUF(l,buf,len,c);
2480 cur += l;
2481 c = CUR_SCHAR(cur, l);
2482 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2483 /*
2484 * Okay someone managed to make a huge name, so he's ready to pay
2485 * for the processing speed.
2486 */
2487 xmlChar *buffer;
2488 int max = len * 2;
2489
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002490 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002491 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002492 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002493 return(NULL);
2494 }
2495 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002496 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002497 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002498 (c == '.') || (c == '-') ||
2499 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002500 (IS_COMBINING(c)) ||
2501 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (len + 10 > max) {
2503 max *= 2;
2504 buffer = (xmlChar *) xmlRealloc(buffer,
2505 max * sizeof(xmlChar));
2506 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002507 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002508 return(NULL);
2509 }
2510 }
2511 COPY_BUF(l,buffer,len,c);
2512 cur += l;
2513 c = CUR_SCHAR(cur, l);
2514 }
2515 buffer[len] = 0;
2516 *str = cur;
2517 return(buffer);
2518 }
2519 }
2520 *str = cur;
2521 return(xmlStrndup(buf, len));
2522}
2523
2524/**
2525 * xmlParseNmtoken:
2526 * @ctxt: an XML parser context
2527 *
2528 * parse an XML Nmtoken.
2529 *
2530 * [7] Nmtoken ::= (NameChar)+
2531 *
2532 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2533 *
2534 * Returns the Nmtoken parsed or NULL
2535 */
2536
2537xmlChar *
2538xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2539 xmlChar buf[XML_MAX_NAMELEN + 5];
2540 int len = 0, l;
2541 int c;
2542 int count = 0;
2543
2544 GROW;
2545 c = CUR_CHAR(l);
2546
William M. Brack871611b2003-10-18 04:53:14 +00002547 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002548 (c == '.') || (c == '-') ||
2549 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002550 (IS_COMBINING(c)) ||
2551 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002552 if (count++ > 100) {
2553 count = 0;
2554 GROW;
2555 }
2556 COPY_BUF(l,buf,len,c);
2557 NEXTL(l);
2558 c = CUR_CHAR(l);
2559 if (len >= XML_MAX_NAMELEN) {
2560 /*
2561 * Okay someone managed to make a huge token, so he's ready to pay
2562 * for the processing speed.
2563 */
2564 xmlChar *buffer;
2565 int max = len * 2;
2566
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002567 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002568 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002569 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 return(NULL);
2571 }
2572 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002573 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002574 (c == '.') || (c == '-') ||
2575 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002576 (IS_COMBINING(c)) ||
2577 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002578 if (count++ > 100) {
2579 count = 0;
2580 GROW;
2581 }
2582 if (len + 10 > max) {
2583 max *= 2;
2584 buffer = (xmlChar *) xmlRealloc(buffer,
2585 max * sizeof(xmlChar));
2586 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002587 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002588 return(NULL);
2589 }
2590 }
2591 COPY_BUF(l,buffer,len,c);
2592 NEXTL(l);
2593 c = CUR_CHAR(l);
2594 }
2595 buffer[len] = 0;
2596 return(buffer);
2597 }
2598 }
2599 if (len == 0)
2600 return(NULL);
2601 return(xmlStrndup(buf, len));
2602}
2603
2604/**
2605 * xmlParseEntityValue:
2606 * @ctxt: an XML parser context
2607 * @orig: if non-NULL store a copy of the original entity value
2608 *
2609 * parse a value for ENTITY declarations
2610 *
2611 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2612 * "'" ([^%&'] | PEReference | Reference)* "'"
2613 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002614 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002615 */
2616
2617xmlChar *
2618xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2619 xmlChar *buf = NULL;
2620 int len = 0;
2621 int size = XML_PARSER_BUFFER_SIZE;
2622 int c, l;
2623 xmlChar stop;
2624 xmlChar *ret = NULL;
2625 const xmlChar *cur = NULL;
2626 xmlParserInputPtr input;
2627
2628 if (RAW == '"') stop = '"';
2629 else if (RAW == '\'') stop = '\'';
2630 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002631 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002632 return(NULL);
2633 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002634 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002635 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002636 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002637 return(NULL);
2638 }
2639
2640 /*
2641 * The content of the entity definition is copied in a buffer.
2642 */
2643
2644 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2645 input = ctxt->input;
2646 GROW;
2647 NEXT;
2648 c = CUR_CHAR(l);
2649 /*
2650 * NOTE: 4.4.5 Included in Literal
2651 * When a parameter entity reference appears in a literal entity
2652 * value, ... a single or double quote character in the replacement
2653 * text is always treated as a normal data character and will not
2654 * terminate the literal.
2655 * In practice it means we stop the loop only when back at parsing
2656 * the initial entity and the quote is found
2657 */
William M. Brack871611b2003-10-18 04:53:14 +00002658 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002659 (ctxt->input != input))) {
2660 if (len + 5 >= size) {
2661 size *= 2;
2662 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2663 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002664 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002665 return(NULL);
2666 }
2667 }
2668 COPY_BUF(l,buf,len,c);
2669 NEXTL(l);
2670 /*
2671 * Pop-up of finished entities.
2672 */
2673 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2674 xmlPopInput(ctxt);
2675
2676 GROW;
2677 c = CUR_CHAR(l);
2678 if (c == 0) {
2679 GROW;
2680 c = CUR_CHAR(l);
2681 }
2682 }
2683 buf[len] = 0;
2684
2685 /*
2686 * Raise problem w.r.t. '&' and '%' being used in non-entities
2687 * reference constructs. Note Charref will be handled in
2688 * xmlStringDecodeEntities()
2689 */
2690 cur = buf;
2691 while (*cur != 0) { /* non input consuming */
2692 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2693 xmlChar *name;
2694 xmlChar tmp = *cur;
2695
2696 cur++;
2697 name = xmlParseStringName(ctxt, &cur);
2698 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002699 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002700 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002701 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002702 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002703 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2704 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002705 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002706 }
2707 if (name != NULL)
2708 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002709 if (*cur == 0)
2710 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002711 }
2712 cur++;
2713 }
2714
2715 /*
2716 * Then PEReference entities are substituted.
2717 */
2718 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002719 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002720 xmlFree(buf);
2721 } else {
2722 NEXT;
2723 /*
2724 * NOTE: 4.4.7 Bypassed
2725 * When a general entity reference appears in the EntityValue in
2726 * an entity declaration, it is bypassed and left as is.
2727 * so XML_SUBSTITUTE_REF is not set here.
2728 */
2729 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2730 0, 0, 0);
2731 if (orig != NULL)
2732 *orig = buf;
2733 else
2734 xmlFree(buf);
2735 }
2736
2737 return(ret);
2738}
2739
2740/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002741 * xmlParseAttValueComplex:
2742 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002743 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002744 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002745 *
2746 * parse a value for an attribute, this is the fallback function
2747 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002748 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002749 *
2750 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2751 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002752static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002753xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002754 xmlChar limit = 0;
2755 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002756 int len = 0;
2757 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002758 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002759 xmlChar *current = NULL;
2760 xmlEntityPtr ent;
2761
Owen Taylor3473f882001-02-23 17:55:21 +00002762 if (NXT(0) == '"') {
2763 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2764 limit = '"';
2765 NEXT;
2766 } else if (NXT(0) == '\'') {
2767 limit = '\'';
2768 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2769 NEXT;
2770 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002771 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774
2775 /*
2776 * allocate a translation buffer.
2777 */
2778 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002779 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002780 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002781
2782 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002783 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002784 */
2785 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002786 while ((NXT(0) != limit) && /* checked */
2787 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002788 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002789 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002790 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002791 if (NXT(1) == '#') {
2792 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002793
Owen Taylor3473f882001-02-23 17:55:21 +00002794 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002795 if (ctxt->replaceEntities) {
2796 if (len > buf_size - 10) {
2797 growBuffer(buf);
2798 }
2799 buf[len++] = '&';
2800 } else {
2801 /*
2802 * The reparsing will be done in xmlStringGetNodeList()
2803 * called by the attribute() function in SAX.c
2804 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002805 if (len > buf_size - 10) {
2806 growBuffer(buf);
2807 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002808 buf[len++] = '&';
2809 buf[len++] = '#';
2810 buf[len++] = '3';
2811 buf[len++] = '8';
2812 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002813 }
2814 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002815 if (len > buf_size - 10) {
2816 growBuffer(buf);
2817 }
Owen Taylor3473f882001-02-23 17:55:21 +00002818 len += xmlCopyChar(0, &buf[len], val);
2819 }
2820 } else {
2821 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002822 if ((ent != NULL) &&
2823 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2824 if (len > buf_size - 10) {
2825 growBuffer(buf);
2826 }
2827 if ((ctxt->replaceEntities == 0) &&
2828 (ent->content[0] == '&')) {
2829 buf[len++] = '&';
2830 buf[len++] = '#';
2831 buf[len++] = '3';
2832 buf[len++] = '8';
2833 buf[len++] = ';';
2834 } else {
2835 buf[len++] = ent->content[0];
2836 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002837 } else if ((ent != NULL) &&
2838 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002839 xmlChar *rep;
2840
2841 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2842 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002843 XML_SUBSTITUTE_REF,
2844 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002845 if (rep != NULL) {
2846 current = rep;
2847 while (*current != 0) { /* non input consuming */
2848 buf[len++] = *current++;
2849 if (len > buf_size - 10) {
2850 growBuffer(buf);
2851 }
2852 }
2853 xmlFree(rep);
2854 }
2855 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002856 if (len > buf_size - 10) {
2857 growBuffer(buf);
2858 }
Owen Taylor3473f882001-02-23 17:55:21 +00002859 if (ent->content != NULL)
2860 buf[len++] = ent->content[0];
2861 }
2862 } else if (ent != NULL) {
2863 int i = xmlStrlen(ent->name);
2864 const xmlChar *cur = ent->name;
2865
2866 /*
2867 * This may look absurd but is needed to detect
2868 * entities problems
2869 */
2870 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2871 (ent->content != NULL)) {
2872 xmlChar *rep;
2873 rep = xmlStringDecodeEntities(ctxt, ent->content,
2874 XML_SUBSTITUTE_REF, 0, 0, 0);
2875 if (rep != NULL)
2876 xmlFree(rep);
2877 }
2878
2879 /*
2880 * Just output the reference
2881 */
2882 buf[len++] = '&';
2883 if (len > buf_size - i - 10) {
2884 growBuffer(buf);
2885 }
2886 for (;i > 0;i--)
2887 buf[len++] = *cur++;
2888 buf[len++] = ';';
2889 }
2890 }
2891 } else {
2892 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002893 if ((len != 0) || (!normalize)) {
2894 if ((!normalize) || (!in_space)) {
2895 COPY_BUF(l,buf,len,0x20);
2896 if (len > buf_size - 10) {
2897 growBuffer(buf);
2898 }
2899 }
2900 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002901 }
2902 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002903 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002904 COPY_BUF(l,buf,len,c);
2905 if (len > buf_size - 10) {
2906 growBuffer(buf);
2907 }
2908 }
2909 NEXTL(l);
2910 }
2911 GROW;
2912 c = CUR_CHAR(l);
2913 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002914 if ((in_space) && (normalize)) {
2915 while (buf[len - 1] == 0x20) len--;
2916 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002917 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002918 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002919 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002920 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002921 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2922 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002923 } else
2924 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002925 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002926 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002927
2928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002929 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002930 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002931}
2932
2933/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002934 * xmlParseAttValue:
2935 * @ctxt: an XML parser context
2936 *
2937 * parse a value for an attribute
2938 * Note: the parser won't do substitution of entities here, this
2939 * will be handled later in xmlStringGetNodeList
2940 *
2941 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2942 * "'" ([^<&'] | Reference)* "'"
2943 *
2944 * 3.3.3 Attribute-Value Normalization:
2945 * Before the value of an attribute is passed to the application or
2946 * checked for validity, the XML processor must normalize it as follows:
2947 * - a character reference is processed by appending the referenced
2948 * character to the attribute value
2949 * - an entity reference is processed by recursively processing the
2950 * replacement text of the entity
2951 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2952 * appending #x20 to the normalized value, except that only a single
2953 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2954 * parsed entity or the literal entity value of an internal parsed entity
2955 * - other characters are processed by appending them to the normalized value
2956 * If the declared value is not CDATA, then the XML processor must further
2957 * process the normalized attribute value by discarding any leading and
2958 * trailing space (#x20) characters, and by replacing sequences of space
2959 * (#x20) characters by a single space (#x20) character.
2960 * All attributes for which no declaration has been read should be treated
2961 * by a non-validating parser as if declared CDATA.
2962 *
2963 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2964 */
2965
2966
2967xmlChar *
2968xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002969 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002970}
2971
2972/**
Owen Taylor3473f882001-02-23 17:55:21 +00002973 * xmlParseSystemLiteral:
2974 * @ctxt: an XML parser context
2975 *
2976 * parse an XML Literal
2977 *
2978 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2979 *
2980 * Returns the SystemLiteral parsed or NULL
2981 */
2982
2983xmlChar *
2984xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2985 xmlChar *buf = NULL;
2986 int len = 0;
2987 int size = XML_PARSER_BUFFER_SIZE;
2988 int cur, l;
2989 xmlChar stop;
2990 int state = ctxt->instate;
2991 int count = 0;
2992
2993 SHRINK;
2994 if (RAW == '"') {
2995 NEXT;
2996 stop = '"';
2997 } else if (RAW == '\'') {
2998 NEXT;
2999 stop = '\'';
3000 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003001 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003002 return(NULL);
3003 }
3004
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003005 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003006 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003007 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003008 return(NULL);
3009 }
3010 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3011 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003012 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003013 if (len + 5 >= size) {
3014 size *= 2;
3015 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3016 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003017 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003018 ctxt->instate = (xmlParserInputState) state;
3019 return(NULL);
3020 }
3021 }
3022 count++;
3023 if (count > 50) {
3024 GROW;
3025 count = 0;
3026 }
3027 COPY_BUF(l,buf,len,cur);
3028 NEXTL(l);
3029 cur = CUR_CHAR(l);
3030 if (cur == 0) {
3031 GROW;
3032 SHRINK;
3033 cur = CUR_CHAR(l);
3034 }
3035 }
3036 buf[len] = 0;
3037 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003038 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003039 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003040 } else {
3041 NEXT;
3042 }
3043 return(buf);
3044}
3045
3046/**
3047 * xmlParsePubidLiteral:
3048 * @ctxt: an XML parser context
3049 *
3050 * parse an XML public literal
3051 *
3052 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3053 *
3054 * Returns the PubidLiteral parsed or NULL.
3055 */
3056
3057xmlChar *
3058xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3059 xmlChar *buf = NULL;
3060 int len = 0;
3061 int size = XML_PARSER_BUFFER_SIZE;
3062 xmlChar cur;
3063 xmlChar stop;
3064 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003065 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003066
3067 SHRINK;
3068 if (RAW == '"') {
3069 NEXT;
3070 stop = '"';
3071 } else if (RAW == '\'') {
3072 NEXT;
3073 stop = '\'';
3074 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003075 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003076 return(NULL);
3077 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003078 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003079 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003080 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003081 return(NULL);
3082 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003083 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003084 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003085 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003086 if (len + 1 >= size) {
3087 size *= 2;
3088 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3089 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003090 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003091 return(NULL);
3092 }
3093 }
3094 buf[len++] = cur;
3095 count++;
3096 if (count > 50) {
3097 GROW;
3098 count = 0;
3099 }
3100 NEXT;
3101 cur = CUR;
3102 if (cur == 0) {
3103 GROW;
3104 SHRINK;
3105 cur = CUR;
3106 }
3107 }
3108 buf[len] = 0;
3109 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003110 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003111 } else {
3112 NEXT;
3113 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003114 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003115 return(buf);
3116}
3117
Daniel Veillard48b2f892001-02-25 16:11:03 +00003118void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003119/**
3120 * xmlParseCharData:
3121 * @ctxt: an XML parser context
3122 * @cdata: int indicating whether we are within a CDATA section
3123 *
3124 * parse a CharData section.
3125 * if we are within a CDATA section ']]>' marks an end of section.
3126 *
3127 * The right angle bracket (>) may be represented using the string "&gt;",
3128 * and must, for compatibility, be escaped using "&gt;" or a character
3129 * reference when it appears in the string "]]>" in content, when that
3130 * string is not marking the end of a CDATA section.
3131 *
3132 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3133 */
3134
3135void
3136xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003137 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003138 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003139 int line = ctxt->input->line;
3140 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003141
3142 SHRINK;
3143 GROW;
3144 /*
3145 * Accelerated common case where input don't need to be
3146 * modified before passing it to the handler.
3147 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003148 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003149 in = ctxt->input->cur;
3150 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003151get_more_space:
3152 while (*in == 0x20) in++;
3153 if (*in == 0xA) {
3154 ctxt->input->line++;
3155 in++;
3156 while (*in == 0xA) {
3157 ctxt->input->line++;
3158 in++;
3159 }
3160 goto get_more_space;
3161 }
3162 if (*in == '<') {
3163 nbchar = in - ctxt->input->cur;
3164 if (nbchar > 0) {
3165 const xmlChar *tmp = ctxt->input->cur;
3166 ctxt->input->cur = in;
3167
3168 if (ctxt->sax->ignorableWhitespace !=
3169 ctxt->sax->characters) {
3170 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3171 ctxt->sax->ignorableWhitespace(ctxt->userData,
3172 tmp, nbchar);
3173 } else if (ctxt->sax->characters != NULL)
3174 ctxt->sax->characters(ctxt->userData,
3175 tmp, nbchar);
3176 } else if (ctxt->sax->characters != NULL) {
3177 ctxt->sax->characters(ctxt->userData,
3178 tmp, nbchar);
3179 }
3180 }
3181 return;
3182 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003183get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003184 while (((*in > ']') && (*in <= 0x7F)) ||
3185 ((*in > '&') && (*in < '<')) ||
3186 ((*in > '<') && (*in < ']')) ||
3187 ((*in >= 0x20) && (*in < '&')) ||
3188 (*in == 0x09))
3189 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003190 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003191 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003192 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003193 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003194 ctxt->input->line++;
3195 in++;
3196 }
3197 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003198 }
3199 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003200 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003201 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003202 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003203 return;
3204 }
3205 in++;
3206 goto get_more;
3207 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003208 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003209 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003210 if ((ctxt->sax->ignorableWhitespace !=
3211 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003212 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003213 const xmlChar *tmp = ctxt->input->cur;
3214 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003215
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003216 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003217 ctxt->sax->ignorableWhitespace(ctxt->userData,
3218 tmp, nbchar);
3219 } else if (ctxt->sax->characters != NULL)
3220 ctxt->sax->characters(ctxt->userData,
3221 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003222 line = ctxt->input->line;
3223 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003224 } else {
3225 if (ctxt->sax->characters != NULL)
3226 ctxt->sax->characters(ctxt->userData,
3227 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003228 line = ctxt->input->line;
3229 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003230 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003231 }
3232 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003233 if (*in == 0xD) {
3234 in++;
3235 if (*in == 0xA) {
3236 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003237 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003238 ctxt->input->line++;
3239 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003240 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003241 in--;
3242 }
3243 if (*in == '<') {
3244 return;
3245 }
3246 if (*in == '&') {
3247 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003248 }
3249 SHRINK;
3250 GROW;
3251 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003252 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003253 nbchar = 0;
3254 }
Daniel Veillard50582112001-03-26 22:52:16 +00003255 ctxt->input->line = line;
3256 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003257 xmlParseCharDataComplex(ctxt, cdata);
3258}
3259
Daniel Veillard01c13b52002-12-10 15:19:08 +00003260/**
3261 * xmlParseCharDataComplex:
3262 * @ctxt: an XML parser context
3263 * @cdata: int indicating whether we are within a CDATA section
3264 *
3265 * parse a CharData section.this is the fallback function
3266 * of xmlParseCharData() when the parsing requires handling
3267 * of non-ASCII characters.
3268 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003269void
3270xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003271 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3272 int nbchar = 0;
3273 int cur, l;
3274 int count = 0;
3275
3276 SHRINK;
3277 GROW;
3278 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003279 while ((cur != '<') && /* checked */
3280 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003281 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003282 if ((cur == ']') && (NXT(1) == ']') &&
3283 (NXT(2) == '>')) {
3284 if (cdata) break;
3285 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003286 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003287 }
3288 }
3289 COPY_BUF(l,buf,nbchar,cur);
3290 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003291 buf[nbchar] = 0;
3292
Owen Taylor3473f882001-02-23 17:55:21 +00003293 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003294 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003295 */
3296 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003297 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003298 if (ctxt->sax->ignorableWhitespace != NULL)
3299 ctxt->sax->ignorableWhitespace(ctxt->userData,
3300 buf, nbchar);
3301 } else {
3302 if (ctxt->sax->characters != NULL)
3303 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3304 }
3305 }
3306 nbchar = 0;
3307 }
3308 count++;
3309 if (count > 50) {
3310 GROW;
3311 count = 0;
3312 }
3313 NEXTL(l);
3314 cur = CUR_CHAR(l);
3315 }
3316 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003317 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003318 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003319 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003320 */
3321 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003322 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003323 if (ctxt->sax->ignorableWhitespace != NULL)
3324 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3325 } else {
3326 if (ctxt->sax->characters != NULL)
3327 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3328 }
3329 }
3330 }
3331}
3332
3333/**
3334 * xmlParseExternalID:
3335 * @ctxt: an XML parser context
3336 * @publicID: a xmlChar** receiving PubidLiteral
3337 * @strict: indicate whether we should restrict parsing to only
3338 * production [75], see NOTE below
3339 *
3340 * Parse an External ID or a Public ID
3341 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003342 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003343 * 'PUBLIC' S PubidLiteral S SystemLiteral
3344 *
3345 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3346 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3347 *
3348 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3349 *
3350 * Returns the function returns SystemLiteral and in the second
3351 * case publicID receives PubidLiteral, is strict is off
3352 * it is possible to return NULL and have publicID set.
3353 */
3354
3355xmlChar *
3356xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3357 xmlChar *URI = NULL;
3358
3359 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003360
3361 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003362 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003363 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003364 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003365 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3366 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003367 }
3368 SKIP_BLANKS;
3369 URI = xmlParseSystemLiteral(ctxt);
3370 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003371 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003372 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003373 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003374 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003375 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003377 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003378 }
3379 SKIP_BLANKS;
3380 *publicID = xmlParsePubidLiteral(ctxt);
3381 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003382 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003383 }
3384 if (strict) {
3385 /*
3386 * We don't handle [83] so "S SystemLiteral" is required.
3387 */
William M. Brack76e95df2003-10-18 16:20:14 +00003388 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003389 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003390 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003391 }
3392 } else {
3393 /*
3394 * We handle [83] so we return immediately, if
3395 * "S SystemLiteral" is not detected. From a purely parsing
3396 * point of view that's a nice mess.
3397 */
3398 const xmlChar *ptr;
3399 GROW;
3400
3401 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003402 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003403
William M. Brack76e95df2003-10-18 16:20:14 +00003404 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003405 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3406 }
3407 SKIP_BLANKS;
3408 URI = xmlParseSystemLiteral(ctxt);
3409 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003410 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003411 }
3412 }
3413 return(URI);
3414}
3415
3416/**
3417 * xmlParseComment:
3418 * @ctxt: an XML parser context
3419 *
3420 * Skip an XML (SGML) comment <!-- .... -->
3421 * The spec says that "For compatibility, the string "--" (double-hyphen)
3422 * must not occur within comments. "
3423 *
3424 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3425 */
3426void
3427xmlParseComment(xmlParserCtxtPtr ctxt) {
3428 xmlChar *buf = NULL;
3429 int len;
3430 int size = XML_PARSER_BUFFER_SIZE;
3431 int q, ql;
3432 int r, rl;
3433 int cur, l;
3434 xmlParserInputState state;
3435 xmlParserInputPtr input = ctxt->input;
3436 int count = 0;
3437
3438 /*
3439 * Check that there is a comment right here.
3440 */
3441 if ((RAW != '<') || (NXT(1) != '!') ||
3442 (NXT(2) != '-') || (NXT(3) != '-')) return;
3443
3444 state = ctxt->instate;
3445 ctxt->instate = XML_PARSER_COMMENT;
3446 SHRINK;
3447 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003448 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003449 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003450 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003451 ctxt->instate = state;
3452 return;
3453 }
3454 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003455 if (q == 0)
3456 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003457 NEXTL(ql);
3458 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003459 if (r == 0)
3460 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003461 NEXTL(rl);
3462 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003463 if (cur == 0)
3464 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003465 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003466 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003467 ((cur != '>') ||
3468 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003469 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003470 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003471 }
3472 if (len + 5 >= size) {
3473 size *= 2;
3474 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3475 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003476 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003477 ctxt->instate = state;
3478 return;
3479 }
3480 }
3481 COPY_BUF(ql,buf,len,q);
3482 q = r;
3483 ql = rl;
3484 r = cur;
3485 rl = l;
3486
3487 count++;
3488 if (count > 50) {
3489 GROW;
3490 count = 0;
3491 }
3492 NEXTL(l);
3493 cur = CUR_CHAR(l);
3494 if (cur == 0) {
3495 SHRINK;
3496 GROW;
3497 cur = CUR_CHAR(l);
3498 }
3499 }
3500 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003501 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003502 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003503 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 xmlFree(buf);
3505 } else {
3506 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003507 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3508 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003509 }
3510 NEXT;
3511 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3512 (!ctxt->disableSAX))
3513 ctxt->sax->comment(ctxt->userData, buf);
3514 xmlFree(buf);
3515 }
3516 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003517 return;
3518not_terminated:
3519 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3520 "Comment not terminated\n", NULL);
3521 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003522}
3523
3524/**
3525 * xmlParsePITarget:
3526 * @ctxt: an XML parser context
3527 *
3528 * parse the name of a PI
3529 *
3530 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3531 *
3532 * Returns the PITarget name or NULL
3533 */
3534
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003535const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003536xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003537 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003538
3539 name = xmlParseName(ctxt);
3540 if ((name != NULL) &&
3541 ((name[0] == 'x') || (name[0] == 'X')) &&
3542 ((name[1] == 'm') || (name[1] == 'M')) &&
3543 ((name[2] == 'l') || (name[2] == 'L'))) {
3544 int i;
3545 if ((name[0] == 'x') && (name[1] == 'm') &&
3546 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003547 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003548 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003549 return(name);
3550 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003551 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003552 return(name);
3553 }
3554 for (i = 0;;i++) {
3555 if (xmlW3CPIs[i] == NULL) break;
3556 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3557 return(name);
3558 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003559 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3560 "xmlParsePITarget: invalid name prefix 'xml'\n",
3561 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003562 }
3563 return(name);
3564}
3565
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected value is: catalog, '=', then a single or double quoted
 * URL, with optional blanks around each piece and nothing after the
 * closing quote. A missing '=' makes the function return silently,
 * any other deviation raises an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* the '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    if (*p != '=')
        return;
    p++;

    /* the opening quote */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote)) {
        p++;
    }
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
3627
Owen Taylor3473f882001-02-23 17:55:21 +00003628/**
3629 * xmlParsePI:
3630 * @ctxt: an XML parser context
3631 *
3632 * parse an XML Processing Instruction.
3633 *
3634 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3635 *
3636 * The processing is transfered to SAX once parsed.
3637 */
3638
3639void
3640xmlParsePI(xmlParserCtxtPtr ctxt) {
3641 xmlChar *buf = NULL;
3642 int len = 0;
3643 int size = XML_PARSER_BUFFER_SIZE;
3644 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003645 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003646 xmlParserInputState state;
3647 int count = 0;
3648
3649 if ((RAW == '<') && (NXT(1) == '?')) {
3650 xmlParserInputPtr input = ctxt->input;
3651 state = ctxt->instate;
3652 ctxt->instate = XML_PARSER_PI;
3653 /*
3654 * this is a Processing Instruction.
3655 */
3656 SKIP(2);
3657 SHRINK;
3658
3659 /*
3660 * Parse the target name and check for special support like
3661 * namespace.
3662 */
3663 target = xmlParsePITarget(ctxt);
3664 if (target != NULL) {
3665 if ((RAW == '?') && (NXT(1) == '>')) {
3666 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003667 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3668 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003669 }
3670 SKIP(2);
3671
3672 /*
3673 * SAX: PI detected.
3674 */
3675 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3676 (ctxt->sax->processingInstruction != NULL))
3677 ctxt->sax->processingInstruction(ctxt->userData,
3678 target, NULL);
3679 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003680 return;
3681 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003682 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003683 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003684 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003685 ctxt->instate = state;
3686 return;
3687 }
3688 cur = CUR;
3689 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003690 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3691 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003692 }
3693 SKIP_BLANKS;
3694 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003695 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003696 ((cur != '?') || (NXT(1) != '>'))) {
3697 if (len + 5 >= size) {
3698 size *= 2;
3699 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3700 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003701 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003702 ctxt->instate = state;
3703 return;
3704 }
3705 }
3706 count++;
3707 if (count > 50) {
3708 GROW;
3709 count = 0;
3710 }
3711 COPY_BUF(l,buf,len,cur);
3712 NEXTL(l);
3713 cur = CUR_CHAR(l);
3714 if (cur == 0) {
3715 SHRINK;
3716 GROW;
3717 cur = CUR_CHAR(l);
3718 }
3719 }
3720 buf[len] = 0;
3721 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003722 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3723 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003724 } else {
3725 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003726 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3727 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003728 }
3729 SKIP(2);
3730
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003731#ifdef LIBXML_CATALOG_ENABLED
3732 if (((state == XML_PARSER_MISC) ||
3733 (state == XML_PARSER_START)) &&
3734 (xmlStrEqual(target, XML_CATALOG_PI))) {
3735 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3736 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3737 (allow == XML_CATA_ALLOW_ALL))
3738 xmlParseCatalogPI(ctxt, buf);
3739 }
3740#endif
3741
3742
Owen Taylor3473f882001-02-23 17:55:21 +00003743 /*
3744 * SAX: PI detected.
3745 */
3746 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3747 (ctxt->sax->processingInstruction != NULL))
3748 ctxt->sax->processingInstruction(ctxt->userData,
3749 target, buf);
3750 }
3751 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003752 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003753 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003754 }
3755 ctxt->instate = state;
3756 }
3757}
3758
3759/**
3760 * xmlParseNotationDecl:
3761 * @ctxt: an XML parser context
3762 *
3763 * parse a notation declaration
3764 *
3765 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3766 *
3767 * Hence there is actually 3 choices:
3768 * 'PUBLIC' S PubidLiteral
3769 * 'PUBLIC' S PubidLiteral S SystemLiteral
3770 * and 'SYSTEM' S SystemLiteral
3771 *
3772 * See the NOTE on xmlParseExternalID().
3773 */
3774
3775void
3776xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003777 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003778 xmlChar *Pubid;
3779 xmlChar *Systemid;
3780
Daniel Veillarda07050d2003-10-19 14:46:32 +00003781 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003782 xmlParserInputPtr input = ctxt->input;
3783 SHRINK;
3784 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003785 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3787 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003788 return;
3789 }
3790 SKIP_BLANKS;
3791
Daniel Veillard76d66f42001-05-16 21:05:17 +00003792 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003793 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003794 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003795 return;
3796 }
William M. Brack76e95df2003-10-18 16:20:14 +00003797 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003799 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003800 return;
3801 }
3802 SKIP_BLANKS;
3803
3804 /*
3805 * Parse the IDs.
3806 */
3807 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3808 SKIP_BLANKS;
3809
3810 if (RAW == '>') {
3811 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003812 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3813 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 NEXT;
3816 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3817 (ctxt->sax->notationDecl != NULL))
3818 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3819 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003820 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003821 }
Owen Taylor3473f882001-02-23 17:55:21 +00003822 if (Systemid != NULL) xmlFree(Systemid);
3823 if (Pubid != NULL) xmlFree(Pubid);
3824 }
3825}
3826
3827/**
3828 * xmlParseEntityDecl:
3829 * @ctxt: an XML parser context
3830 *
3831 * parse <!ENTITY declarations
3832 *
3833 * [70] EntityDecl ::= GEDecl | PEDecl
3834 *
3835 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3836 *
3837 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3838 *
3839 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3840 *
3841 * [74] PEDef ::= EntityValue | ExternalID
3842 *
3843 * [76] NDataDecl ::= S 'NDATA' S Name
3844 *
3845 * [ VC: Notation Declared ]
3846 * The Name must match the declared name of a notation.
3847 */
3848
3849void
3850xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003851 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003852 xmlChar *value = NULL;
3853 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003854 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003855 int isParameter = 0;
3856 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003857 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003858
3859 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003860 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003861 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003862 SHRINK;
3863 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003864 skipped = SKIP_BLANKS;
3865 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003866 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3867 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
Owen Taylor3473f882001-02-23 17:55:21 +00003869
3870 if (RAW == '%') {
3871 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003872 skipped = SKIP_BLANKS;
3873 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003874 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3875 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003876 }
Owen Taylor3473f882001-02-23 17:55:21 +00003877 isParameter = 1;
3878 }
3879
Daniel Veillard76d66f42001-05-16 21:05:17 +00003880 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003881 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003882 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3883 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003884 return;
3885 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003886 skipped = SKIP_BLANKS;
3887 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003888 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3889 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003890 }
Owen Taylor3473f882001-02-23 17:55:21 +00003891
Daniel Veillardf5582f12002-06-11 10:08:16 +00003892 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003893 /*
3894 * handle the various case of definitions...
3895 */
3896 if (isParameter) {
3897 if ((RAW == '"') || (RAW == '\'')) {
3898 value = xmlParseEntityValue(ctxt, &orig);
3899 if (value) {
3900 if ((ctxt->sax != NULL) &&
3901 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3902 ctxt->sax->entityDecl(ctxt->userData, name,
3903 XML_INTERNAL_PARAMETER_ENTITY,
3904 NULL, NULL, value);
3905 }
3906 } else {
3907 URI = xmlParseExternalID(ctxt, &literal, 1);
3908 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003909 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003910 }
3911 if (URI) {
3912 xmlURIPtr uri;
3913
3914 uri = xmlParseURI((const char *) URI);
3915 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003916 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3917 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003918 /*
3919 * This really ought to be a well formedness error
3920 * but the XML Core WG decided otherwise c.f. issue
3921 * E26 of the XML erratas.
3922 */
Owen Taylor3473f882001-02-23 17:55:21 +00003923 } else {
3924 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003925 /*
3926 * Okay this is foolish to block those but not
3927 * invalid URIs.
3928 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003929 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 } else {
3931 if ((ctxt->sax != NULL) &&
3932 (!ctxt->disableSAX) &&
3933 (ctxt->sax->entityDecl != NULL))
3934 ctxt->sax->entityDecl(ctxt->userData, name,
3935 XML_EXTERNAL_PARAMETER_ENTITY,
3936 literal, URI, NULL);
3937 }
3938 xmlFreeURI(uri);
3939 }
3940 }
3941 }
3942 } else {
3943 if ((RAW == '"') || (RAW == '\'')) {
3944 value = xmlParseEntityValue(ctxt, &orig);
3945 if ((ctxt->sax != NULL) &&
3946 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3947 ctxt->sax->entityDecl(ctxt->userData, name,
3948 XML_INTERNAL_GENERAL_ENTITY,
3949 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003950 /*
3951 * For expat compatibility in SAX mode.
3952 */
3953 if ((ctxt->myDoc == NULL) ||
3954 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3955 if (ctxt->myDoc == NULL) {
3956 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3957 }
3958 if (ctxt->myDoc->intSubset == NULL)
3959 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3960 BAD_CAST "fake", NULL, NULL);
3961
Daniel Veillard1af9a412003-08-20 22:54:39 +00003962 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3963 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003964 }
Owen Taylor3473f882001-02-23 17:55:21 +00003965 } else {
3966 URI = xmlParseExternalID(ctxt, &literal, 1);
3967 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003969 }
3970 if (URI) {
3971 xmlURIPtr uri;
3972
3973 uri = xmlParseURI((const char *)URI);
3974 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003975 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3976 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003977 /*
3978 * This really ought to be a well formedness error
3979 * but the XML Core WG decided otherwise c.f. issue
3980 * E26 of the XML erratas.
3981 */
Owen Taylor3473f882001-02-23 17:55:21 +00003982 } else {
3983 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003984 /*
3985 * Okay this is foolish to block those but not
3986 * invalid URIs.
3987 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003988 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003989 }
3990 xmlFreeURI(uri);
3991 }
3992 }
William M. Brack76e95df2003-10-18 16:20:14 +00003993 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3995 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003996 }
3997 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003998 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003999 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004000 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004001 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4002 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004003 }
4004 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004005 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004006 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4007 (ctxt->sax->unparsedEntityDecl != NULL))
4008 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4009 literal, URI, ndata);
4010 } else {
4011 if ((ctxt->sax != NULL) &&
4012 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4013 ctxt->sax->entityDecl(ctxt->userData, name,
4014 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4015 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004016 /*
4017 * For expat compatibility in SAX mode.
4018 * assuming the entity replacement was asked for
4019 */
4020 if ((ctxt->replaceEntities != 0) &&
4021 ((ctxt->myDoc == NULL) ||
4022 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4023 if (ctxt->myDoc == NULL) {
4024 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4025 }
4026
4027 if (ctxt->myDoc->intSubset == NULL)
4028 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4029 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004030 xmlSAX2EntityDecl(ctxt, name,
4031 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4032 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004033 }
Owen Taylor3473f882001-02-23 17:55:21 +00004034 }
4035 }
4036 }
4037 SKIP_BLANKS;
4038 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004039 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004040 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 } else {
4042 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004043 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4044 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004045 }
4046 NEXT;
4047 }
4048 if (orig != NULL) {
4049 /*
4050 * Ugly mechanism to save the raw entity value.
4051 */
4052 xmlEntityPtr cur = NULL;
4053
4054 if (isParameter) {
4055 if ((ctxt->sax != NULL) &&
4056 (ctxt->sax->getParameterEntity != NULL))
4057 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4058 } else {
4059 if ((ctxt->sax != NULL) &&
4060 (ctxt->sax->getEntity != NULL))
4061 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004062 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004063 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004064 }
Owen Taylor3473f882001-02-23 17:55:21 +00004065 }
4066 if (cur != NULL) {
4067 if (cur->orig != NULL)
4068 xmlFree(orig);
4069 else
4070 cur->orig = orig;
4071 } else
4072 xmlFree(orig);
4073 }
Owen Taylor3473f882001-02-23 17:55:21 +00004074 if (value != NULL) xmlFree(value);
4075 if (URI != NULL) xmlFree(URI);
4076 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004077 }
4078}
4079
4080/**
4081 * xmlParseDefaultDecl:
4082 * @ctxt: an XML parser context
4083 * @value: Receive a possible fixed default value for the attribute
4084 *
4085 * Parse an attribute default declaration
4086 *
4087 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4088 *
4089 * [ VC: Required Attribute ]
4090 * if the default declaration is the keyword #REQUIRED, then the
4091 * attribute must be specified for all elements of the type in the
4092 * attribute-list declaration.
4093 *
4094 * [ VC: Attribute Default Legal ]
4095 * The declared default value must meet the lexical constraints of
4096 * the declared attribute type c.f. xmlValidateAttributeDecl()
4097 *
4098 * [ VC: Fixed Attribute Default ]
4099 * if an attribute has a default value declared with the #FIXED
4100 * keyword, instances of that attribute must match the default value.
4101 *
4102 * [ WFC: No < in Attribute Values ]
4103 * handled in xmlParseAttValue()
4104 *
4105 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4106 * or XML_ATTRIBUTE_FIXED.
4107 */
4108
4109int
4110xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4111 int val;
4112 xmlChar *ret;
4113
4114 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004115 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004116 SKIP(9);
4117 return(XML_ATTRIBUTE_REQUIRED);
4118 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004119 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004120 SKIP(8);
4121 return(XML_ATTRIBUTE_IMPLIED);
4122 }
4123 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004124 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004125 SKIP(6);
4126 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004127 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004128 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4129 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004130 }
4131 SKIP_BLANKS;
4132 }
4133 ret = xmlParseAttValue(ctxt);
4134 ctxt->instate = XML_PARSER_DTD;
4135 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004136 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004137 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004138 } else
4139 *value = ret;
4140 return(val);
4141}
4142
4143/**
4144 * xmlParseNotationType:
4145 * @ctxt: an XML parser context
4146 *
4147 * parse an Notation attribute type.
4148 *
4149 * Note: the leading 'NOTATION' S part has already being parsed...
4150 *
4151 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4152 *
4153 * [ VC: Notation Attributes ]
4154 * Values of this type must match one of the notation names included
4155 * in the declaration; all notation names in the declaration must be declared.
4156 *
4157 * Returns: the notation attribute tree built while parsing
4158 */
4159
4160xmlEnumerationPtr
4161xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004162 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004163 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4164
4165 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004166 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004167 return(NULL);
4168 }
4169 SHRINK;
4170 do {
4171 NEXT;
4172 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004173 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004174 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004175 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4176 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004177 return(ret);
4178 }
4179 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004180 if (cur == NULL) return(ret);
4181 if (last == NULL) ret = last = cur;
4182 else {
4183 last->next = cur;
4184 last = cur;
4185 }
4186 SKIP_BLANKS;
4187 } while (RAW == '|');
4188 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004189 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004190 if ((last != NULL) && (last != ret))
4191 xmlFreeEnumeration(last);
4192 return(ret);
4193 }
4194 NEXT;
4195 return(ret);
4196}
4197
4198/**
4199 * xmlParseEnumerationType:
4200 * @ctxt: an XML parser context
4201 *
4202 * parse an Enumeration attribute type.
4203 *
4204 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4205 *
4206 * [ VC: Enumeration ]
4207 * Values of this type must match one of the Nmtoken tokens in
4208 * the declaration
4209 *
4210 * Returns: the enumeration attribute tree built while parsing
4211 */
4212
4213xmlEnumerationPtr
4214xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4215 xmlChar *name;
4216 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4217
4218 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004219 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004220 return(NULL);
4221 }
4222 SHRINK;
4223 do {
4224 NEXT;
4225 SKIP_BLANKS;
4226 name = xmlParseNmtoken(ctxt);
4227 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004228 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004229 return(ret);
4230 }
4231 cur = xmlCreateEnumeration(name);
4232 xmlFree(name);
4233 if (cur == NULL) return(ret);
4234 if (last == NULL) ret = last = cur;
4235 else {
4236 last->next = cur;
4237 last = cur;
4238 }
4239 SKIP_BLANKS;
4240 } while (RAW == '|');
4241 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004242 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004243 return(ret);
4244 }
4245 NEXT;
4246 return(ret);
4247}
4248
4249/**
4250 * xmlParseEnumeratedType:
4251 * @ctxt: an XML parser context
4252 * @tree: the enumeration tree built while parsing
4253 *
4254 * parse an Enumerated attribute type.
4255 *
4256 * [57] EnumeratedType ::= NotationType | Enumeration
4257 *
4258 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4259 *
4260 *
4261 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4262 */
4263
4264int
4265xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004266 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004267 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004268 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004269 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4270 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004271 return(0);
4272 }
4273 SKIP_BLANKS;
4274 *tree = xmlParseNotationType(ctxt);
4275 if (*tree == NULL) return(0);
4276 return(XML_ATTRIBUTE_NOTATION);
4277 }
4278 *tree = xmlParseEnumerationType(ctxt);
4279 if (*tree == NULL) return(0);
4280 return(XML_ATTRIBUTE_ENUMERATION);
4281}
4282
4283/**
4284 * xmlParseAttributeType:
4285 * @ctxt: an XML parser context
4286 * @tree: the enumeration tree built while parsing
4287 *
4288 * parse the Attribute list def for an element
4289 *
4290 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4291 *
4292 * [55] StringType ::= 'CDATA'
4293 *
4294 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4295 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4296 *
4297 * Validity constraints for attribute values syntax are checked in
4298 * xmlValidateAttributeValue()
4299 *
4300 * [ VC: ID ]
4301 * Values of type ID must match the Name production. A name must not
4302 * appear more than once in an XML document as a value of this type;
4303 * i.e., ID values must uniquely identify the elements which bear them.
4304 *
4305 * [ VC: One ID per Element Type ]
4306 * No element type may have more than one ID attribute specified.
4307 *
4308 * [ VC: ID Attribute Default ]
4309 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4310 *
4311 * [ VC: IDREF ]
4312 * Values of type IDREF must match the Name production, and values
4313 * of type IDREFS must match Names; each IDREF Name must match the value
4314 * of an ID attribute on some element in the XML document; i.e. IDREF
4315 * values must match the value of some ID attribute.
4316 *
4317 * [ VC: Entity Name ]
4318 * Values of type ENTITY must match the Name production, values
4319 * of type ENTITIES must match Names; each Entity Name must match the
4320 * name of an unparsed entity declared in the DTD.
4321 *
4322 * [ VC: Name Token ]
4323 * Values of type NMTOKEN must match the Nmtoken production; values
4324 * of type NMTOKENS must match Nmtokens.
4325 *
4326 * Returns the attribute type
4327 */
4328int
4329xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4330 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004331 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004332 SKIP(5);
4333 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004334 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004335 SKIP(6);
4336 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004337 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004338 SKIP(5);
4339 return(XML_ATTRIBUTE_IDREF);
4340 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4341 SKIP(2);
4342 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004343 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004344 SKIP(6);
4345 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004346 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004347 SKIP(8);
4348 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004349 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004350 SKIP(8);
4351 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004352 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004353 SKIP(7);
4354 return(XML_ATTRIBUTE_NMTOKEN);
4355 }
4356 return(xmlParseEnumeratedType(ctxt, tree));
4357}
4358
4359/**
4360 * xmlParseAttributeListDecl:
4361 * @ctxt: an XML parser context
4362 *
4363 * : parse the Attribute list def for an element
4364 *
4365 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4366 *
4367 * [53] AttDef ::= S Name S AttType S DefaultDecl
4368 *
4369 */
4370void
4371xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004372 const xmlChar *elemName;
4373 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004374 xmlEnumerationPtr tree;
4375
Daniel Veillarda07050d2003-10-19 14:46:32 +00004376 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004377 xmlParserInputPtr input = ctxt->input;
4378
4379 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004380 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004381 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004382 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004383 }
4384 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004385 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004386 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004387 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4388 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004389 return;
4390 }
4391 SKIP_BLANKS;
4392 GROW;
4393 while (RAW != '>') {
4394 const xmlChar *check = CUR_PTR;
4395 int type;
4396 int def;
4397 xmlChar *defaultValue = NULL;
4398
4399 GROW;
4400 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004401 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004402 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004403 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4404 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004405 break;
4406 }
4407 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004408 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004410 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004411 if (defaultValue != NULL)
4412 xmlFree(defaultValue);
4413 break;
4414 }
4415 SKIP_BLANKS;
4416
4417 type = xmlParseAttributeType(ctxt, &tree);
4418 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004419 if (defaultValue != NULL)
4420 xmlFree(defaultValue);
4421 break;
4422 }
4423
4424 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004425 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4427 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004428 if (defaultValue != NULL)
4429 xmlFree(defaultValue);
4430 if (tree != NULL)
4431 xmlFreeEnumeration(tree);
4432 break;
4433 }
4434 SKIP_BLANKS;
4435
4436 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4437 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004438 if (defaultValue != NULL)
4439 xmlFree(defaultValue);
4440 if (tree != NULL)
4441 xmlFreeEnumeration(tree);
4442 break;
4443 }
4444
4445 GROW;
4446 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004447 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004449 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004450 if (defaultValue != NULL)
4451 xmlFree(defaultValue);
4452 if (tree != NULL)
4453 xmlFreeEnumeration(tree);
4454 break;
4455 }
4456 SKIP_BLANKS;
4457 }
4458 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004459 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4460 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004461 if (defaultValue != NULL)
4462 xmlFree(defaultValue);
4463 if (tree != NULL)
4464 xmlFreeEnumeration(tree);
4465 break;
4466 }
4467 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4468 (ctxt->sax->attributeDecl != NULL))
4469 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4470 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004471 else if (tree != NULL)
4472 xmlFreeEnumeration(tree);
4473
4474 if ((ctxt->sax2) && (defaultValue != NULL) &&
4475 (def != XML_ATTRIBUTE_IMPLIED) &&
4476 (def != XML_ATTRIBUTE_REQUIRED)) {
4477 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4478 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004479 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4480 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4481 }
Owen Taylor3473f882001-02-23 17:55:21 +00004482 if (defaultValue != NULL)
4483 xmlFree(defaultValue);
4484 GROW;
4485 }
4486 if (RAW == '>') {
4487 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4489 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 }
4491 NEXT;
4492 }
Owen Taylor3473f882001-02-23 17:55:21 +00004493 }
4494}
4495
4496/**
4497 * xmlParseElementMixedContentDecl:
4498 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004499 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004500 *
4501 * parse the declaration for a Mixed Element content
4502 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4503 *
4504 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4505 * '(' S? '#PCDATA' S? ')'
4506 *
4507 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4508 *
4509 * [ VC: No Duplicate Types ]
4510 * The same name must not appear more than once in a single
4511 * mixed-content declaration.
4512 *
4513 * returns: the list of the xmlElementContentPtr describing the element choices
4514 */
4515xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004516xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004517 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004518 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004519
4520 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004521 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004522 SKIP(7);
4523 SKIP_BLANKS;
4524 SHRINK;
4525 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004526 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004527 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4528"Element content declaration doesn't start and stop in the same entity\n",
4529 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004530 }
Owen Taylor3473f882001-02-23 17:55:21 +00004531 NEXT;
4532 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4533 if (RAW == '*') {
4534 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4535 NEXT;
4536 }
4537 return(ret);
4538 }
4539 if ((RAW == '(') || (RAW == '|')) {
4540 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4541 if (ret == NULL) return(NULL);
4542 }
4543 while (RAW == '|') {
4544 NEXT;
4545 if (elem == NULL) {
4546 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4547 if (ret == NULL) return(NULL);
4548 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004549 if (cur != NULL)
4550 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004551 cur = ret;
4552 } else {
4553 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4554 if (n == NULL) return(NULL);
4555 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004556 if (n->c1 != NULL)
4557 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004558 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004559 if (n != NULL)
4560 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004561 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004562 }
4563 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004564 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004565 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004566 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004567 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004568 xmlFreeElementContent(cur);
4569 return(NULL);
4570 }
4571 SKIP_BLANKS;
4572 GROW;
4573 }
4574 if ((RAW == ')') && (NXT(1) == '*')) {
4575 if (elem != NULL) {
4576 cur->c2 = xmlNewElementContent(elem,
4577 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004578 if (cur->c2 != NULL)
4579 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004580 }
4581 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004582 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004583 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4584"Element content declaration doesn't start and stop in the same entity\n",
4585 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004586 }
Owen Taylor3473f882001-02-23 17:55:21 +00004587 SKIP(2);
4588 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004589 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004590 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004591 return(NULL);
4592 }
4593
4594 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004595 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004596 }
4597 return(ret);
4598}
4599
4600/**
4601 * xmlParseElementChildrenContentDecl:
4602 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004603 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004604 *
4605 * parse the declaration for an element children content model
4606 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4607 *
4608 *
4609 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4610 *
4611 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4612 *
4613 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4614 *
4615 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4616 *
4617 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4618 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004619 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004620 * opening or closing parentheses in a choice, seq, or Mixed
4621 * construct is contained in the replacement text for a parameter
4622 * entity, both must be contained in the same replacement text. For
4623 * interoperability, if a parameter-entity reference appears in a
4624 * choice, seq, or Mixed construct, its replacement text should not
4625 * be empty, and neither the first nor last non-blank character of
4626 * the replacement text should be a connector (| or ,).
4627 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004628 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004629 * hierarchy.
4630 */
4631xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004632xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004633 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004634 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004635 xmlChar type = 0;
4636
4637 SKIP_BLANKS;
4638 GROW;
4639 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004640 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004641
Owen Taylor3473f882001-02-23 17:55:21 +00004642 /* Recurse on first child */
4643 NEXT;
4644 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004645 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004646 SKIP_BLANKS;
4647 GROW;
4648 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004649 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004650 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004651 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004652 return(NULL);
4653 }
4654 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004655 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004656 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004657 return(NULL);
4658 }
Owen Taylor3473f882001-02-23 17:55:21 +00004659 GROW;
4660 if (RAW == '?') {
4661 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4662 NEXT;
4663 } else if (RAW == '*') {
4664 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4665 NEXT;
4666 } else if (RAW == '+') {
4667 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4668 NEXT;
4669 } else {
4670 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4671 }
Owen Taylor3473f882001-02-23 17:55:21 +00004672 GROW;
4673 }
4674 SKIP_BLANKS;
4675 SHRINK;
4676 while (RAW != ')') {
4677 /*
4678 * Each loop we parse one separator and one element.
4679 */
4680 if (RAW == ',') {
4681 if (type == 0) type = CUR;
4682
4683 /*
4684 * Detect "Name | Name , Name" error
4685 */
4686 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004687 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004688 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004689 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004690 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004691 xmlFreeElementContent(last);
4692 if (ret != NULL)
4693 xmlFreeElementContent(ret);
4694 return(NULL);
4695 }
4696 NEXT;
4697
4698 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4699 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004700 if ((last != NULL) && (last != ret))
4701 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004702 xmlFreeElementContent(ret);
4703 return(NULL);
4704 }
4705 if (last == NULL) {
4706 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004707 if (ret != NULL)
4708 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004709 ret = cur = op;
4710 } else {
4711 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004712 if (op != NULL)
4713 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004714 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004715 if (last != NULL)
4716 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004717 cur =op;
4718 last = NULL;
4719 }
4720 } else if (RAW == '|') {
4721 if (type == 0) type = CUR;
4722
4723 /*
4724 * Detect "Name , Name | Name" error
4725 */
4726 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004727 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004728 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004729 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004730 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004731 xmlFreeElementContent(last);
4732 if (ret != NULL)
4733 xmlFreeElementContent(ret);
4734 return(NULL);
4735 }
4736 NEXT;
4737
4738 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4739 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004740 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004741 xmlFreeElementContent(last);
4742 if (ret != NULL)
4743 xmlFreeElementContent(ret);
4744 return(NULL);
4745 }
4746 if (last == NULL) {
4747 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004748 if (ret != NULL)
4749 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004750 ret = cur = op;
4751 } else {
4752 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004753 if (op != NULL)
4754 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004755 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004756 if (last != NULL)
4757 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004758 cur =op;
4759 last = NULL;
4760 }
4761 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004762 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004763 if (ret != NULL)
4764 xmlFreeElementContent(ret);
4765 return(NULL);
4766 }
4767 GROW;
4768 SKIP_BLANKS;
4769 GROW;
4770 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004771 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004772 /* Recurse on second child */
4773 NEXT;
4774 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004775 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004776 SKIP_BLANKS;
4777 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004778 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004779 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004780 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004781 if (ret != NULL)
4782 xmlFreeElementContent(ret);
4783 return(NULL);
4784 }
4785 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004786 if (RAW == '?') {
4787 last->ocur = XML_ELEMENT_CONTENT_OPT;
4788 NEXT;
4789 } else if (RAW == '*') {
4790 last->ocur = XML_ELEMENT_CONTENT_MULT;
4791 NEXT;
4792 } else if (RAW == '+') {
4793 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4794 NEXT;
4795 } else {
4796 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4797 }
4798 }
4799 SKIP_BLANKS;
4800 GROW;
4801 }
4802 if ((cur != NULL) && (last != NULL)) {
4803 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004804 if (last != NULL)
4805 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004806 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004807 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004808 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4809"Element content declaration doesn't start and stop in the same entity\n",
4810 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004811 }
Owen Taylor3473f882001-02-23 17:55:21 +00004812 NEXT;
4813 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004814 if (ret != NULL) {
4815 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4816 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4817 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4818 else
4819 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4820 }
Owen Taylor3473f882001-02-23 17:55:21 +00004821 NEXT;
4822 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004823 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004824 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004825 cur = ret;
4826 /*
4827 * Some normalization:
4828 * (a | b* | c?)* == (a | b | c)*
4829 */
4830 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4831 if ((cur->c1 != NULL) &&
4832 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4833 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4834 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4835 if ((cur->c2 != NULL) &&
4836 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4837 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4838 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4839 cur = cur->c2;
4840 }
4841 }
Owen Taylor3473f882001-02-23 17:55:21 +00004842 NEXT;
4843 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004844 if (ret != NULL) {
4845 int found = 0;
4846
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004847 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4848 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4849 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004850 else
4851 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004852 /*
4853 * Some normalization:
4854 * (a | b*)+ == (a | b)*
4855 * (a | b?)+ == (a | b)*
4856 */
4857 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4858 if ((cur->c1 != NULL) &&
4859 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4860 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4861 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4862 found = 1;
4863 }
4864 if ((cur->c2 != NULL) &&
4865 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4866 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4867 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4868 found = 1;
4869 }
4870 cur = cur->c2;
4871 }
4872 if (found)
4873 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4874 }
Owen Taylor3473f882001-02-23 17:55:21 +00004875 NEXT;
4876 }
4877 return(ret);
4878}
4879
4880/**
4881 * xmlParseElementContentDecl:
4882 * @ctxt: an XML parser context
4883 * @name: the name of the element being defined.
4884 * @result: the Element Content pointer will be stored here if any
4885 *
4886 * parse the declaration for an Element content either Mixed or Children,
4887 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4888 *
4889 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4890 *
4891 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4892 */
4893
4894int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004895xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004896 xmlElementContentPtr *result) {
4897
4898 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004899 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004900 int res;
4901
4902 *result = NULL;
4903
4904 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004905 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004906 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004907 return(-1);
4908 }
4909 NEXT;
4910 GROW;
4911 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004912 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004913 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004914 res = XML_ELEMENT_TYPE_MIXED;
4915 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004916 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004917 res = XML_ELEMENT_TYPE_ELEMENT;
4918 }
Owen Taylor3473f882001-02-23 17:55:21 +00004919 SKIP_BLANKS;
4920 *result = tree;
4921 return(res);
4922}
4923
4924/**
4925 * xmlParseElementDecl:
4926 * @ctxt: an XML parser context
4927 *
4928 * parse an Element declaration.
4929 *
4930 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4931 *
4932 * [ VC: Unique Element Type Declaration ]
4933 * No element type may be declared more than once
4934 *
4935 * Returns the type of the element, or -1 in case of error
4936 */
4937int
4938xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004939 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004940 int ret = -1;
4941 xmlElementContentPtr content = NULL;
4942
4943 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004944 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004945 xmlParserInputPtr input = ctxt->input;
4946
4947 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004948 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004949 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4950 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004951 }
4952 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004953 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004954 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004955 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4956 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004957 return(-1);
4958 }
4959 while ((RAW == 0) && (ctxt->inputNr > 1))
4960 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004961 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004962 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4963 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004964 }
4965 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004966 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004967 SKIP(5);
4968 /*
4969 * Element must always be empty.
4970 */
4971 ret = XML_ELEMENT_TYPE_EMPTY;
4972 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4973 (NXT(2) == 'Y')) {
4974 SKIP(3);
4975 /*
4976 * Element is a generic container.
4977 */
4978 ret = XML_ELEMENT_TYPE_ANY;
4979 } else if (RAW == '(') {
4980 ret = xmlParseElementContentDecl(ctxt, name, &content);
4981 } else {
4982 /*
4983 * [ WFC: PEs in Internal Subset ] error handling.
4984 */
4985 if ((RAW == '%') && (ctxt->external == 0) &&
4986 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004987 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004988 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004989 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004990 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004991 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4992 }
Owen Taylor3473f882001-02-23 17:55:21 +00004993 return(-1);
4994 }
4995
4996 SKIP_BLANKS;
4997 /*
4998 * Pop-up of finished entities.
4999 */
5000 while ((RAW == 0) && (ctxt->inputNr > 1))
5001 xmlPopInput(ctxt);
5002 SKIP_BLANKS;
5003
5004 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005005 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005006 } else {
5007 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005008 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5009 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
5011
5012 NEXT;
5013 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5014 (ctxt->sax->elementDecl != NULL))
5015 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5016 content);
5017 }
5018 if (content != NULL) {
5019 xmlFreeElementContent(content);
5020 }
Owen Taylor3473f882001-02-23 17:55:21 +00005021 }
5022 return(ret);
5023}
5024
5025/**
Owen Taylor3473f882001-02-23 17:55:21 +00005026 * xmlParseConditionalSections
5027 * @ctxt: an XML parser context
5028 *
5029 * [61] conditionalSect ::= includeSect | ignoreSect
5030 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5031 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5032 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5033 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5034 */
5035
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005036static void
Owen Taylor3473f882001-02-23 17:55:21 +00005037xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5038 SKIP(3);
5039 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005040 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005041 SKIP(7);
5042 SKIP_BLANKS;
5043 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005044 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005045 } else {
5046 NEXT;
5047 }
5048 if (xmlParserDebugEntities) {
5049 if ((ctxt->input != NULL) && (ctxt->input->filename))
5050 xmlGenericError(xmlGenericErrorContext,
5051 "%s(%d): ", ctxt->input->filename,
5052 ctxt->input->line);
5053 xmlGenericError(xmlGenericErrorContext,
5054 "Entering INCLUDE Conditional Section\n");
5055 }
5056
5057 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5058 (NXT(2) != '>'))) {
5059 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005060 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005061
5062 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5063 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005064 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005065 NEXT;
5066 } else if (RAW == '%') {
5067 xmlParsePEReference(ctxt);
5068 } else
5069 xmlParseMarkupDecl(ctxt);
5070
5071 /*
5072 * Pop-up of finished entities.
5073 */
5074 while ((RAW == 0) && (ctxt->inputNr > 1))
5075 xmlPopInput(ctxt);
5076
Daniel Veillardfdc91562002-07-01 21:52:03 +00005077 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005078 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005079 break;
5080 }
5081 }
5082 if (xmlParserDebugEntities) {
5083 if ((ctxt->input != NULL) && (ctxt->input->filename))
5084 xmlGenericError(xmlGenericErrorContext,
5085 "%s(%d): ", ctxt->input->filename,
5086 ctxt->input->line);
5087 xmlGenericError(xmlGenericErrorContext,
5088 "Leaving INCLUDE Conditional Section\n");
5089 }
5090
Daniel Veillarda07050d2003-10-19 14:46:32 +00005091 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005092 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005093 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005094 int depth = 0;
5095
5096 SKIP(6);
5097 SKIP_BLANKS;
5098 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005099 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005100 } else {
5101 NEXT;
5102 }
5103 if (xmlParserDebugEntities) {
5104 if ((ctxt->input != NULL) && (ctxt->input->filename))
5105 xmlGenericError(xmlGenericErrorContext,
5106 "%s(%d): ", ctxt->input->filename,
5107 ctxt->input->line);
5108 xmlGenericError(xmlGenericErrorContext,
5109 "Entering IGNORE Conditional Section\n");
5110 }
5111
5112 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005113 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005114 * But disable SAX event generating DTD building in the meantime
5115 */
5116 state = ctxt->disableSAX;
5117 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005118 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005119 ctxt->instate = XML_PARSER_IGNORE;
5120
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005121 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005122 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5123 depth++;
5124 SKIP(3);
5125 continue;
5126 }
5127 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5128 if (--depth >= 0) SKIP(3);
5129 continue;
5130 }
5131 NEXT;
5132 continue;
5133 }
5134
5135 ctxt->disableSAX = state;
5136 ctxt->instate = instate;
5137
5138 if (xmlParserDebugEntities) {
5139 if ((ctxt->input != NULL) && (ctxt->input->filename))
5140 xmlGenericError(xmlGenericErrorContext,
5141 "%s(%d): ", ctxt->input->filename,
5142 ctxt->input->line);
5143 xmlGenericError(xmlGenericErrorContext,
5144 "Leaving IGNORE Conditional Section\n");
5145 }
5146
5147 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005148 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005149 }
5150
5151 if (RAW == 0)
5152 SHRINK;
5153
5154 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005155 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 } else {
5157 SKIP(3);
5158 }
5159}
5160
5161/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005162 * xmlParseMarkupDecl:
5163 * @ctxt: an XML parser context
5164 *
5165 * parse Markup declarations
5166 *
5167 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5168 * NotationDecl | PI | Comment
5169 *
5170 * [ VC: Proper Declaration/PE Nesting ]
5171 * Parameter-entity replacement text must be properly nested with
5172 * markup declarations. That is to say, if either the first character
5173 * or the last character of a markup declaration (markupdecl above) is
5174 * contained in the replacement text for a parameter-entity reference,
5175 * both must be contained in the same replacement text.
5176 *
5177 * [ WFC: PEs in Internal Subset ]
5178 * In the internal DTD subset, parameter-entity references can occur
5179 * only where markup declarations can occur, not within markup declarations.
5180 * (This does not apply to references that occur in external parameter
5181 * entities or to the external subset.)
5182 */
5183void
5184xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5185 GROW;
5186 xmlParseElementDecl(ctxt);
5187 xmlParseAttributeListDecl(ctxt);
5188 xmlParseEntityDecl(ctxt);
5189 xmlParseNotationDecl(ctxt);
5190 xmlParsePI(ctxt);
5191 xmlParseComment(ctxt);
5192 /*
5193 * This is only for internal subset. On external entities,
5194 * the replacement is done before parsing stage
5195 */
5196 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5197 xmlParsePEReference(ctxt);
5198
5199 /*
5200 * Conditional sections are allowed from entities included
5201 * by PE References in the internal subset.
5202 */
5203 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5204 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5205 xmlParseConditionalSections(ctxt);
5206 }
5207 }
5208
5209 ctxt->instate = XML_PARSER_DTD;
5210}
5211
5212/**
5213 * xmlParseTextDecl:
5214 * @ctxt: an XML parser context
5215 *
5216 * parse an XML declaration header for external entities
5217 *
5218 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5219 *
5220 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5221 */
5222
5223void
5224xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5225 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005226 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005227
5228 /*
5229 * We know that '<?xml' is here.
5230 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005231 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005232 SKIP(5);
5233 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005234 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005235 return;
5236 }
5237
William M. Brack76e95df2003-10-18 16:20:14 +00005238 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005239 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5240 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005241 }
5242 SKIP_BLANKS;
5243
5244 /*
5245 * We may have the VersionInfo here.
5246 */
5247 version = xmlParseVersionInfo(ctxt);
5248 if (version == NULL)
5249 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005250 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005251 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5253 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005254 }
5255 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005256 ctxt->input->version = version;
5257
5258 /*
5259 * We must have the encoding declaration
5260 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005261 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005262 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5263 /*
5264 * The XML REC instructs us to stop parsing right here
5265 */
5266 return;
5267 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005268 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5269 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5270 "Missing encoding in text declaration\n");
5271 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005272
5273 SKIP_BLANKS;
5274 if ((RAW == '?') && (NXT(1) == '>')) {
5275 SKIP(2);
5276 } else if (RAW == '>') {
5277 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005278 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005279 NEXT;
5280 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005281 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005282 MOVETO_ENDTAG(CUR_PTR);
5283 NEXT;
5284 }
5285}
5286
5287/**
Owen Taylor3473f882001-02-23 17:55:21 +00005288 * xmlParseExternalSubset:
5289 * @ctxt: an XML parser context
5290 * @ExternalID: the external identifier
5291 * @SystemID: the system identifier (or URL)
5292 *
5293 * parse Markup declarations from an external subset
5294 *
5295 * [30] extSubset ::= textDecl? extSubsetDecl
5296 *
5297 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5298 */
5299void
5300xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5301 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005302 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005303 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005304 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005305 xmlParseTextDecl(ctxt);
5306 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5307 /*
5308 * The XML REC instructs us to stop parsing right here
5309 */
5310 ctxt->instate = XML_PARSER_EOF;
5311 return;
5312 }
5313 }
5314 if (ctxt->myDoc == NULL) {
5315 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5316 }
5317 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5318 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5319
5320 ctxt->instate = XML_PARSER_DTD;
5321 ctxt->external = 1;
5322 while (((RAW == '<') && (NXT(1) == '?')) ||
5323 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005324 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005325 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005326 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005327
5328 GROW;
5329 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5330 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005331 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005332 NEXT;
5333 } else if (RAW == '%') {
5334 xmlParsePEReference(ctxt);
5335 } else
5336 xmlParseMarkupDecl(ctxt);
5337
5338 /*
5339 * Pop-up of finished entities.
5340 */
5341 while ((RAW == 0) && (ctxt->inputNr > 1))
5342 xmlPopInput(ctxt);
5343
Daniel Veillardfdc91562002-07-01 21:52:03 +00005344 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005345 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005346 break;
5347 }
5348 }
5349
5350 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005351 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005352 }
5353
5354}
5355
5356/**
5357 * xmlParseReference:
5358 * @ctxt: an XML parser context
5359 *
5360 * parse and handle entity references in content, depending on the SAX
5361 * interface, this may end-up in a call to character() if this is a
5362 * CharRef, a predefined entity, if there is no reference() callback.
5363 * or if the parser was asked to switch to that mode.
5364 *
5365 * [67] Reference ::= EntityRef | CharRef
5366 */
5367void
5368xmlParseReference(xmlParserCtxtPtr ctxt) {
5369 xmlEntityPtr ent;
5370 xmlChar *val;
5371 if (RAW != '&') return;
5372
5373 if (NXT(1) == '#') {
5374 int i = 0;
5375 xmlChar out[10];
5376 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005377 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005378
5379 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5380 /*
5381 * So we are using non-UTF-8 buffers
5382 * Check that the char fit on 8bits, if not
5383 * generate a CharRef.
5384 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005385 if (value <= 0xFF) {
5386 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005387 out[1] = 0;
5388 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5389 (!ctxt->disableSAX))
5390 ctxt->sax->characters(ctxt->userData, out, 1);
5391 } else {
5392 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005393 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005394 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005395 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005396 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5397 (!ctxt->disableSAX))
5398 ctxt->sax->reference(ctxt->userData, out);
5399 }
5400 } else {
5401 /*
5402 * Just encode the value in UTF-8
5403 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005404 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005405 out[i] = 0;
5406 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5407 (!ctxt->disableSAX))
5408 ctxt->sax->characters(ctxt->userData, out, i);
5409 }
5410 } else {
5411 ent = xmlParseEntityRef(ctxt);
5412 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005413 if (!ctxt->wellFormed)
5414 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005415 if ((ent->name != NULL) &&
5416 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5417 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005418 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005419
5420
5421 /*
5422 * The first reference to the entity trigger a parsing phase
5423 * where the ent->children is filled with the result from
5424 * the parsing.
5425 */
5426 if (ent->children == NULL) {
5427 xmlChar *value;
5428 value = ent->content;
5429
5430 /*
5431 * Check that this entity is well formed
5432 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005433 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005434 (value[1] == 0) && (value[0] == '<') &&
5435 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5436 /*
5437 * DONE: get definite answer on this !!!
5438 * Lots of entity decls are used to declare a single
5439 * char
5440 * <!ENTITY lt "<">
5441 * Which seems to be valid since
5442 * 2.4: The ampersand character (&) and the left angle
5443 * bracket (<) may appear in their literal form only
5444 * when used ... They are also legal within the literal
5445 * entity value of an internal entity declaration;i
5446 * see "4.3.2 Well-Formed Parsed Entities".
5447 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5448 * Looking at the OASIS test suite and James Clark
5449 * tests, this is broken. However the XML REC uses
5450 * it. Is the XML REC not well-formed ????
5451 * This is a hack to avoid this problem
5452 *
5453 * ANSWER: since lt gt amp .. are already defined,
5454 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005455 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005456 * is lousy but acceptable.
5457 */
5458 list = xmlNewDocText(ctxt->myDoc, value);
5459 if (list != NULL) {
5460 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5461 (ent->children == NULL)) {
5462 ent->children = list;
5463 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005464 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005465 list->parent = (xmlNodePtr) ent;
5466 } else {
5467 xmlFreeNodeList(list);
5468 }
5469 } else if (list != NULL) {
5470 xmlFreeNodeList(list);
5471 }
5472 } else {
5473 /*
5474 * 4.3.2: An internal general parsed entity is well-formed
5475 * if its replacement text matches the production labeled
5476 * content.
5477 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005478
5479 void *user_data;
5480 /*
5481 * This is a bit hackish but this seems the best
5482 * way to make sure both SAX and DOM entity support
5483 * behaves okay.
5484 */
5485 if (ctxt->userData == ctxt)
5486 user_data = NULL;
5487 else
5488 user_data = ctxt->userData;
5489
Owen Taylor3473f882001-02-23 17:55:21 +00005490 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5491 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005492 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5493 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005494 ctxt->depth--;
5495 } else if (ent->etype ==
5496 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5497 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005498 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005499 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005500 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005501 ctxt->depth--;
5502 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005503 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005504 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5505 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005506 }
5507 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005508 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005509 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005510 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005511 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5512 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005513 (ent->children == NULL)) {
5514 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005515 if (ctxt->replaceEntities) {
5516 /*
5517 * Prune it directly in the generated document
5518 * except for single text nodes.
5519 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005520 if (((list->type == XML_TEXT_NODE) &&
5521 (list->next == NULL)) ||
5522 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005523 list->parent = (xmlNodePtr) ent;
5524 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005525 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005526 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005527 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005528 while (list != NULL) {
5529 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005530 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005531 if (list->next == NULL)
5532 ent->last = list;
5533 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005534 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005535 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005536#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005537 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5538 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005539#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005540 }
5541 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005542 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005543 while (list != NULL) {
5544 list->parent = (xmlNodePtr) ent;
5545 if (list->next == NULL)
5546 ent->last = list;
5547 list = list->next;
5548 }
Owen Taylor3473f882001-02-23 17:55:21 +00005549 }
5550 } else {
5551 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005552 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005553 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005554 } else if ((ret != XML_ERR_OK) &&
5555 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005556 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005557 } else if (list != NULL) {
5558 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005559 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005560 }
5561 }
5562 }
5563 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5564 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5565 /*
5566 * Create a node.
5567 */
5568 ctxt->sax->reference(ctxt->userData, ent->name);
5569 return;
5570 } else if (ctxt->replaceEntities) {
5571 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5572 /*
5573 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005574 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005575 * In the first occurrence list contains the replacement.
5576 * progressive == 2 means we are operating on the Reader
5577 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005578 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005579 if (((list == NULL) && (ent->owner == 0)) ||
5580 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005581 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005582
5583 /*
5584 * when operating on a reader, the entities definitions
5585 * are always owning the entities subtree.
5586 if (ctxt->parseMode == XML_PARSE_READER)
5587 ent->owner = 1;
5588 */
5589
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 cur = ent->children;
5591 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005592 nw = xmlCopyNode(cur, 1);
5593 if (nw != NULL) {
5594 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005595 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005596 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005597 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005598 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005599 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005600 if (cur == ent->last) {
5601 /*
5602 * needed to detect some strange empty
5603 * node cases in the reader tests
5604 */
5605 if ((ctxt->parseMode == XML_PARSE_READER) &&
5606 (nw->type == XML_ELEMENT_NODE) &&
5607 (nw->children == NULL))
5608 nw->extra = 1;
5609
Daniel Veillard62f313b2001-07-04 19:49:14 +00005610 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005611 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005612 cur = cur->next;
5613 }
Daniel Veillard81273902003-09-30 00:43:48 +00005614#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005615 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005616 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005617#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005618 } else if (list == NULL) {
5619 xmlNodePtr nw = NULL, cur, next, last,
5620 firstChild = NULL;
5621 /*
5622 * Copy the entity child list and make it the new
5623 * entity child list. The goal is to make sure any
5624 * ID or REF referenced will be the one from the
5625 * document content and not the entity copy.
5626 */
5627 cur = ent->children;
5628 ent->children = NULL;
5629 last = ent->last;
5630 ent->last = NULL;
5631 while (cur != NULL) {
5632 next = cur->next;
5633 cur->next = NULL;
5634 cur->parent = NULL;
5635 nw = xmlCopyNode(cur, 1);
5636 if (nw != NULL) {
5637 nw->_private = cur->_private;
5638 if (firstChild == NULL){
5639 firstChild = cur;
5640 }
5641 xmlAddChild((xmlNodePtr) ent, nw);
5642 xmlAddChild(ctxt->node, cur);
5643 }
5644 if (cur == last)
5645 break;
5646 cur = next;
5647 }
5648 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005649#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005650 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5651 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005652#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005653 } else {
5654 /*
5655 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005656 * node with a possible previous text one which
5657 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005658 */
5659 if (ent->children->type == XML_TEXT_NODE)
5660 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5661 if ((ent->last != ent->children) &&
5662 (ent->last->type == XML_TEXT_NODE))
5663 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5664 xmlAddChildList(ctxt->node, ent->children);
5665 }
5666
Owen Taylor3473f882001-02-23 17:55:21 +00005667 /*
5668 * This is to avoid a nasty side effect, see
5669 * characters() in SAX.c
5670 */
5671 ctxt->nodemem = 0;
5672 ctxt->nodelen = 0;
5673 return;
5674 } else {
5675 /*
5676 * Probably running in SAX mode
5677 */
5678 xmlParserInputPtr input;
5679
5680 input = xmlNewEntityInputStream(ctxt, ent);
5681 xmlPushInput(ctxt, input);
5682 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005683 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5684 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005685 xmlParseTextDecl(ctxt);
5686 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5687 /*
5688 * The XML REC instructs us to stop parsing right here
5689 */
5690 ctxt->instate = XML_PARSER_EOF;
5691 return;
5692 }
5693 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005694 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5695 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005696 }
5697 }
5698 return;
5699 }
5700 }
5701 } else {
5702 val = ent->content;
5703 if (val == NULL) return;
5704 /*
5705 * inline the entity.
5706 */
5707 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5708 (!ctxt->disableSAX))
5709 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5710 }
5711 }
5712}
5713
5714/**
5715 * xmlParseEntityRef:
5716 * @ctxt: an XML parser context
5717 *
5718 * parse ENTITY references declarations
5719 *
5720 * [68] EntityRef ::= '&' Name ';'
5721 *
5722 * [ WFC: Entity Declared ]
5723 * In a document without any DTD, a document with only an internal DTD
5724 * subset which contains no parameter entity references, or a document
5725 * with "standalone='yes'", the Name given in the entity reference
5726 * must match that in an entity declaration, except that well-formed
5727 * documents need not declare any of the following entities: amp, lt,
5728 * gt, apos, quot. The declaration of a parameter entity must precede
5729 * any reference to it. Similarly, the declaration of a general entity
5730 * must precede any reference to it which appears in a default value in an
5731 * attribute-list declaration. Note that if entities are declared in the
5732 * external subset or in external parameter entities, a non-validating
5733 * processor is not obligated to read and process their declarations;
5734 * for such documents, the rule that an entity must be declared is a
5735 * well-formedness constraint only if standalone='yes'.
5736 *
5737 * [ WFC: Parsed Entity ]
5738 * An entity reference must not contain the name of an unparsed entity
5739 *
5740 * Returns the xmlEntityPtr if found, or NULL otherwise.
5741 */
5742xmlEntityPtr
5743xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005744 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005745 xmlEntityPtr ent = NULL;
5746
5747 GROW;
5748
5749 if (RAW == '&') {
5750 NEXT;
5751 name = xmlParseName(ctxt);
5752 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005753 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5754 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005755 } else {
5756 if (RAW == ';') {
5757 NEXT;
5758 /*
5759 * Ask first SAX for entity resolution, otherwise try the
5760 * predefined set.
5761 */
5762 if (ctxt->sax != NULL) {
5763 if (ctxt->sax->getEntity != NULL)
5764 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005765 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005766 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005767 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5768 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005769 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005770 }
Owen Taylor3473f882001-02-23 17:55:21 +00005771 }
5772 /*
5773 * [ WFC: Entity Declared ]
5774 * In a document without any DTD, a document with only an
5775 * internal DTD subset which contains no parameter entity
5776 * references, or a document with "standalone='yes'", the
5777 * Name given in the entity reference must match that in an
5778 * entity declaration, except that well-formed documents
5779 * need not declare any of the following entities: amp, lt,
5780 * gt, apos, quot.
5781 * The declaration of a parameter entity must precede any
5782 * reference to it.
5783 * Similarly, the declaration of a general entity must
5784 * precede any reference to it which appears in a default
5785 * value in an attribute-list declaration. Note that if
5786 * entities are declared in the external subset or in
5787 * external parameter entities, a non-validating processor
5788 * is not obligated to read and process their declarations;
5789 * for such documents, the rule that an entity must be
5790 * declared is a well-formedness constraint only if
5791 * standalone='yes'.
5792 */
5793 if (ent == NULL) {
5794 if ((ctxt->standalone == 1) ||
5795 ((ctxt->hasExternalSubset == 0) &&
5796 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005797 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005798 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005799 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005800 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005801 "Entity '%s' not defined\n", name);
5802 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005803 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005804 }
5805
5806 /*
5807 * [ WFC: Parsed Entity ]
5808 * An entity reference must not contain the name of an
5809 * unparsed entity
5810 */
5811 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005812 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005813 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005814 }
5815
5816 /*
5817 * [ WFC: No External Entity References ]
5818 * Attribute values cannot contain direct or indirect
5819 * entity references to external entities.
5820 */
5821 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5822 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005823 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5824 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005825 }
5826 /*
5827 * [ WFC: No < in Attribute Values ]
5828 * The replacement text of any entity referred to directly or
5829 * indirectly in an attribute value (other than "&lt;") must
5830 * not contain a <.
5831 */
5832 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5833 (ent != NULL) &&
5834 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5835 (ent->content != NULL) &&
5836 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005837 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005838 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 }
5840
5841 /*
5842 * Internal check, no parameter entities here ...
5843 */
5844 else {
5845 switch (ent->etype) {
5846 case XML_INTERNAL_PARAMETER_ENTITY:
5847 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005848 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5849 "Attempt to reference the parameter entity '%s'\n",
5850 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005851 break;
5852 default:
5853 break;
5854 }
5855 }
5856
5857 /*
5858 * [ WFC: No Recursion ]
5859 * A parsed entity must not contain a recursive reference
5860 * to itself, either directly or indirectly.
5861 * Done somewhere else
5862 */
5863
5864 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005865 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005866 }
Owen Taylor3473f882001-02-23 17:55:21 +00005867 }
5868 }
5869 return(ent);
5870}
5871
5872/**
5873 * xmlParseStringEntityRef:
5874 * @ctxt: an XML parser context
5875 * @str: a pointer to an index in the string
5876 *
5877 * parse ENTITY references declarations, but this version parses it from
5878 * a string value.
5879 *
5880 * [68] EntityRef ::= '&' Name ';'
5881 *
5882 * [ WFC: Entity Declared ]
5883 * In a document without any DTD, a document with only an internal DTD
5884 * subset which contains no parameter entity references, or a document
5885 * with "standalone='yes'", the Name given in the entity reference
5886 * must match that in an entity declaration, except that well-formed
5887 * documents need not declare any of the following entities: amp, lt,
5888 * gt, apos, quot. The declaration of a parameter entity must precede
5889 * any reference to it. Similarly, the declaration of a general entity
5890 * must precede any reference to it which appears in a default value in an
5891 * attribute-list declaration. Note that if entities are declared in the
5892 * external subset or in external parameter entities, a non-validating
5893 * processor is not obligated to read and process their declarations;
5894 * for such documents, the rule that an entity must be declared is a
5895 * well-formedness constraint only if standalone='yes'.
5896 *
5897 * [ WFC: Parsed Entity ]
5898 * An entity reference must not contain the name of an unparsed entity
5899 *
5900 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5901 * is updated to the current location in the string.
5902 */
5903xmlEntityPtr
5904xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5905 xmlChar *name;
5906 const xmlChar *ptr;
5907 xmlChar cur;
5908 xmlEntityPtr ent = NULL;
5909
5910 if ((str == NULL) || (*str == NULL))
5911 return(NULL);
5912 ptr = *str;
5913 cur = *ptr;
5914 if (cur == '&') {
5915 ptr++;
5916 cur = *ptr;
5917 name = xmlParseStringName(ctxt, &ptr);
5918 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005919 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5920 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005921 } else {
5922 if (*ptr == ';') {
5923 ptr++;
5924 /*
5925 * Ask first SAX for entity resolution, otherwise try the
5926 * predefined set.
5927 */
5928 if (ctxt->sax != NULL) {
5929 if (ctxt->sax->getEntity != NULL)
5930 ent = ctxt->sax->getEntity(ctxt->userData, name);
5931 if (ent == NULL)
5932 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005933 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005934 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005935 }
Owen Taylor3473f882001-02-23 17:55:21 +00005936 }
5937 /*
5938 * [ WFC: Entity Declared ]
5939 * In a document without any DTD, a document with only an
5940 * internal DTD subset which contains no parameter entity
5941 * references, or a document with "standalone='yes'", the
5942 * Name given in the entity reference must match that in an
5943 * entity declaration, except that well-formed documents
5944 * need not declare any of the following entities: amp, lt,
5945 * gt, apos, quot.
5946 * The declaration of a parameter entity must precede any
5947 * reference to it.
5948 * Similarly, the declaration of a general entity must
5949 * precede any reference to it which appears in a default
5950 * value in an attribute-list declaration. Note that if
5951 * entities are declared in the external subset or in
5952 * external parameter entities, a non-validating processor
5953 * is not obligated to read and process their declarations;
5954 * for such documents, the rule that an entity must be
5955 * declared is a well-formedness constraint only if
5956 * standalone='yes'.
5957 */
5958 if (ent == NULL) {
5959 if ((ctxt->standalone == 1) ||
5960 ((ctxt->hasExternalSubset == 0) &&
5961 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005962 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005963 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005964 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005965 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005966 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005967 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005968 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005969 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005970 }
5971
5972 /*
5973 * [ WFC: Parsed Entity ]
5974 * An entity reference must not contain the name of an
5975 * unparsed entity
5976 */
5977 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005978 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005979 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005980 }
5981
5982 /*
5983 * [ WFC: No External Entity References ]
5984 * Attribute values cannot contain direct or indirect
5985 * entity references to external entities.
5986 */
5987 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5988 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005989 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005990 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005991 }
5992 /*
5993 * [ WFC: No < in Attribute Values ]
5994 * The replacement text of any entity referred to directly or
5995 * indirectly in an attribute value (other than "&lt;") must
5996 * not contain a <.
5997 */
5998 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5999 (ent != NULL) &&
6000 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6001 (ent->content != NULL) &&
6002 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006003 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6004 "'<' in entity '%s' is not allowed in attributes values\n",
6005 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006006 }
6007
6008 /*
6009 * Internal check, no parameter entities here ...
6010 */
6011 else {
6012 switch (ent->etype) {
6013 case XML_INTERNAL_PARAMETER_ENTITY:
6014 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006015 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6016 "Attempt to reference the parameter entity '%s'\n",
6017 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006018 break;
6019 default:
6020 break;
6021 }
6022 }
6023
6024 /*
6025 * [ WFC: No Recursion ]
6026 * A parsed entity must not contain a recursive reference
6027 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006028 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006029 */
6030
6031 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006032 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006033 }
6034 xmlFree(name);
6035 }
6036 }
6037 *str = ptr;
6038 return(ent);
6039}
6040
6041/**
6042 * xmlParsePEReference:
6043 * @ctxt: an XML parser context
6044 *
6045 * parse PEReference declarations
6046 * The entity content is handled directly by pushing it's content as
6047 * a new input stream.
6048 *
6049 * [69] PEReference ::= '%' Name ';'
6050 *
6051 * [ WFC: No Recursion ]
6052 * A parsed entity must not contain a recursive
6053 * reference to itself, either directly or indirectly.
6054 *
6055 * [ WFC: Entity Declared ]
6056 * In a document without any DTD, a document with only an internal DTD
6057 * subset which contains no parameter entity references, or a document
6058 * with "standalone='yes'", ... ... The declaration of a parameter
6059 * entity must precede any reference to it...
6060 *
6061 * [ VC: Entity Declared ]
6062 * In a document with an external subset or external parameter entities
6063 * with "standalone='no'", ... ... The declaration of a parameter entity
6064 * must precede any reference to it...
6065 *
6066 * [ WFC: In DTD ]
6067 * Parameter-entity references may only appear in the DTD.
6068 * NOTE: misleading but this is handled.
6069 */
6070void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006071xmlParsePEReference(xmlParserCtxtPtr ctxt)
6072{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006073 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006074 xmlEntityPtr entity = NULL;
6075 xmlParserInputPtr input;
6076
6077 if (RAW == '%') {
6078 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006079 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006080 if (name == NULL) {
6081 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6082 "xmlParsePEReference: no name\n");
6083 } else {
6084 if (RAW == ';') {
6085 NEXT;
6086 if ((ctxt->sax != NULL) &&
6087 (ctxt->sax->getParameterEntity != NULL))
6088 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6089 name);
6090 if (entity == NULL) {
6091 /*
6092 * [ WFC: Entity Declared ]
6093 * In a document without any DTD, a document with only an
6094 * internal DTD subset which contains no parameter entity
6095 * references, or a document with "standalone='yes'", ...
6096 * ... The declaration of a parameter entity must precede
6097 * any reference to it...
6098 */
6099 if ((ctxt->standalone == 1) ||
6100 ((ctxt->hasExternalSubset == 0) &&
6101 (ctxt->hasPErefs == 0))) {
6102 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6103 "PEReference: %%%s; not found\n",
6104 name);
6105 } else {
6106 /*
6107 * [ VC: Entity Declared ]
6108 * In a document with an external subset or external
6109 * parameter entities with "standalone='no'", ...
6110 * ... The declaration of a parameter entity must
6111 * precede any reference to it...
6112 */
6113 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6114 "PEReference: %%%s; not found\n",
6115 name, NULL);
6116 ctxt->valid = 0;
6117 }
6118 } else {
6119 /*
6120 * Internal checking in case the entity quest barfed
6121 */
6122 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6123 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6124 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6125 "Internal: %%%s; is not a parameter entity\n",
6126 name, NULL);
6127 } else if (ctxt->input->free != deallocblankswrapper) {
6128 input =
6129 xmlNewBlanksWrapperInputStream(ctxt, entity);
6130 xmlPushInput(ctxt, input);
6131 } else {
6132 /*
6133 * TODO !!!
6134 * handle the extra spaces added before and after
6135 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6136 */
6137 input = xmlNewEntityInputStream(ctxt, entity);
6138 xmlPushInput(ctxt, input);
6139 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006140 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006141 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006142 xmlParseTextDecl(ctxt);
6143 if (ctxt->errNo ==
6144 XML_ERR_UNSUPPORTED_ENCODING) {
6145 /*
6146 * The XML REC instructs us to stop parsing
6147 * right here
6148 */
6149 ctxt->instate = XML_PARSER_EOF;
6150 return;
6151 }
6152 }
6153 }
6154 }
6155 ctxt->hasPErefs = 1;
6156 } else {
6157 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6158 }
6159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 }
6161}
6162
6163/**
6164 * xmlParseStringPEReference:
6165 * @ctxt: an XML parser context
6166 * @str: a pointer to an index in the string
6167 *
6168 * parse PEReference declarations
6169 *
6170 * [69] PEReference ::= '%' Name ';'
6171 *
6172 * [ WFC: No Recursion ]
6173 * A parsed entity must not contain a recursive
6174 * reference to itself, either directly or indirectly.
6175 *
6176 * [ WFC: Entity Declared ]
6177 * In a document without any DTD, a document with only an internal DTD
6178 * subset which contains no parameter entity references, or a document
6179 * with "standalone='yes'", ... ... The declaration of a parameter
6180 * entity must precede any reference to it...
6181 *
6182 * [ VC: Entity Declared ]
6183 * In a document with an external subset or external parameter entities
6184 * with "standalone='no'", ... ... The declaration of a parameter entity
6185 * must precede any reference to it...
6186 *
6187 * [ WFC: In DTD ]
6188 * Parameter-entity references may only appear in the DTD.
6189 * NOTE: misleading but this is handled.
6190 *
6191 * Returns the string of the entity content.
6192 * str is updated to the current value of the index
6193 */
6194xmlEntityPtr
6195xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6196 const xmlChar *ptr;
6197 xmlChar cur;
6198 xmlChar *name;
6199 xmlEntityPtr entity = NULL;
6200
6201 if ((str == NULL) || (*str == NULL)) return(NULL);
6202 ptr = *str;
6203 cur = *ptr;
6204 if (cur == '%') {
6205 ptr++;
6206 cur = *ptr;
6207 name = xmlParseStringName(ctxt, &ptr);
6208 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006209 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6210 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006211 } else {
6212 cur = *ptr;
6213 if (cur == ';') {
6214 ptr++;
6215 cur = *ptr;
6216 if ((ctxt->sax != NULL) &&
6217 (ctxt->sax->getParameterEntity != NULL))
6218 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6219 name);
6220 if (entity == NULL) {
6221 /*
6222 * [ WFC: Entity Declared ]
6223 * In a document without any DTD, a document with only an
6224 * internal DTD subset which contains no parameter entity
6225 * references, or a document with "standalone='yes'", ...
6226 * ... The declaration of a parameter entity must precede
6227 * any reference to it...
6228 */
6229 if ((ctxt->standalone == 1) ||
6230 ((ctxt->hasExternalSubset == 0) &&
6231 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006232 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006233 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006234 } else {
6235 /*
6236 * [ VC: Entity Declared ]
6237 * In a document with an external subset or external
6238 * parameter entities with "standalone='no'", ...
6239 * ... The declaration of a parameter entity must
6240 * precede any reference to it...
6241 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006242 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6243 "PEReference: %%%s; not found\n",
6244 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006245 ctxt->valid = 0;
6246 }
6247 } else {
6248 /*
6249 * Internal checking in case the entity quest barfed
6250 */
6251 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6252 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006253 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6254 "%%%s; is not a parameter entity\n",
6255 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006256 }
6257 }
6258 ctxt->hasPErefs = 1;
6259 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006260 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 }
6262 xmlFree(name);
6263 }
6264 }
6265 *str = ptr;
6266 return(entity);
6267}
6268
6269/**
6270 * xmlParseDocTypeDecl:
6271 * @ctxt: an XML parser context
6272 *
6273 * parse a DOCTYPE declaration
6274 *
6275 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6276 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6277 *
6278 * [ VC: Root Element Type ]
6279 * The Name in the document type declaration must match the element
6280 * type of the root element.
6281 */
6282
6283void
6284xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006285 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006286 xmlChar *ExternalID = NULL;
6287 xmlChar *URI = NULL;
6288
6289 /*
6290 * We know that '<!DOCTYPE' has been detected.
6291 */
6292 SKIP(9);
6293
6294 SKIP_BLANKS;
6295
6296 /*
6297 * Parse the DOCTYPE name.
6298 */
6299 name = xmlParseName(ctxt);
6300 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006301 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6302 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006303 }
6304 ctxt->intSubName = name;
6305
6306 SKIP_BLANKS;
6307
6308 /*
6309 * Check for SystemID and ExternalID
6310 */
6311 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6312
6313 if ((URI != NULL) || (ExternalID != NULL)) {
6314 ctxt->hasExternalSubset = 1;
6315 }
6316 ctxt->extSubURI = URI;
6317 ctxt->extSubSystem = ExternalID;
6318
6319 SKIP_BLANKS;
6320
6321 /*
6322 * Create and update the internal subset.
6323 */
6324 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6325 (!ctxt->disableSAX))
6326 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6327
6328 /*
6329 * Is there any internal subset declarations ?
6330 * they are handled separately in xmlParseInternalSubset()
6331 */
6332 if (RAW == '[')
6333 return;
6334
6335 /*
6336 * We should be at the end of the DOCTYPE declaration.
6337 */
6338 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006339 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006340 }
6341 NEXT;
6342}
6343
6344/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006345 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006346 * @ctxt: an XML parser context
6347 *
6348 * parse the internal subset declaration
6349 *
6350 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6351 */
6352
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006353static void
Owen Taylor3473f882001-02-23 17:55:21 +00006354xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6355 /*
6356 * Is there any DTD definition ?
6357 */
6358 if (RAW == '[') {
6359 ctxt->instate = XML_PARSER_DTD;
6360 NEXT;
6361 /*
6362 * Parse the succession of Markup declarations and
6363 * PEReferences.
6364 * Subsequence (markupdecl | PEReference | S)*
6365 */
6366 while (RAW != ']') {
6367 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006368 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006369
6370 SKIP_BLANKS;
6371 xmlParseMarkupDecl(ctxt);
6372 xmlParsePEReference(ctxt);
6373
6374 /*
6375 * Pop-up of finished entities.
6376 */
6377 while ((RAW == 0) && (ctxt->inputNr > 1))
6378 xmlPopInput(ctxt);
6379
6380 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006381 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006382 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006383 break;
6384 }
6385 }
6386 if (RAW == ']') {
6387 NEXT;
6388 SKIP_BLANKS;
6389 }
6390 }
6391
6392 /*
6393 * We should be at the end of the DOCTYPE declaration.
6394 */
6395 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006396 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006397 }
6398 NEXT;
6399}
6400
Daniel Veillard81273902003-09-30 00:43:48 +00006401#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006402/**
6403 * xmlParseAttribute:
6404 * @ctxt: an XML parser context
6405 * @value: a xmlChar ** used to store the value of the attribute
6406 *
6407 * parse an attribute
6408 *
6409 * [41] Attribute ::= Name Eq AttValue
6410 *
6411 * [ WFC: No External Entity References ]
6412 * Attribute values cannot contain direct or indirect entity references
6413 * to external entities.
6414 *
6415 * [ WFC: No < in Attribute Values ]
6416 * The replacement text of any entity referred to directly or indirectly in
6417 * an attribute value (other than "&lt;") must not contain a <.
6418 *
6419 * [ VC: Attribute Value Type ]
6420 * The attribute must have been declared; the value must be of the type
6421 * declared for it.
6422 *
6423 * [25] Eq ::= S? '=' S?
6424 *
6425 * With namespace:
6426 *
6427 * [NS 11] Attribute ::= QName Eq AttValue
6428 *
6429 * Also the case QName == xmlns:??? is handled independently as a namespace
6430 * definition.
6431 *
6432 * Returns the attribute name, and the value in *value.
6433 */
6434
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006435const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006436xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006437 const xmlChar *name;
6438 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006439
6440 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006441 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006442 name = xmlParseName(ctxt);
6443 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006445 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006446 return(NULL);
6447 }
6448
6449 /*
6450 * read the value
6451 */
6452 SKIP_BLANKS;
6453 if (RAW == '=') {
6454 NEXT;
6455 SKIP_BLANKS;
6456 val = xmlParseAttValue(ctxt);
6457 ctxt->instate = XML_PARSER_CONTENT;
6458 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006459 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006460 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006461 return(NULL);
6462 }
6463
6464 /*
6465 * Check that xml:lang conforms to the specification
6466 * No more registered as an error, just generate a warning now
6467 * since this was deprecated in XML second edition
6468 */
6469 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6470 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006471 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6472 "Malformed value for xml:lang : %s\n",
6473 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006474 }
6475 }
6476
6477 /*
6478 * Check that xml:space conforms to the specification
6479 */
6480 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6481 if (xmlStrEqual(val, BAD_CAST "default"))
6482 *(ctxt->space) = 0;
6483 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6484 *(ctxt->space) = 1;
6485 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006486 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006487"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006488 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006489 }
6490 }
6491
6492 *value = val;
6493 return(name);
6494}
6495
6496/**
6497 * xmlParseStartTag:
6498 * @ctxt: an XML parser context
6499 *
6500 * parse a start of tag either for rule element or
6501 * EmptyElement. In both case we don't parse the tag closing chars.
6502 *
6503 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6504 *
6505 * [ WFC: Unique Att Spec ]
6506 * No attribute name may appear more than once in the same start-tag or
6507 * empty-element tag.
6508 *
6509 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6510 *
6511 * [ WFC: Unique Att Spec ]
6512 * No attribute name may appear more than once in the same start-tag or
6513 * empty-element tag.
6514 *
6515 * With namespace:
6516 *
6517 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6518 *
6519 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6520 *
6521 * Returns the element name parsed
6522 */
6523
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006524const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006525xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006526 const xmlChar *name;
6527 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006528 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006529 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006530 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006531 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006532 int i;
6533
6534 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006535 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006536
6537 name = xmlParseName(ctxt);
6538 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006539 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006540 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006541 return(NULL);
6542 }
6543
6544 /*
6545 * Now parse the attributes, it ends up with the ending
6546 *
6547 * (S Attribute)* S?
6548 */
6549 SKIP_BLANKS;
6550 GROW;
6551
Daniel Veillard21a0f912001-02-25 19:54:14 +00006552 while ((RAW != '>') &&
6553 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006554 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006555 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006556 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006557
6558 attname = xmlParseAttribute(ctxt, &attvalue);
6559 if ((attname != NULL) && (attvalue != NULL)) {
6560 /*
6561 * [ WFC: Unique Att Spec ]
6562 * No attribute name may appear more than once in the same
6563 * start-tag or empty-element tag.
6564 */
6565 for (i = 0; i < nbatts;i += 2) {
6566 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006567 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 xmlFree(attvalue);
6569 goto failed;
6570 }
6571 }
Owen Taylor3473f882001-02-23 17:55:21 +00006572 /*
6573 * Add the pair to atts
6574 */
6575 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006576 maxatts = 22; /* allow for 10 attrs by default */
6577 atts = (const xmlChar **)
6578 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006579 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006580 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006581 if (attvalue != NULL)
6582 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006583 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006584 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006585 ctxt->atts = atts;
6586 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006587 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006588 const xmlChar **n;
6589
Owen Taylor3473f882001-02-23 17:55:21 +00006590 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006591 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006592 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006593 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006594 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006595 if (attvalue != NULL)
6596 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006597 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006598 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006599 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006600 ctxt->atts = atts;
6601 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
6603 atts[nbatts++] = attname;
6604 atts[nbatts++] = attvalue;
6605 atts[nbatts] = NULL;
6606 atts[nbatts + 1] = NULL;
6607 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006608 if (attvalue != NULL)
6609 xmlFree(attvalue);
6610 }
6611
6612failed:
6613
Daniel Veillard3772de32002-12-17 10:31:45 +00006614 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006615 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6616 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006617 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6619 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006620 }
6621 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006622 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6623 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006624 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6625 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006626 break;
6627 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006628 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006629 GROW;
6630 }
6631
6632 /*
6633 * SAX: Start of Element !
6634 */
6635 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006636 (!ctxt->disableSAX)) {
6637 if (nbatts > 0)
6638 ctxt->sax->startElement(ctxt->userData, name, atts);
6639 else
6640 ctxt->sax->startElement(ctxt->userData, name, NULL);
6641 }
Owen Taylor3473f882001-02-23 17:55:21 +00006642
6643 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006644 /* Free only the content strings */
6645 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006646 if (atts[i] != NULL)
6647 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006648 }
6649 return(name);
6650}
6651
6652/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006653 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006654 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006655 * @line: line of the start tag
6656 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006657 *
6658 * parse an end of tag
6659 *
6660 * [42] ETag ::= '</' Name S? '>'
6661 *
6662 * With namespace
6663 *
6664 * [NS 9] ETag ::= '</' QName S? '>'
6665 */
6666
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006667static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006668xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006669 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006670
6671 GROW;
6672 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006673 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006674 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006675 return;
6676 }
6677 SKIP(2);
6678
Daniel Veillard46de64e2002-05-29 08:21:33 +00006679 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006680
6681 /*
6682 * We should definitely be at the ending "S? '>'" part
6683 */
6684 GROW;
6685 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006686 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006687 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006688 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006689 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006690
6691 /*
6692 * [ WFC: Element Type Match ]
6693 * The Name in an element's end-tag must match the element type in the
6694 * start-tag.
6695 *
6696 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006697 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006698 if (name == NULL) name = BAD_CAST "unparseable";
6699 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006700 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006701 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006702 }
6703
6704 /*
6705 * SAX: End of Tag
6706 */
6707 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6708 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006709 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006710
Daniel Veillarde57ec792003-09-10 10:50:59 +00006711 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006712 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006713 return;
6714}
6715
6716/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006717 * xmlParseEndTag:
6718 * @ctxt: an XML parser context
6719 *
6720 * parse an end of tag
6721 *
6722 * [42] ETag ::= '</' Name S? '>'
6723 *
6724 * With namespace
6725 *
6726 * [NS 9] ETag ::= '</' QName S? '>'
6727 */
6728
6729void
6730xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006731 xmlParseEndTag1(ctxt, 0);
6732}
Daniel Veillard81273902003-09-30 00:43:48 +00006733#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006734
6735/************************************************************************
6736 * *
6737 * SAX 2 specific operations *
6738 * *
6739 ************************************************************************/
6740
6741static const xmlChar *
6742xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6743 int len = 0, l;
6744 int c;
6745 int count = 0;
6746
6747 /*
6748 * Handler for more complex cases
6749 */
6750 GROW;
6751 c = CUR_CHAR(l);
6752 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006753 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006754 return(NULL);
6755 }
6756
6757 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006758 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006759 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006760 (IS_COMBINING(c)) ||
6761 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006762 if (count++ > 100) {
6763 count = 0;
6764 GROW;
6765 }
6766 len += l;
6767 NEXTL(l);
6768 c = CUR_CHAR(l);
6769 }
6770 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6771}
6772
6773/*
6774 * xmlGetNamespace:
6775 * @ctxt: an XML parser context
6776 * @prefix: the prefix to lookup
6777 *
6778 * Lookup the namespace name for the @prefix (which ca be NULL)
6779 * The prefix must come from the @ctxt->dict dictionnary
6780 *
6781 * Returns the namespace name or NULL if not bound
6782 */
6783static const xmlChar *
6784xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6785 int i;
6786
Daniel Veillarde57ec792003-09-10 10:50:59 +00006787 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006788 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006789 if (ctxt->nsTab[i] == prefix) {
6790 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6791 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006792 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006793 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006794 return(NULL);
6795}
6796
6797/**
6798 * xmlParseNCName:
6799 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006800 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006801 *
6802 * parse an XML name.
6803 *
6804 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6805 * CombiningChar | Extender
6806 *
6807 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6808 *
6809 * Returns the Name parsed or NULL
6810 */
6811
6812static const xmlChar *
6813xmlParseNCName(xmlParserCtxtPtr ctxt) {
6814 const xmlChar *in;
6815 const xmlChar *ret;
6816 int count = 0;
6817
6818 /*
6819 * Accelerator for simple ASCII names
6820 */
6821 in = ctxt->input->cur;
6822 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6823 ((*in >= 0x41) && (*in <= 0x5A)) ||
6824 (*in == '_')) {
6825 in++;
6826 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6827 ((*in >= 0x41) && (*in <= 0x5A)) ||
6828 ((*in >= 0x30) && (*in <= 0x39)) ||
6829 (*in == '_') || (*in == '-') ||
6830 (*in == '.'))
6831 in++;
6832 if ((*in > 0) && (*in < 0x80)) {
6833 count = in - ctxt->input->cur;
6834 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6835 ctxt->input->cur = in;
6836 ctxt->nbChars += count;
6837 ctxt->input->col += count;
6838 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006839 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006840 }
6841 return(ret);
6842 }
6843 }
6844 return(xmlParseNCNameComplex(ctxt));
6845}
6846
6847/**
6848 * xmlParseQName:
6849 * @ctxt: an XML parser context
6850 * @prefix: pointer to store the prefix part
6851 *
6852 * parse an XML Namespace QName
6853 *
6854 * [6] QName ::= (Prefix ':')? LocalPart
6855 * [7] Prefix ::= NCName
6856 * [8] LocalPart ::= NCName
6857 *
6858 * Returns the Name parsed or NULL
6859 */
6860
6861static const xmlChar *
6862xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6863 const xmlChar *l, *p;
6864
6865 GROW;
6866
6867 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006868 if (l == NULL) {
6869 if (CUR == ':') {
6870 l = xmlParseName(ctxt);
6871 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006872 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6873 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006874 *prefix = NULL;
6875 return(l);
6876 }
6877 }
6878 return(NULL);
6879 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006880 if (CUR == ':') {
6881 NEXT;
6882 p = l;
6883 l = xmlParseNCName(ctxt);
6884 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006885 xmlChar *tmp;
6886
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006887 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6888 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006889 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6890 p = xmlDictLookup(ctxt->dict, tmp, -1);
6891 if (tmp != NULL) xmlFree(tmp);
6892 *prefix = NULL;
6893 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006894 }
6895 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006896 xmlChar *tmp;
6897
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006898 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6899 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006900 NEXT;
6901 tmp = (xmlChar *) xmlParseName(ctxt);
6902 if (tmp != NULL) {
6903 tmp = xmlBuildQName(tmp, l, NULL, 0);
6904 l = xmlDictLookup(ctxt->dict, tmp, -1);
6905 if (tmp != NULL) xmlFree(tmp);
6906 *prefix = p;
6907 return(l);
6908 }
6909 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6910 l = xmlDictLookup(ctxt->dict, tmp, -1);
6911 if (tmp != NULL) xmlFree(tmp);
6912 *prefix = p;
6913 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006914 }
6915 *prefix = p;
6916 } else
6917 *prefix = NULL;
6918 return(l);
6919}
6920
6921/**
6922 * xmlParseQNameAndCompare:
6923 * @ctxt: an XML parser context
6924 * @name: the localname
6925 * @prefix: the prefix, if any.
6926 *
6927 * parse an XML name and compares for match
6928 * (specialized for endtag parsing)
6929 *
6930 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6931 * and the name for mismatch
6932 */
6933
6934static const xmlChar *
6935xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6936 xmlChar const *prefix) {
6937 const xmlChar *cmp = name;
6938 const xmlChar *in;
6939 const xmlChar *ret;
6940 const xmlChar *prefix2;
6941
6942 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6943
6944 GROW;
6945 in = ctxt->input->cur;
6946
6947 cmp = prefix;
6948 while (*in != 0 && *in == *cmp) {
6949 ++in;
6950 ++cmp;
6951 }
6952 if ((*cmp == 0) && (*in == ':')) {
6953 in++;
6954 cmp = name;
6955 while (*in != 0 && *in == *cmp) {
6956 ++in;
6957 ++cmp;
6958 }
William M. Brack76e95df2003-10-18 16:20:14 +00006959 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006960 /* success */
6961 ctxt->input->cur = in;
6962 return((const xmlChar*) 1);
6963 }
6964 }
6965 /*
6966 * all strings coms from the dictionary, equality can be done directly
6967 */
6968 ret = xmlParseQName (ctxt, &prefix2);
6969 if ((ret == name) && (prefix == prefix2))
6970 return((const xmlChar*) 1);
6971 return ret;
6972}
6973
6974/**
6975 * xmlParseAttValueInternal:
6976 * @ctxt: an XML parser context
6977 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006978 * @alloc: whether the attribute was reallocated as a new string
6979 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006980 *
6981 * parse a value for an attribute.
6982 * NOTE: if no normalization is needed, the routine will return pointers
6983 * directly from the data buffer.
6984 *
6985 * 3.3.3 Attribute-Value Normalization:
6986 * Before the value of an attribute is passed to the application or
6987 * checked for validity, the XML processor must normalize it as follows:
6988 * - a character reference is processed by appending the referenced
6989 * character to the attribute value
6990 * - an entity reference is processed by recursively processing the
6991 * replacement text of the entity
6992 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6993 * appending #x20 to the normalized value, except that only a single
6994 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6995 * parsed entity or the literal entity value of an internal parsed entity
6996 * - other characters are processed by appending them to the normalized value
6997 * If the declared value is not CDATA, then the XML processor must further
6998 * process the normalized attribute value by discarding any leading and
6999 * trailing space (#x20) characters, and by replacing sequences of space
7000 * (#x20) characters by a single space (#x20) character.
7001 * All attributes for which no declaration has been read should be treated
7002 * by a non-validating parser as if declared CDATA.
7003 *
7004 * Returns the AttValue parsed or NULL. The value has to be freed by the
7005 * caller if it was copied, this can be detected by val[*len] == 0.
7006 */
7007
7008static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007009xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7010 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007011{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007012 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007013 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007014 xmlChar *ret = NULL;
7015
7016 GROW;
7017 in = (xmlChar *) CUR_PTR;
7018 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007019 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007020 return (NULL);
7021 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007022 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007023
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007024 /*
7025 * try to handle in this routine the most common case where no
7026 * allocation of a new string is required and where content is
7027 * pure ASCII.
7028 */
7029 limit = *in++;
7030 end = ctxt->input->end;
7031 start = in;
7032 if (in >= end) {
7033 const xmlChar *oldbase = ctxt->input->base;
7034 GROW;
7035 if (oldbase != ctxt->input->base) {
7036 long delta = ctxt->input->base - oldbase;
7037 start = start + delta;
7038 in = in + delta;
7039 }
7040 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007041 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007042 if (normalize) {
7043 /*
7044 * Skip any leading spaces
7045 */
7046 while ((in < end) && (*in != limit) &&
7047 ((*in == 0x20) || (*in == 0x9) ||
7048 (*in == 0xA) || (*in == 0xD))) {
7049 in++;
7050 start = in;
7051 if (in >= end) {
7052 const xmlChar *oldbase = ctxt->input->base;
7053 GROW;
7054 if (oldbase != ctxt->input->base) {
7055 long delta = ctxt->input->base - oldbase;
7056 start = start + delta;
7057 in = in + delta;
7058 }
7059 end = ctxt->input->end;
7060 }
7061 }
7062 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7063 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7064 if ((*in++ == 0x20) && (*in == 0x20)) break;
7065 if (in >= end) {
7066 const xmlChar *oldbase = ctxt->input->base;
7067 GROW;
7068 if (oldbase != ctxt->input->base) {
7069 long delta = ctxt->input->base - oldbase;
7070 start = start + delta;
7071 in = in + delta;
7072 }
7073 end = ctxt->input->end;
7074 }
7075 }
7076 last = in;
7077 /*
7078 * skip the trailing blanks
7079 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007080 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007081 while ((in < end) && (*in != limit) &&
7082 ((*in == 0x20) || (*in == 0x9) ||
7083 (*in == 0xA) || (*in == 0xD))) {
7084 in++;
7085 if (in >= end) {
7086 const xmlChar *oldbase = ctxt->input->base;
7087 GROW;
7088 if (oldbase != ctxt->input->base) {
7089 long delta = ctxt->input->base - oldbase;
7090 start = start + delta;
7091 in = in + delta;
7092 last = last + delta;
7093 }
7094 end = ctxt->input->end;
7095 }
7096 }
7097 if (*in != limit) goto need_complex;
7098 } else {
7099 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7100 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7101 in++;
7102 if (in >= end) {
7103 const xmlChar *oldbase = ctxt->input->base;
7104 GROW;
7105 if (oldbase != ctxt->input->base) {
7106 long delta = ctxt->input->base - oldbase;
7107 start = start + delta;
7108 in = in + delta;
7109 }
7110 end = ctxt->input->end;
7111 }
7112 }
7113 last = in;
7114 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007115 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007116 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007117 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007118 *len = last - start;
7119 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007120 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007121 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007122 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007123 }
7124 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007125 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007126 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007127need_complex:
7128 if (alloc) *alloc = 1;
7129 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007130}
7131
7132/**
7133 * xmlParseAttribute2:
7134 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007135 * @pref: the element prefix
7136 * @elem: the element name
7137 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007138 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007139 * @len: an int * to save the length of the attribute
7140 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007141 *
7142 * parse an attribute in the new SAX2 framework.
7143 *
7144 * Returns the attribute name, and the value in *value, .
7145 */
7146
7147static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007148xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7149 const xmlChar *pref, const xmlChar *elem,
7150 const xmlChar **prefix, xmlChar **value,
7151 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007152 const xmlChar *name;
7153 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007154 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007155
7156 *value = NULL;
7157 GROW;
7158 name = xmlParseQName(ctxt, prefix);
7159 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007160 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7161 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007162 return(NULL);
7163 }
7164
7165 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007166 * get the type if needed
7167 */
7168 if (ctxt->attsSpecial != NULL) {
7169 int type;
7170
7171 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7172 pref, elem, *prefix, name);
7173 if (type != 0) normalize = 1;
7174 }
7175
7176 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007177 * read the value
7178 */
7179 SKIP_BLANKS;
7180 if (RAW == '=') {
7181 NEXT;
7182 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007183 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007184 ctxt->instate = XML_PARSER_CONTENT;
7185 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007186 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007187 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007188 return(NULL);
7189 }
7190
7191 /*
7192 * Check that xml:lang conforms to the specification
7193 * No more registered as an error, just generate a warning now
7194 * since this was deprecated in XML second edition
7195 */
7196 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7197 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007198 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7199 "Malformed value for xml:lang : %s\n",
7200 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007201 }
7202 }
7203
7204 /*
7205 * Check that xml:space conforms to the specification
7206 */
7207 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7208 if (xmlStrEqual(val, BAD_CAST "default"))
7209 *(ctxt->space) = 0;
7210 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7211 *(ctxt->space) = 1;
7212 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007213 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007214"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7215 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007216 }
7217 }
7218
7219 *value = val;
7220 return(name);
7221}
7222
7223/**
7224 * xmlParseStartTag2:
7225 * @ctxt: an XML parser context
7226 *
7227 * parse a start of tag either for rule element or
7228 * EmptyElement. In both case we don't parse the tag closing chars.
7229 * This routine is called when running SAX2 parsing
7230 *
7231 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7232 *
7233 * [ WFC: Unique Att Spec ]
7234 * No attribute name may appear more than once in the same start-tag or
7235 * empty-element tag.
7236 *
7237 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7238 *
7239 * [ WFC: Unique Att Spec ]
7240 * No attribute name may appear more than once in the same start-tag or
7241 * empty-element tag.
7242 *
7243 * With namespace:
7244 *
7245 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7246 *
7247 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7248 *
7249 * Returns the element name parsed
7250 */
7251
7252static const xmlChar *
7253xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007254 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007255 const xmlChar *localname;
7256 const xmlChar *prefix;
7257 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007258 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007259 const xmlChar *nsname;
7260 xmlChar *attvalue;
7261 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007262 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007263 int nratts, nbatts, nbdef;
7264 int i, j, nbNs, attval;
7265 const xmlChar *base;
7266 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007267
7268 if (RAW != '<') return(NULL);
7269 NEXT1;
7270
7271 /*
7272 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7273 * point since the attribute values may be stored as pointers to
7274 * the buffer and calling SHRINK would destroy them !
7275 * The Shrinking is only possible once the full set of attribute
7276 * callbacks have been done.
7277 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007278reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007279 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007280 base = ctxt->input->base;
7281 cur = ctxt->input->cur - ctxt->input->base;
7282 nbatts = 0;
7283 nratts = 0;
7284 nbdef = 0;
7285 nbNs = 0;
7286 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007287
7288 localname = xmlParseQName(ctxt, &prefix);
7289 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007290 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7291 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007292 return(NULL);
7293 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007294 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007295
7296 /*
7297 * Now parse the attributes, it ends up with the ending
7298 *
7299 * (S Attribute)* S?
7300 */
7301 SKIP_BLANKS;
7302 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007303 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007304
7305 while ((RAW != '>') &&
7306 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007307 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007308 const xmlChar *q = CUR_PTR;
7309 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007310 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007311
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007312 attname = xmlParseAttribute2(ctxt, prefix, localname,
7313 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007314 if ((attname != NULL) && (attvalue != NULL)) {
7315 if (len < 0) len = xmlStrlen(attvalue);
7316 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007317 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7318 xmlURIPtr uri;
7319
7320 if (*URL != 0) {
7321 uri = xmlParseURI((const char *) URL);
7322 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007323 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7324 "xmlns: %s not a valid URI\n",
7325 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007326 } else {
7327 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007328 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7329 "xmlns: URI %s is not absolute\n",
7330 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007331 }
7332 xmlFreeURI(uri);
7333 }
7334 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007335 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007336 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007337 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007338 for (j = 1;j <= nbNs;j++)
7339 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7340 break;
7341 if (j <= nbNs)
7342 xmlErrAttributeDup(ctxt, NULL, attname);
7343 else
7344 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007345 if (alloc != 0) xmlFree(attvalue);
7346 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007347 continue;
7348 }
7349 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007350 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7351 xmlURIPtr uri;
7352
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007353 if (attname == ctxt->str_xml) {
7354 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007355 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7356 "xml namespace prefix mapped to wrong URI\n",
7357 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007358 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007359 /*
7360 * Do not keep a namespace definition node
7361 */
7362 if (alloc != 0) xmlFree(attvalue);
7363 SKIP_BLANKS;
7364 continue;
7365 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007366 uri = xmlParseURI((const char *) URL);
7367 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007368 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7369 "xmlns:%s: '%s' is not a valid URI\n",
7370 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007371 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007372 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007373 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7374 "xmlns:%s: URI %s is not absolute\n",
7375 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007376 }
7377 xmlFreeURI(uri);
7378 }
7379
Daniel Veillard0fb18932003-09-07 09:14:37 +00007380 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007381 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007382 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007383 for (j = 1;j <= nbNs;j++)
7384 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7385 break;
7386 if (j <= nbNs)
7387 xmlErrAttributeDup(ctxt, aprefix, attname);
7388 else
7389 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007390 if (alloc != 0) xmlFree(attvalue);
7391 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007392 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007393 continue;
7394 }
7395
7396 /*
7397 * Add the pair to atts
7398 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007399 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7400 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007401 if (attvalue[len] == 0)
7402 xmlFree(attvalue);
7403 goto failed;
7404 }
7405 maxatts = ctxt->maxatts;
7406 atts = ctxt->atts;
7407 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007408 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007409 atts[nbatts++] = attname;
7410 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007411 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007412 atts[nbatts++] = attvalue;
7413 attvalue += len;
7414 atts[nbatts++] = attvalue;
7415 /*
7416 * tag if some deallocation is needed
7417 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007418 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007419 } else {
7420 if ((attvalue != NULL) && (attvalue[len] == 0))
7421 xmlFree(attvalue);
7422 }
7423
7424failed:
7425
7426 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007427 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007428 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7429 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007430 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007431 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7432 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007433 }
7434 SKIP_BLANKS;
7435 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7436 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007437 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007438 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007439 break;
7440 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007441 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007442 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007443 }
7444
Daniel Veillard0fb18932003-09-07 09:14:37 +00007445 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007446 * The attributes defaulting
7447 */
7448 if (ctxt->attsDefault != NULL) {
7449 xmlDefAttrsPtr defaults;
7450
7451 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7452 if (defaults != NULL) {
7453 for (i = 0;i < defaults->nbAttrs;i++) {
7454 attname = defaults->values[4 * i];
7455 aprefix = defaults->values[4 * i + 1];
7456
7457 /*
7458 * special work for namespaces defaulted defs
7459 */
7460 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7461 /*
7462 * check that it's not a defined namespace
7463 */
7464 for (j = 1;j <= nbNs;j++)
7465 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7466 break;
7467 if (j <= nbNs) continue;
7468
7469 nsname = xmlGetNamespace(ctxt, NULL);
7470 if (nsname != defaults->values[4 * i + 2]) {
7471 if (nsPush(ctxt, NULL,
7472 defaults->values[4 * i + 2]) > 0)
7473 nbNs++;
7474 }
7475 } else if (aprefix == ctxt->str_xmlns) {
7476 /*
7477 * check that it's not a defined namespace
7478 */
7479 for (j = 1;j <= nbNs;j++)
7480 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7481 break;
7482 if (j <= nbNs) continue;
7483
7484 nsname = xmlGetNamespace(ctxt, attname);
7485 if (nsname != defaults->values[2]) {
7486 if (nsPush(ctxt, attname,
7487 defaults->values[4 * i + 2]) > 0)
7488 nbNs++;
7489 }
7490 } else {
7491 /*
7492 * check that it's not a defined attribute
7493 */
7494 for (j = 0;j < nbatts;j+=5) {
7495 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7496 break;
7497 }
7498 if (j < nbatts) continue;
7499
7500 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7501 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007502 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007503 }
7504 maxatts = ctxt->maxatts;
7505 atts = ctxt->atts;
7506 }
7507 atts[nbatts++] = attname;
7508 atts[nbatts++] = aprefix;
7509 if (aprefix == NULL)
7510 atts[nbatts++] = NULL;
7511 else
7512 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7513 atts[nbatts++] = defaults->values[4 * i + 2];
7514 atts[nbatts++] = defaults->values[4 * i + 3];
7515 nbdef++;
7516 }
7517 }
7518 }
7519 }
7520
Daniel Veillarde70c8772003-11-25 07:21:18 +00007521 /*
7522 * The attributes checkings
7523 */
7524 for (i = 0; i < nbatts;i += 5) {
7525 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7526 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7527 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7528 "Namespace prefix %s for %s on %s is not defined\n",
7529 atts[i + 1], atts[i], localname);
7530 }
7531 atts[i + 2] = nsname;
7532 /*
7533 * [ WFC: Unique Att Spec ]
7534 * No attribute name may appear more than once in the same
7535 * start-tag or empty-element tag.
7536 * As extended by the Namespace in XML REC.
7537 */
7538 for (j = 0; j < i;j += 5) {
7539 if (atts[i] == atts[j]) {
7540 if (atts[i+1] == atts[j+1]) {
7541 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7542 break;
7543 }
7544 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7545 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7546 "Namespaced Attribute %s in '%s' redefined\n",
7547 atts[i], nsname, NULL);
7548 break;
7549 }
7550 }
7551 }
7552 }
7553
Daniel Veillarde57ec792003-09-10 10:50:59 +00007554 nsname = xmlGetNamespace(ctxt, prefix);
7555 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007556 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7557 "Namespace prefix %s on %s is not defined\n",
7558 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007559 }
7560 *pref = prefix;
7561 *URI = nsname;
7562
7563 /*
7564 * SAX: Start of Element !
7565 */
7566 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7567 (!ctxt->disableSAX)) {
7568 if (nbNs > 0)
7569 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7570 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7571 nbatts / 5, nbdef, atts);
7572 else
7573 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7574 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7575 }
7576
7577 /*
7578 * Free up attribute allocated strings if needed
7579 */
7580 if (attval != 0) {
7581 for (i = 3,j = 0; j < nratts;i += 5,j++)
7582 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7583 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007584 }
7585
7586 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007587
7588base_changed:
7589 /*
7590 * the attribute strings are valid iif the base didn't changed
7591 */
7592 if (attval != 0) {
7593 for (i = 3,j = 0; j < nratts;i += 5,j++)
7594 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7595 xmlFree((xmlChar *) atts[i]);
7596 }
7597 ctxt->input->cur = ctxt->input->base + cur;
7598 if (ctxt->wellFormed == 1) {
7599 goto reparse;
7600 }
7601 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007602}
7603
7604/**
7605 * xmlParseEndTag2:
7606 * @ctxt: an XML parser context
7607 * @line: line of the start tag
7608 * @nsNr: number of namespaces on the start tag
7609 *
7610 * parse an end of tag
7611 *
7612 * [42] ETag ::= '</' Name S? '>'
7613 *
7614 * With namespace
7615 *
7616 * [NS 9] ETag ::= '</' QName S? '>'
7617 */
7618
7619static void
7620xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007621 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007622 const xmlChar *name;
7623
7624 GROW;
7625 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007626 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 return;
7628 }
7629 SKIP(2);
7630
Daniel Veillard453e71b2004-04-20 17:44:46 +00007631 if ((tlen > 0) && (strncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007632 if (ctxt->input->cur[tlen] == '>') {
7633 ctxt->input->cur += tlen + 1;
7634 goto done;
7635 }
7636 ctxt->input->cur += tlen;
7637 name = (xmlChar*)1;
7638 } else {
7639 if (prefix == NULL)
7640 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7641 else
7642 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7643 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007644
7645 /*
7646 * We should definitely be at the ending "S? '>'" part
7647 */
7648 GROW;
7649 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007650 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007651 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007652 } else
7653 NEXT1;
7654
7655 /*
7656 * [ WFC: Element Type Match ]
7657 * The Name in an element's end-tag must match the element type in the
7658 * start-tag.
7659 *
7660 */
7661 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007662 if (name == NULL) name = BAD_CAST "unparseable";
7663 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007664 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007665 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007666 }
7667
7668 /*
7669 * SAX: End of Tag
7670 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007671done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007672 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7673 (!ctxt->disableSAX))
7674 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7675
Daniel Veillard0fb18932003-09-07 09:14:37 +00007676 spacePop(ctxt);
7677 if (nsNr != 0)
7678 nsPop(ctxt, nsNr);
7679 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007680}
7681
7682/**
Owen Taylor3473f882001-02-23 17:55:21 +00007683 * xmlParseCDSect:
7684 * @ctxt: an XML parser context
7685 *
7686 * Parse escaped pure raw content.
7687 *
7688 * [18] CDSect ::= CDStart CData CDEnd
7689 *
7690 * [19] CDStart ::= '<![CDATA['
7691 *
7692 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7693 *
7694 * [21] CDEnd ::= ']]>'
7695 */
7696void
7697xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7698 xmlChar *buf = NULL;
7699 int len = 0;
7700 int size = XML_PARSER_BUFFER_SIZE;
7701 int r, rl;
7702 int s, sl;
7703 int cur, l;
7704 int count = 0;
7705
Daniel Veillard8f597c32003-10-06 08:19:27 +00007706 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007707 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007708 SKIP(9);
7709 } else
7710 return;
7711
7712 ctxt->instate = XML_PARSER_CDATA_SECTION;
7713 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007714 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007715 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007716 ctxt->instate = XML_PARSER_CONTENT;
7717 return;
7718 }
7719 NEXTL(rl);
7720 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007721 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007722 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007723 ctxt->instate = XML_PARSER_CONTENT;
7724 return;
7725 }
7726 NEXTL(sl);
7727 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007728 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007729 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007730 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007731 return;
7732 }
William M. Brack871611b2003-10-18 04:53:14 +00007733 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007734 ((r != ']') || (s != ']') || (cur != '>'))) {
7735 if (len + 5 >= size) {
7736 size *= 2;
7737 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7738 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007739 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007740 return;
7741 }
7742 }
7743 COPY_BUF(rl,buf,len,r);
7744 r = s;
7745 rl = sl;
7746 s = cur;
7747 sl = l;
7748 count++;
7749 if (count > 50) {
7750 GROW;
7751 count = 0;
7752 }
7753 NEXTL(l);
7754 cur = CUR_CHAR(l);
7755 }
7756 buf[len] = 0;
7757 ctxt->instate = XML_PARSER_CONTENT;
7758 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007759 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007760 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007761 xmlFree(buf);
7762 return;
7763 }
7764 NEXTL(l);
7765
7766 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007767 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007768 */
7769 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7770 if (ctxt->sax->cdataBlock != NULL)
7771 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007772 else if (ctxt->sax->characters != NULL)
7773 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007774 }
7775 xmlFree(buf);
7776}
7777
7778/**
7779 * xmlParseContent:
7780 * @ctxt: an XML parser context
7781 *
7782 * Parse a content:
7783 *
7784 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7785 */
7786
7787void
7788xmlParseContent(xmlParserCtxtPtr ctxt) {
7789 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007790 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007791 ((RAW != '<') || (NXT(1) != '/'))) {
7792 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007793 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007794 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007795
7796 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007797 * First case : a Processing Instruction.
7798 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007799 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007800 xmlParsePI(ctxt);
7801 }
7802
7803 /*
7804 * Second case : a CDSection
7805 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007806 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007807 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007808 xmlParseCDSect(ctxt);
7809 }
7810
7811 /*
7812 * Third case : a comment
7813 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007814 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007815 (NXT(2) == '-') && (NXT(3) == '-')) {
7816 xmlParseComment(ctxt);
7817 ctxt->instate = XML_PARSER_CONTENT;
7818 }
7819
7820 /*
7821 * Fourth case : a sub-element.
7822 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007823 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007824 xmlParseElement(ctxt);
7825 }
7826
7827 /*
7828 * Fifth case : a reference. If if has not been resolved,
7829 * parsing returns it's Name, create the node
7830 */
7831
Daniel Veillard21a0f912001-02-25 19:54:14 +00007832 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007833 xmlParseReference(ctxt);
7834 }
7835
7836 /*
7837 * Last case, text. Note that References are handled directly.
7838 */
7839 else {
7840 xmlParseCharData(ctxt, 0);
7841 }
7842
7843 GROW;
7844 /*
7845 * Pop-up of finished entities.
7846 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007847 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007848 xmlPopInput(ctxt);
7849 SHRINK;
7850
Daniel Veillardfdc91562002-07-01 21:52:03 +00007851 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007852 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7853 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007854 ctxt->instate = XML_PARSER_EOF;
7855 break;
7856 }
7857 }
7858}
7859
7860/**
7861 * xmlParseElement:
7862 * @ctxt: an XML parser context
7863 *
7864 * parse an XML element, this is highly recursive
7865 *
7866 * [39] element ::= EmptyElemTag | STag content ETag
7867 *
7868 * [ WFC: Element Type Match ]
7869 * The Name in an element's end-tag must match the element type in the
7870 * start-tag.
7871 *
Owen Taylor3473f882001-02-23 17:55:21 +00007872 */
7873
7874void
7875xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007876 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007877 const xmlChar *prefix;
7878 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007879 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007880 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007881 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007882 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007883
7884 /* Capture start position */
7885 if (ctxt->record_info) {
7886 node_info.begin_pos = ctxt->input->consumed +
7887 (CUR_PTR - ctxt->input->base);
7888 node_info.begin_line = ctxt->input->line;
7889 }
7890
7891 if (ctxt->spaceNr == 0)
7892 spacePush(ctxt, -1);
7893 else
7894 spacePush(ctxt, *ctxt->space);
7895
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007896 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007897#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007898 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007899#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007900 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007901#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 else
7903 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007904#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007905 if (name == NULL) {
7906 spacePop(ctxt);
7907 return;
7908 }
7909 namePush(ctxt, name);
7910 ret = ctxt->node;
7911
Daniel Veillard4432df22003-09-28 18:58:27 +00007912#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007913 /*
7914 * [ VC: Root Element Type ]
7915 * The Name in the document type declaration must match the element
7916 * type of the root element.
7917 */
7918 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7919 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7920 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007921#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007922
7923 /*
7924 * Check for an Empty Element.
7925 */
7926 if ((RAW == '/') && (NXT(1) == '>')) {
7927 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007928 if (ctxt->sax2) {
7929 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7930 (!ctxt->disableSAX))
7931 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007932#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007933 } else {
7934 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7935 (!ctxt->disableSAX))
7936 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007937#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007938 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007939 namePop(ctxt);
7940 spacePop(ctxt);
7941 if (nsNr != ctxt->nsNr)
7942 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007943 if ( ret != NULL && ctxt->record_info ) {
7944 node_info.end_pos = ctxt->input->consumed +
7945 (CUR_PTR - ctxt->input->base);
7946 node_info.end_line = ctxt->input->line;
7947 node_info.node = ret;
7948 xmlParserAddNodeInfo(ctxt, &node_info);
7949 }
7950 return;
7951 }
7952 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007953 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007954 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007955 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7956 "Couldn't find end of Start Tag %s line %d\n",
7957 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007958
7959 /*
7960 * end of parsing of this node.
7961 */
7962 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007963 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007964 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007965 if (nsNr != ctxt->nsNr)
7966 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007967
7968 /*
7969 * Capture end position and add node
7970 */
7971 if ( ret != NULL && ctxt->record_info ) {
7972 node_info.end_pos = ctxt->input->consumed +
7973 (CUR_PTR - ctxt->input->base);
7974 node_info.end_line = ctxt->input->line;
7975 node_info.node = ret;
7976 xmlParserAddNodeInfo(ctxt, &node_info);
7977 }
7978 return;
7979 }
7980
7981 /*
7982 * Parse the content of the element:
7983 */
7984 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007985 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007986 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007987 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007988 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007989
7990 /*
7991 * end of parsing of this node.
7992 */
7993 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007994 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007995 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007996 if (nsNr != ctxt->nsNr)
7997 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007998 return;
7999 }
8000
8001 /*
8002 * parse the end of tag: '</' should be here.
8003 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008004 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008005 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008006 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008007 }
8008#ifdef LIBXML_SAX1_ENABLED
8009 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008010 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008011#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008012
8013 /*
8014 * Capture end position and add node
8015 */
8016 if ( ret != NULL && ctxt->record_info ) {
8017 node_info.end_pos = ctxt->input->consumed +
8018 (CUR_PTR - ctxt->input->base);
8019 node_info.end_line = ctxt->input->line;
8020 node_info.node = ret;
8021 xmlParserAddNodeInfo(ctxt, &node_info);
8022 }
8023}
8024
8025/**
8026 * xmlParseVersionNum:
8027 * @ctxt: an XML parser context
8028 *
8029 * parse the XML version value.
8030 *
8031 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8032 *
8033 * Returns the string giving the XML version number, or NULL
8034 */
8035xmlChar *
8036xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8037 xmlChar *buf = NULL;
8038 int len = 0;
8039 int size = 10;
8040 xmlChar cur;
8041
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008042 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008043 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008044 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008045 return(NULL);
8046 }
8047 cur = CUR;
8048 while (((cur >= 'a') && (cur <= 'z')) ||
8049 ((cur >= 'A') && (cur <= 'Z')) ||
8050 ((cur >= '0') && (cur <= '9')) ||
8051 (cur == '_') || (cur == '.') ||
8052 (cur == ':') || (cur == '-')) {
8053 if (len + 1 >= size) {
8054 size *= 2;
8055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8056 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008057 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008058 return(NULL);
8059 }
8060 }
8061 buf[len++] = cur;
8062 NEXT;
8063 cur=CUR;
8064 }
8065 buf[len] = 0;
8066 return(buf);
8067}
8068
8069/**
8070 * xmlParseVersionInfo:
8071 * @ctxt: an XML parser context
8072 *
8073 * parse the XML version.
8074 *
8075 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8076 *
8077 * [25] Eq ::= S? '=' S?
8078 *
8079 * Returns the version string, e.g. "1.0"
8080 */
8081
8082xmlChar *
8083xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8084 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008085
Daniel Veillarda07050d2003-10-19 14:46:32 +00008086 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008087 SKIP(7);
8088 SKIP_BLANKS;
8089 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008090 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008091 return(NULL);
8092 }
8093 NEXT;
8094 SKIP_BLANKS;
8095 if (RAW == '"') {
8096 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008097 version = xmlParseVersionNum(ctxt);
8098 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008099 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008100 } else
8101 NEXT;
8102 } else if (RAW == '\''){
8103 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008104 version = xmlParseVersionNum(ctxt);
8105 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008106 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008107 } else
8108 NEXT;
8109 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008110 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008111 }
8112 }
8113 return(version);
8114}
8115
8116/**
8117 * xmlParseEncName:
8118 * @ctxt: an XML parser context
8119 *
8120 * parse the XML encoding name
8121 *
8122 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8123 *
8124 * Returns the encoding name value or NULL
8125 */
8126xmlChar *
8127xmlParseEncName(xmlParserCtxtPtr ctxt) {
8128 xmlChar *buf = NULL;
8129 int len = 0;
8130 int size = 10;
8131 xmlChar cur;
8132
8133 cur = CUR;
8134 if (((cur >= 'a') && (cur <= 'z')) ||
8135 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008136 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008137 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008138 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008139 return(NULL);
8140 }
8141
8142 buf[len++] = cur;
8143 NEXT;
8144 cur = CUR;
8145 while (((cur >= 'a') && (cur <= 'z')) ||
8146 ((cur >= 'A') && (cur <= 'Z')) ||
8147 ((cur >= '0') && (cur <= '9')) ||
8148 (cur == '.') || (cur == '_') ||
8149 (cur == '-')) {
8150 if (len + 1 >= size) {
8151 size *= 2;
8152 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8153 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008154 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008155 return(NULL);
8156 }
8157 }
8158 buf[len++] = cur;
8159 NEXT;
8160 cur = CUR;
8161 if (cur == 0) {
8162 SHRINK;
8163 GROW;
8164 cur = CUR;
8165 }
8166 }
8167 buf[len] = 0;
8168 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008169 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008170 }
8171 return(buf);
8172}
8173
8174/**
8175 * xmlParseEncodingDecl:
8176 * @ctxt: an XML parser context
8177 *
8178 * parse the XML encoding declaration
8179 *
8180 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8181 *
8182 * this setups the conversion filters.
8183 *
8184 * Returns the encoding value or NULL
8185 */
8186
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008187const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008188xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8189 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008190
8191 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008192 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008193 SKIP(8);
8194 SKIP_BLANKS;
8195 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008196 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008197 return(NULL);
8198 }
8199 NEXT;
8200 SKIP_BLANKS;
8201 if (RAW == '"') {
8202 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008203 encoding = xmlParseEncName(ctxt);
8204 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008205 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008206 } else
8207 NEXT;
8208 } else if (RAW == '\''){
8209 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008210 encoding = xmlParseEncName(ctxt);
8211 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008212 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008213 } else
8214 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008215 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008216 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008217 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008218 /*
8219 * UTF-16 encoding stwich has already taken place at this stage,
8220 * more over the little-endian/big-endian selection is already done
8221 */
8222 if ((encoding != NULL) &&
8223 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8224 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008225 if (ctxt->encoding != NULL)
8226 xmlFree((xmlChar *) ctxt->encoding);
8227 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008228 }
8229 /*
8230 * UTF-8 encoding is handled natively
8231 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008232 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008233 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8234 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008235 if (ctxt->encoding != NULL)
8236 xmlFree((xmlChar *) ctxt->encoding);
8237 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008238 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008239 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008240 xmlCharEncodingHandlerPtr handler;
8241
8242 if (ctxt->input->encoding != NULL)
8243 xmlFree((xmlChar *) ctxt->input->encoding);
8244 ctxt->input->encoding = encoding;
8245
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008246 handler = xmlFindCharEncodingHandler((const char *) encoding);
8247 if (handler != NULL) {
8248 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008249 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008250 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008251 "Unsupported encoding %s\n", encoding);
8252 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008253 }
8254 }
8255 }
8256 return(encoding);
8257}
8258
8259/**
8260 * xmlParseSDDecl:
8261 * @ctxt: an XML parser context
8262 *
8263 * parse the XML standalone declaration
8264 *
8265 * [32] SDDecl ::= S 'standalone' Eq
8266 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8267 *
8268 * [ VC: Standalone Document Declaration ]
8269 * TODO The standalone document declaration must have the value "no"
8270 * if any external markup declarations contain declarations of:
8271 * - attributes with default values, if elements to which these
8272 * attributes apply appear in the document without specifications
8273 * of values for these attributes, or
8274 * - entities (other than amp, lt, gt, apos, quot), if references
8275 * to those entities appear in the document, or
8276 * - attributes with values subject to normalization, where the
8277 * attribute appears in the document with a value which will change
8278 * as a result of normalization, or
8279 * - element types with element content, if white space occurs directly
8280 * within any instance of those types.
8281 *
8282 * Returns 1 if standalone, 0 otherwise
8283 */
8284
8285int
8286xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8287 int standalone = -1;
8288
8289 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008290 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008291 SKIP(10);
8292 SKIP_BLANKS;
8293 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008294 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008295 return(standalone);
8296 }
8297 NEXT;
8298 SKIP_BLANKS;
8299 if (RAW == '\''){
8300 NEXT;
8301 if ((RAW == 'n') && (NXT(1) == 'o')) {
8302 standalone = 0;
8303 SKIP(2);
8304 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8305 (NXT(2) == 's')) {
8306 standalone = 1;
8307 SKIP(3);
8308 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008309 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008310 }
8311 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008312 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008313 } else
8314 NEXT;
8315 } else if (RAW == '"'){
8316 NEXT;
8317 if ((RAW == 'n') && (NXT(1) == 'o')) {
8318 standalone = 0;
8319 SKIP(2);
8320 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8321 (NXT(2) == 's')) {
8322 standalone = 1;
8323 SKIP(3);
8324 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008325 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008326 }
8327 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008328 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008329 } else
8330 NEXT;
8331 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008332 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008333 }
8334 }
8335 return(standalone);
8336}
8337
8338/**
8339 * xmlParseXMLDecl:
8340 * @ctxt: an XML parser context
8341 *
8342 * parse an XML declaration header
8343 *
8344 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8345 */
8346
8347void
8348xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8349 xmlChar *version;
8350
8351 /*
8352 * We know that '<?xml' is here.
8353 */
8354 SKIP(5);
8355
William M. Brack76e95df2003-10-18 16:20:14 +00008356 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008357 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8358 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008359 }
8360 SKIP_BLANKS;
8361
8362 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008363 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008364 */
8365 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008366 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008367 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008368 } else {
8369 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8370 /*
8371 * TODO: Blueberry should be detected here
8372 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008373 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8374 "Unsupported version '%s'\n",
8375 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008376 }
8377 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008378 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008379 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008380 }
Owen Taylor3473f882001-02-23 17:55:21 +00008381
8382 /*
8383 * We may have the encoding declaration
8384 */
William M. Brack76e95df2003-10-18 16:20:14 +00008385 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008386 if ((RAW == '?') && (NXT(1) == '>')) {
8387 SKIP(2);
8388 return;
8389 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008390 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008391 }
8392 xmlParseEncodingDecl(ctxt);
8393 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8394 /*
8395 * The XML REC instructs us to stop parsing right here
8396 */
8397 return;
8398 }
8399
8400 /*
8401 * We may have the standalone status.
8402 */
William M. Brack76e95df2003-10-18 16:20:14 +00008403 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008404 if ((RAW == '?') && (NXT(1) == '>')) {
8405 SKIP(2);
8406 return;
8407 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008408 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008409 }
8410 SKIP_BLANKS;
8411 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8412
8413 SKIP_BLANKS;
8414 if ((RAW == '?') && (NXT(1) == '>')) {
8415 SKIP(2);
8416 } else if (RAW == '>') {
8417 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008418 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008419 NEXT;
8420 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008421 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008422 MOVETO_ENDTAG(CUR_PTR);
8423 NEXT;
8424 }
8425}
8426
8427/**
8428 * xmlParseMisc:
8429 * @ctxt: an XML parser context
8430 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008431 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008432 *
8433 * [27] Misc ::= Comment | PI | S
8434 */
8435
8436void
8437xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008438 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008439 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008440 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008441 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008442 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008443 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008444 NEXT;
8445 } else
8446 xmlParseComment(ctxt);
8447 }
8448}
8449
8450/**
8451 * xmlParseDocument:
8452 * @ctxt: an XML parser context
8453 *
8454 * parse an XML document (and build a tree if using the standard SAX
8455 * interface).
8456 *
8457 * [1] document ::= prolog element Misc*
8458 *
8459 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8460 *
8461 * Returns 0, -1 in case of error. the parser context is augmented
8462 * as a result of the parsing.
8463 */
8464
8465int
8466xmlParseDocument(xmlParserCtxtPtr ctxt) {
8467 xmlChar start[4];
8468 xmlCharEncoding enc;
8469
8470 xmlInitParser();
8471
8472 GROW;
8473
8474 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008475 * SAX: detecting the level.
8476 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008477 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008478
8479 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008480 * SAX: beginning of the document processing.
8481 */
8482 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8483 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8484
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008485 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8486 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008487 /*
8488 * Get the 4 first bytes and decode the charset
8489 * if enc != XML_CHAR_ENCODING_NONE
8490 * plug some encoding conversion routines.
8491 */
8492 start[0] = RAW;
8493 start[1] = NXT(1);
8494 start[2] = NXT(2);
8495 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008496 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008497 if (enc != XML_CHAR_ENCODING_NONE) {
8498 xmlSwitchEncoding(ctxt, enc);
8499 }
Owen Taylor3473f882001-02-23 17:55:21 +00008500 }
8501
8502
8503 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008504 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008505 }
8506
8507 /*
8508 * Check for the XMLDecl in the Prolog.
8509 */
8510 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008511 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008512
8513 /*
8514 * Note that we will switch encoding on the fly.
8515 */
8516 xmlParseXMLDecl(ctxt);
8517 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8518 /*
8519 * The XML REC instructs us to stop parsing right here
8520 */
8521 return(-1);
8522 }
8523 ctxt->standalone = ctxt->input->standalone;
8524 SKIP_BLANKS;
8525 } else {
8526 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8527 }
8528 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8529 ctxt->sax->startDocument(ctxt->userData);
8530
8531 /*
8532 * The Misc part of the Prolog
8533 */
8534 GROW;
8535 xmlParseMisc(ctxt);
8536
8537 /*
8538 * Then possibly doc type declaration(s) and more Misc
8539 * (doctypedecl Misc*)?
8540 */
8541 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008542 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008543
8544 ctxt->inSubset = 1;
8545 xmlParseDocTypeDecl(ctxt);
8546 if (RAW == '[') {
8547 ctxt->instate = XML_PARSER_DTD;
8548 xmlParseInternalSubset(ctxt);
8549 }
8550
8551 /*
8552 * Create and update the external subset.
8553 */
8554 ctxt->inSubset = 2;
8555 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8556 (!ctxt->disableSAX))
8557 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8558 ctxt->extSubSystem, ctxt->extSubURI);
8559 ctxt->inSubset = 0;
8560
8561
8562 ctxt->instate = XML_PARSER_PROLOG;
8563 xmlParseMisc(ctxt);
8564 }
8565
8566 /*
8567 * Time to start parsing the tree itself
8568 */
8569 GROW;
8570 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008571 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8572 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008573 } else {
8574 ctxt->instate = XML_PARSER_CONTENT;
8575 xmlParseElement(ctxt);
8576 ctxt->instate = XML_PARSER_EPILOG;
8577
8578
8579 /*
8580 * The Misc part at the end
8581 */
8582 xmlParseMisc(ctxt);
8583
Daniel Veillard561b7f82002-03-20 21:55:57 +00008584 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008585 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008586 }
8587 ctxt->instate = XML_PARSER_EOF;
8588 }
8589
8590 /*
8591 * SAX: end of the document processing.
8592 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008593 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008594 ctxt->sax->endDocument(ctxt->userData);
8595
Daniel Veillard5997aca2002-03-18 18:36:20 +00008596 /*
8597 * Remove locally kept entity definitions if the tree was not built
8598 */
8599 if ((ctxt->myDoc != NULL) &&
8600 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8601 xmlFreeDoc(ctxt->myDoc);
8602 ctxt->myDoc = NULL;
8603 }
8604
Daniel Veillardc7612992002-02-17 22:47:37 +00008605 if (! ctxt->wellFormed) {
8606 ctxt->valid = 0;
8607 return(-1);
8608 }
Owen Taylor3473f882001-02-23 17:55:21 +00008609 return(0);
8610}
8611
8612/**
8613 * xmlParseExtParsedEnt:
8614 * @ctxt: an XML parser context
8615 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008616 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008617 * An external general parsed entity is well-formed if it matches the
8618 * production labeled extParsedEnt.
8619 *
8620 * [78] extParsedEnt ::= TextDecl? content
8621 *
8622 * Returns 0, -1 in case of error. the parser context is augmented
8623 * as a result of the parsing.
8624 */
8625
8626int
8627xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8628 xmlChar start[4];
8629 xmlCharEncoding enc;
8630
8631 xmlDefaultSAXHandlerInit();
8632
Daniel Veillard309f81d2003-09-23 09:02:53 +00008633 xmlDetectSAX2(ctxt);
8634
Owen Taylor3473f882001-02-23 17:55:21 +00008635 GROW;
8636
8637 /*
8638 * SAX: beginning of the document processing.
8639 */
8640 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8641 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8642
8643 /*
8644 * Get the 4 first bytes and decode the charset
8645 * if enc != XML_CHAR_ENCODING_NONE
8646 * plug some encoding conversion routines.
8647 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008648 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8649 start[0] = RAW;
8650 start[1] = NXT(1);
8651 start[2] = NXT(2);
8652 start[3] = NXT(3);
8653 enc = xmlDetectCharEncoding(start, 4);
8654 if (enc != XML_CHAR_ENCODING_NONE) {
8655 xmlSwitchEncoding(ctxt, enc);
8656 }
Owen Taylor3473f882001-02-23 17:55:21 +00008657 }
8658
8659
8660 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008661 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008662 }
8663
8664 /*
8665 * Check for the XMLDecl in the Prolog.
8666 */
8667 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008668 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008669
8670 /*
8671 * Note that we will switch encoding on the fly.
8672 */
8673 xmlParseXMLDecl(ctxt);
8674 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8675 /*
8676 * The XML REC instructs us to stop parsing right here
8677 */
8678 return(-1);
8679 }
8680 SKIP_BLANKS;
8681 } else {
8682 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8683 }
8684 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8685 ctxt->sax->startDocument(ctxt->userData);
8686
8687 /*
8688 * Doing validity checking on chunk doesn't make sense
8689 */
8690 ctxt->instate = XML_PARSER_CONTENT;
8691 ctxt->validate = 0;
8692 ctxt->loadsubset = 0;
8693 ctxt->depth = 0;
8694
8695 xmlParseContent(ctxt);
8696
8697 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008699 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008700 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008701 }
8702
8703 /*
8704 * SAX: end of the document processing.
8705 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008706 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008707 ctxt->sax->endDocument(ctxt->userData);
8708
8709 if (! ctxt->wellFormed) return(-1);
8710 return(0);
8711}
8712
Daniel Veillard73b013f2003-09-30 12:36:01 +00008713#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008714/************************************************************************
8715 * *
8716 * Progressive parsing interfaces *
8717 * *
8718 ************************************************************************/
8719
8720/**
8721 * xmlParseLookupSequence:
8722 * @ctxt: an XML parser context
8723 * @first: the first char to lookup
8724 * @next: the next char to lookup or zero
8725 * @third: the next char to lookup or zero
8726 *
8727 * Try to find if a sequence (first, next, third) or just (first next) or
8728 * (first) is available in the input stream.
8729 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8730 * to avoid rescanning sequences of bytes, it DOES change the state of the
8731 * parser, do not use liberally.
8732 *
8733 * Returns the index to the current parsing point if the full sequence
8734 * is available, -1 otherwise.
8735 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008736static int
Owen Taylor3473f882001-02-23 17:55:21 +00008737xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8738 xmlChar next, xmlChar third) {
8739 int base, len;
8740 xmlParserInputPtr in;
8741 const xmlChar *buf;
8742
8743 in = ctxt->input;
8744 if (in == NULL) return(-1);
8745 base = in->cur - in->base;
8746 if (base < 0) return(-1);
8747 if (ctxt->checkIndex > base)
8748 base = ctxt->checkIndex;
8749 if (in->buf == NULL) {
8750 buf = in->base;
8751 len = in->length;
8752 } else {
8753 buf = in->buf->buffer->content;
8754 len = in->buf->buffer->use;
8755 }
8756 /* take into account the sequence length */
8757 if (third) len -= 2;
8758 else if (next) len --;
8759 for (;base < len;base++) {
8760 if (buf[base] == first) {
8761 if (third != 0) {
8762 if ((buf[base + 1] != next) ||
8763 (buf[base + 2] != third)) continue;
8764 } else if (next != 0) {
8765 if (buf[base + 1] != next) continue;
8766 }
8767 ctxt->checkIndex = 0;
8768#ifdef DEBUG_PUSH
8769 if (next == 0)
8770 xmlGenericError(xmlGenericErrorContext,
8771 "PP: lookup '%c' found at %d\n",
8772 first, base);
8773 else if (third == 0)
8774 xmlGenericError(xmlGenericErrorContext,
8775 "PP: lookup '%c%c' found at %d\n",
8776 first, next, base);
8777 else
8778 xmlGenericError(xmlGenericErrorContext,
8779 "PP: lookup '%c%c%c' found at %d\n",
8780 first, next, third, base);
8781#endif
8782 return(base - (in->cur - in->base));
8783 }
8784 }
8785 ctxt->checkIndex = base;
8786#ifdef DEBUG_PUSH
8787 if (next == 0)
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: lookup '%c' failed\n", first);
8790 else if (third == 0)
8791 xmlGenericError(xmlGenericErrorContext,
8792 "PP: lookup '%c%c' failed\n", first, next);
8793 else
8794 xmlGenericError(xmlGenericErrorContext,
8795 "PP: lookup '%c%c%c' failed\n", first, next, third);
8796#endif
8797 return(-1);
8798}
8799
8800/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008801 * xmlParseGetLasts:
8802 * @ctxt: an XML parser context
8803 * @lastlt: pointer to store the last '<' from the input
8804 * @lastgt: pointer to store the last '>' from the input
8805 *
8806 * Lookup the last < and > in the current chunk
8807 */
8808static void
8809xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8810 const xmlChar **lastgt) {
8811 const xmlChar *tmp;
8812
8813 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8814 xmlGenericError(xmlGenericErrorContext,
8815 "Internal error: xmlParseGetLasts\n");
8816 return;
8817 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008818 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008819 tmp = ctxt->input->end;
8820 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008821 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008822 if (tmp < ctxt->input->base) {
8823 *lastlt = NULL;
8824 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008825 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008826 *lastlt = tmp;
8827 tmp++;
8828 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8829 if (*tmp == '\'') {
8830 tmp++;
8831 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8832 if (tmp < ctxt->input->end) tmp++;
8833 } else if (*tmp == '"') {
8834 tmp++;
8835 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8836 if (tmp < ctxt->input->end) tmp++;
8837 } else
8838 tmp++;
8839 }
8840 if (tmp < ctxt->input->end)
8841 *lastgt = tmp;
8842 else {
8843 tmp = *lastlt;
8844 tmp--;
8845 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8846 if (tmp >= ctxt->input->base)
8847 *lastgt = tmp;
8848 else
8849 *lastgt = NULL;
8850 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008851 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008852 } else {
8853 *lastlt = NULL;
8854 *lastgt = NULL;
8855 }
8856}
8857/**
Owen Taylor3473f882001-02-23 17:55:21 +00008858 * xmlParseTryOrFinish:
8859 * @ctxt: an XML parser context
8860 * @terminate: last chunk indicator
8861 *
8862 * Try to progress on parsing
8863 *
8864 * Returns zero if no parsing was possible
8865 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008866static int
Owen Taylor3473f882001-02-23 17:55:21 +00008867xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8868 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008869 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008870 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008871 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008872
8873#ifdef DEBUG_PUSH
8874 switch (ctxt->instate) {
8875 case XML_PARSER_EOF:
8876 xmlGenericError(xmlGenericErrorContext,
8877 "PP: try EOF\n"); break;
8878 case XML_PARSER_START:
8879 xmlGenericError(xmlGenericErrorContext,
8880 "PP: try START\n"); break;
8881 case XML_PARSER_MISC:
8882 xmlGenericError(xmlGenericErrorContext,
8883 "PP: try MISC\n");break;
8884 case XML_PARSER_COMMENT:
8885 xmlGenericError(xmlGenericErrorContext,
8886 "PP: try COMMENT\n");break;
8887 case XML_PARSER_PROLOG:
8888 xmlGenericError(xmlGenericErrorContext,
8889 "PP: try PROLOG\n");break;
8890 case XML_PARSER_START_TAG:
8891 xmlGenericError(xmlGenericErrorContext,
8892 "PP: try START_TAG\n");break;
8893 case XML_PARSER_CONTENT:
8894 xmlGenericError(xmlGenericErrorContext,
8895 "PP: try CONTENT\n");break;
8896 case XML_PARSER_CDATA_SECTION:
8897 xmlGenericError(xmlGenericErrorContext,
8898 "PP: try CDATA_SECTION\n");break;
8899 case XML_PARSER_END_TAG:
8900 xmlGenericError(xmlGenericErrorContext,
8901 "PP: try END_TAG\n");break;
8902 case XML_PARSER_ENTITY_DECL:
8903 xmlGenericError(xmlGenericErrorContext,
8904 "PP: try ENTITY_DECL\n");break;
8905 case XML_PARSER_ENTITY_VALUE:
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: try ENTITY_VALUE\n");break;
8908 case XML_PARSER_ATTRIBUTE_VALUE:
8909 xmlGenericError(xmlGenericErrorContext,
8910 "PP: try ATTRIBUTE_VALUE\n");break;
8911 case XML_PARSER_DTD:
8912 xmlGenericError(xmlGenericErrorContext,
8913 "PP: try DTD\n");break;
8914 case XML_PARSER_EPILOG:
8915 xmlGenericError(xmlGenericErrorContext,
8916 "PP: try EPILOG\n");break;
8917 case XML_PARSER_PI:
8918 xmlGenericError(xmlGenericErrorContext,
8919 "PP: try PI\n");break;
8920 case XML_PARSER_IGNORE:
8921 xmlGenericError(xmlGenericErrorContext,
8922 "PP: try IGNORE\n");break;
8923 }
8924#endif
8925
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008926 if ((ctxt->input != NULL) &&
8927 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008928 xmlSHRINK(ctxt);
8929 ctxt->checkIndex = 0;
8930 }
8931 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008932
Daniel Veillarda880b122003-04-21 21:36:41 +00008933 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008934 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8935 return(0);
8936
8937
Owen Taylor3473f882001-02-23 17:55:21 +00008938 /*
8939 * Pop-up of finished entities.
8940 */
8941 while ((RAW == 0) && (ctxt->inputNr > 1))
8942 xmlPopInput(ctxt);
8943
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008944 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008945 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008946 avail = ctxt->input->length -
8947 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008948 else {
8949 /*
8950 * If we are operating on converted input, try to flush
8951 * remainng chars to avoid them stalling in the non-converted
8952 * buffer.
8953 */
8954 if ((ctxt->input->buf->raw != NULL) &&
8955 (ctxt->input->buf->raw->use > 0)) {
8956 int base = ctxt->input->base -
8957 ctxt->input->buf->buffer->content;
8958 int current = ctxt->input->cur - ctxt->input->base;
8959
8960 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8961 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8962 ctxt->input->cur = ctxt->input->base + current;
8963 ctxt->input->end =
8964 &ctxt->input->buf->buffer->content[
8965 ctxt->input->buf->buffer->use];
8966 }
8967 avail = ctxt->input->buf->buffer->use -
8968 (ctxt->input->cur - ctxt->input->base);
8969 }
Owen Taylor3473f882001-02-23 17:55:21 +00008970 if (avail < 1)
8971 goto done;
8972 switch (ctxt->instate) {
8973 case XML_PARSER_EOF:
8974 /*
8975 * Document parsing is done !
8976 */
8977 goto done;
8978 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008979 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8980 xmlChar start[4];
8981 xmlCharEncoding enc;
8982
8983 /*
8984 * Very first chars read from the document flow.
8985 */
8986 if (avail < 4)
8987 goto done;
8988
8989 /*
8990 * Get the 4 first bytes and decode the charset
8991 * if enc != XML_CHAR_ENCODING_NONE
8992 * plug some encoding conversion routines.
8993 */
8994 start[0] = RAW;
8995 start[1] = NXT(1);
8996 start[2] = NXT(2);
8997 start[3] = NXT(3);
8998 enc = xmlDetectCharEncoding(start, 4);
8999 if (enc != XML_CHAR_ENCODING_NONE) {
9000 xmlSwitchEncoding(ctxt, enc);
9001 }
9002 break;
9003 }
Owen Taylor3473f882001-02-23 17:55:21 +00009004
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009005 if (avail < 2)
9006 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009007 cur = ctxt->input->cur[0];
9008 next = ctxt->input->cur[1];
9009 if (cur == 0) {
9010 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9011 ctxt->sax->setDocumentLocator(ctxt->userData,
9012 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009013 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009014 ctxt->instate = XML_PARSER_EOF;
9015#ifdef DEBUG_PUSH
9016 xmlGenericError(xmlGenericErrorContext,
9017 "PP: entering EOF\n");
9018#endif
9019 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9020 ctxt->sax->endDocument(ctxt->userData);
9021 goto done;
9022 }
9023 if ((cur == '<') && (next == '?')) {
9024 /* PI or XML decl */
9025 if (avail < 5) return(ret);
9026 if ((!terminate) &&
9027 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9028 return(ret);
9029 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9030 ctxt->sax->setDocumentLocator(ctxt->userData,
9031 &xmlDefaultSAXLocator);
9032 if ((ctxt->input->cur[2] == 'x') &&
9033 (ctxt->input->cur[3] == 'm') &&
9034 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009035 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009036 ret += 5;
9037#ifdef DEBUG_PUSH
9038 xmlGenericError(xmlGenericErrorContext,
9039 "PP: Parsing XML Decl\n");
9040#endif
9041 xmlParseXMLDecl(ctxt);
9042 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9043 /*
9044 * The XML REC instructs us to stop parsing right
9045 * here
9046 */
9047 ctxt->instate = XML_PARSER_EOF;
9048 return(0);
9049 }
9050 ctxt->standalone = ctxt->input->standalone;
9051 if ((ctxt->encoding == NULL) &&
9052 (ctxt->input->encoding != NULL))
9053 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9054 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9055 (!ctxt->disableSAX))
9056 ctxt->sax->startDocument(ctxt->userData);
9057 ctxt->instate = XML_PARSER_MISC;
9058#ifdef DEBUG_PUSH
9059 xmlGenericError(xmlGenericErrorContext,
9060 "PP: entering MISC\n");
9061#endif
9062 } else {
9063 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9064 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9065 (!ctxt->disableSAX))
9066 ctxt->sax->startDocument(ctxt->userData);
9067 ctxt->instate = XML_PARSER_MISC;
9068#ifdef DEBUG_PUSH
9069 xmlGenericError(xmlGenericErrorContext,
9070 "PP: entering MISC\n");
9071#endif
9072 }
9073 } else {
9074 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9075 ctxt->sax->setDocumentLocator(ctxt->userData,
9076 &xmlDefaultSAXLocator);
9077 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9078 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9079 (!ctxt->disableSAX))
9080 ctxt->sax->startDocument(ctxt->userData);
9081 ctxt->instate = XML_PARSER_MISC;
9082#ifdef DEBUG_PUSH
9083 xmlGenericError(xmlGenericErrorContext,
9084 "PP: entering MISC\n");
9085#endif
9086 }
9087 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009088 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009089 const xmlChar *name;
9090 const xmlChar *prefix;
9091 const xmlChar *URI;
9092 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009093
9094 if ((avail < 2) && (ctxt->inputNr == 1))
9095 goto done;
9096 cur = ctxt->input->cur[0];
9097 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009098 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009099 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9101 ctxt->sax->endDocument(ctxt->userData);
9102 goto done;
9103 }
9104 if (!terminate) {
9105 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009106 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009107 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009108 goto done;
9109 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9110 goto done;
9111 }
9112 }
9113 if (ctxt->spaceNr == 0)
9114 spacePush(ctxt, -1);
9115 else
9116 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009117#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009118 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009119#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009120 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009121#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009122 else
9123 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009124#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009125 if (name == NULL) {
9126 spacePop(ctxt);
9127 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009128 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9129 ctxt->sax->endDocument(ctxt->userData);
9130 goto done;
9131 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009132#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009133 /*
9134 * [ VC: Root Element Type ]
9135 * The Name in the document type declaration must match
9136 * the element type of the root element.
9137 */
9138 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9139 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9140 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009141#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009142
9143 /*
9144 * Check for an Empty Element.
9145 */
9146 if ((RAW == '/') && (NXT(1) == '>')) {
9147 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009148
9149 if (ctxt->sax2) {
9150 if ((ctxt->sax != NULL) &&
9151 (ctxt->sax->endElementNs != NULL) &&
9152 (!ctxt->disableSAX))
9153 ctxt->sax->endElementNs(ctxt->userData, name,
9154 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009155#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009156 } else {
9157 if ((ctxt->sax != NULL) &&
9158 (ctxt->sax->endElement != NULL) &&
9159 (!ctxt->disableSAX))
9160 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009161#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009162 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009163 spacePop(ctxt);
9164 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009165 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009166 } else {
9167 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009168 }
9169 break;
9170 }
9171 if (RAW == '>') {
9172 NEXT;
9173 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009174 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009175 "Couldn't find end of Start Tag %s\n",
9176 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009177 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009178 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009179 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009180 if (ctxt->sax2)
9181 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009182#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009183 else
9184 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009185#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009186
Daniel Veillarda880b122003-04-21 21:36:41 +00009187 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009188 break;
9189 }
9190 case XML_PARSER_CONTENT: {
9191 const xmlChar *test;
9192 unsigned int cons;
9193 if ((avail < 2) && (ctxt->inputNr == 1))
9194 goto done;
9195 cur = ctxt->input->cur[0];
9196 next = ctxt->input->cur[1];
9197
9198 test = CUR_PTR;
9199 cons = ctxt->input->consumed;
9200 if ((cur == '<') && (next == '/')) {
9201 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009202 break;
9203 } else if ((cur == '<') && (next == '?')) {
9204 if ((!terminate) &&
9205 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9206 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009207 xmlParsePI(ctxt);
9208 } else if ((cur == '<') && (next != '!')) {
9209 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009210 break;
9211 } else if ((cur == '<') && (next == '!') &&
9212 (ctxt->input->cur[2] == '-') &&
9213 (ctxt->input->cur[3] == '-')) {
9214 if ((!terminate) &&
9215 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9216 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009217 xmlParseComment(ctxt);
9218 ctxt->instate = XML_PARSER_CONTENT;
9219 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9220 (ctxt->input->cur[2] == '[') &&
9221 (ctxt->input->cur[3] == 'C') &&
9222 (ctxt->input->cur[4] == 'D') &&
9223 (ctxt->input->cur[5] == 'A') &&
9224 (ctxt->input->cur[6] == 'T') &&
9225 (ctxt->input->cur[7] == 'A') &&
9226 (ctxt->input->cur[8] == '[')) {
9227 SKIP(9);
9228 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 break;
9230 } else if ((cur == '<') && (next == '!') &&
9231 (avail < 9)) {
9232 goto done;
9233 } else if (cur == '&') {
9234 if ((!terminate) &&
9235 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9236 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009237 xmlParseReference(ctxt);
9238 } else {
9239 /* TODO Avoid the extra copy, handle directly !!! */
9240 /*
9241 * Goal of the following test is:
9242 * - minimize calls to the SAX 'character' callback
9243 * when they are mergeable
9244 * - handle an problem for isBlank when we only parse
9245 * a sequence of blank chars and the next one is
9246 * not available to check against '<' presence.
9247 * - tries to homogenize the differences in SAX
9248 * callbacks between the push and pull versions
9249 * of the parser.
9250 */
9251 if ((ctxt->inputNr == 1) &&
9252 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9253 if (!terminate) {
9254 if (ctxt->progressive) {
9255 if ((lastlt == NULL) ||
9256 (ctxt->input->cur > lastlt))
9257 goto done;
9258 } else if (xmlParseLookupSequence(ctxt,
9259 '<', 0, 0) < 0) {
9260 goto done;
9261 }
9262 }
9263 }
9264 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009265 xmlParseCharData(ctxt, 0);
9266 }
9267 /*
9268 * Pop-up of finished entities.
9269 */
9270 while ((RAW == 0) && (ctxt->inputNr > 1))
9271 xmlPopInput(ctxt);
9272 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009273 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9274 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009275 ctxt->instate = XML_PARSER_EOF;
9276 break;
9277 }
9278 break;
9279 }
9280 case XML_PARSER_END_TAG:
9281 if (avail < 2)
9282 goto done;
9283 if (!terminate) {
9284 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009285 /* > can be found unescaped in attribute values */
9286 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009287 goto done;
9288 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9289 goto done;
9290 }
9291 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009292 if (ctxt->sax2) {
9293 xmlParseEndTag2(ctxt,
9294 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9295 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009296 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009297 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009298 }
9299#ifdef LIBXML_SAX1_ENABLED
9300 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009301 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009302#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009303 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009304 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009305 } else {
9306 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009307 }
9308 break;
9309 case XML_PARSER_CDATA_SECTION: {
9310 /*
9311 * The Push mode need to have the SAX callback for
9312 * cdataBlock merge back contiguous callbacks.
9313 */
9314 int base;
9315
9316 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9317 if (base < 0) {
9318 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9319 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9320 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009321 ctxt->sax->cdataBlock(ctxt->userData,
9322 ctxt->input->cur,
9323 XML_PARSER_BIG_BUFFER_SIZE);
9324 else if (ctxt->sax->characters != NULL)
9325 ctxt->sax->characters(ctxt->userData,
9326 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009327 XML_PARSER_BIG_BUFFER_SIZE);
9328 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009329 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009330 ctxt->checkIndex = 0;
9331 }
9332 goto done;
9333 } else {
9334 if ((ctxt->sax != NULL) && (base > 0) &&
9335 (!ctxt->disableSAX)) {
9336 if (ctxt->sax->cdataBlock != NULL)
9337 ctxt->sax->cdataBlock(ctxt->userData,
9338 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009339 else if (ctxt->sax->characters != NULL)
9340 ctxt->sax->characters(ctxt->userData,
9341 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009342 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009343 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009344 ctxt->checkIndex = 0;
9345 ctxt->instate = XML_PARSER_CONTENT;
9346#ifdef DEBUG_PUSH
9347 xmlGenericError(xmlGenericErrorContext,
9348 "PP: entering CONTENT\n");
9349#endif
9350 }
9351 break;
9352 }
Owen Taylor3473f882001-02-23 17:55:21 +00009353 case XML_PARSER_MISC:
9354 SKIP_BLANKS;
9355 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009356 avail = ctxt->input->length -
9357 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009358 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009359 avail = ctxt->input->buf->buffer->use -
9360 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009361 if (avail < 2)
9362 goto done;
9363 cur = ctxt->input->cur[0];
9364 next = ctxt->input->cur[1];
9365 if ((cur == '<') && (next == '?')) {
9366 if ((!terminate) &&
9367 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9368 goto done;
9369#ifdef DEBUG_PUSH
9370 xmlGenericError(xmlGenericErrorContext,
9371 "PP: Parsing PI\n");
9372#endif
9373 xmlParsePI(ctxt);
9374 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009375 (ctxt->input->cur[2] == '-') &&
9376 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009377 if ((!terminate) &&
9378 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9379 goto done;
9380#ifdef DEBUG_PUSH
9381 xmlGenericError(xmlGenericErrorContext,
9382 "PP: Parsing Comment\n");
9383#endif
9384 xmlParseComment(ctxt);
9385 ctxt->instate = XML_PARSER_MISC;
9386 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009387 (ctxt->input->cur[2] == 'D') &&
9388 (ctxt->input->cur[3] == 'O') &&
9389 (ctxt->input->cur[4] == 'C') &&
9390 (ctxt->input->cur[5] == 'T') &&
9391 (ctxt->input->cur[6] == 'Y') &&
9392 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009393 (ctxt->input->cur[8] == 'E')) {
9394 if ((!terminate) &&
9395 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9396 goto done;
9397#ifdef DEBUG_PUSH
9398 xmlGenericError(xmlGenericErrorContext,
9399 "PP: Parsing internal subset\n");
9400#endif
9401 ctxt->inSubset = 1;
9402 xmlParseDocTypeDecl(ctxt);
9403 if (RAW == '[') {
9404 ctxt->instate = XML_PARSER_DTD;
9405#ifdef DEBUG_PUSH
9406 xmlGenericError(xmlGenericErrorContext,
9407 "PP: entering DTD\n");
9408#endif
9409 } else {
9410 /*
9411 * Create and update the external subset.
9412 */
9413 ctxt->inSubset = 2;
9414 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9415 (ctxt->sax->externalSubset != NULL))
9416 ctxt->sax->externalSubset(ctxt->userData,
9417 ctxt->intSubName, ctxt->extSubSystem,
9418 ctxt->extSubURI);
9419 ctxt->inSubset = 0;
9420 ctxt->instate = XML_PARSER_PROLOG;
9421#ifdef DEBUG_PUSH
9422 xmlGenericError(xmlGenericErrorContext,
9423 "PP: entering PROLOG\n");
9424#endif
9425 }
9426 } else if ((cur == '<') && (next == '!') &&
9427 (avail < 9)) {
9428 goto done;
9429 } else {
9430 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009431 ctxt->progressive = 1;
9432 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009433#ifdef DEBUG_PUSH
9434 xmlGenericError(xmlGenericErrorContext,
9435 "PP: entering START_TAG\n");
9436#endif
9437 }
9438 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009439 case XML_PARSER_PROLOG:
9440 SKIP_BLANKS;
9441 if (ctxt->input->buf == NULL)
9442 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9443 else
9444 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9445 if (avail < 2)
9446 goto done;
9447 cur = ctxt->input->cur[0];
9448 next = ctxt->input->cur[1];
9449 if ((cur == '<') && (next == '?')) {
9450 if ((!terminate) &&
9451 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9452 goto done;
9453#ifdef DEBUG_PUSH
9454 xmlGenericError(xmlGenericErrorContext,
9455 "PP: Parsing PI\n");
9456#endif
9457 xmlParsePI(ctxt);
9458 } else if ((cur == '<') && (next == '!') &&
9459 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9460 if ((!terminate) &&
9461 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9462 goto done;
9463#ifdef DEBUG_PUSH
9464 xmlGenericError(xmlGenericErrorContext,
9465 "PP: Parsing Comment\n");
9466#endif
9467 xmlParseComment(ctxt);
9468 ctxt->instate = XML_PARSER_PROLOG;
9469 } else if ((cur == '<') && (next == '!') &&
9470 (avail < 4)) {
9471 goto done;
9472 } else {
9473 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009474 if (ctxt->progressive == 0)
9475 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009476 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009477#ifdef DEBUG_PUSH
9478 xmlGenericError(xmlGenericErrorContext,
9479 "PP: entering START_TAG\n");
9480#endif
9481 }
9482 break;
9483 case XML_PARSER_EPILOG:
9484 SKIP_BLANKS;
9485 if (ctxt->input->buf == NULL)
9486 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9487 else
9488 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9489 if (avail < 2)
9490 goto done;
9491 cur = ctxt->input->cur[0];
9492 next = ctxt->input->cur[1];
9493 if ((cur == '<') && (next == '?')) {
9494 if ((!terminate) &&
9495 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9496 goto done;
9497#ifdef DEBUG_PUSH
9498 xmlGenericError(xmlGenericErrorContext,
9499 "PP: Parsing PI\n");
9500#endif
9501 xmlParsePI(ctxt);
9502 ctxt->instate = XML_PARSER_EPILOG;
9503 } else if ((cur == '<') && (next == '!') &&
9504 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9505 if ((!terminate) &&
9506 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9507 goto done;
9508#ifdef DEBUG_PUSH
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: Parsing Comment\n");
9511#endif
9512 xmlParseComment(ctxt);
9513 ctxt->instate = XML_PARSER_EPILOG;
9514 } else if ((cur == '<') && (next == '!') &&
9515 (avail < 4)) {
9516 goto done;
9517 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009518 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009519 ctxt->instate = XML_PARSER_EOF;
9520#ifdef DEBUG_PUSH
9521 xmlGenericError(xmlGenericErrorContext,
9522 "PP: entering EOF\n");
9523#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009524 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009525 ctxt->sax->endDocument(ctxt->userData);
9526 goto done;
9527 }
9528 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009529 case XML_PARSER_DTD: {
9530 /*
9531 * Sorry but progressive parsing of the internal subset
9532 * is not expected to be supported. We first check that
9533 * the full content of the internal subset is available and
9534 * the parsing is launched only at that point.
9535 * Internal subset ends up with "']' S? '>'" in an unescaped
9536 * section and not in a ']]>' sequence which are conditional
9537 * sections (whoever argued to keep that crap in XML deserve
9538 * a place in hell !).
9539 */
9540 int base, i;
9541 xmlChar *buf;
9542 xmlChar quote = 0;
9543
9544 base = ctxt->input->cur - ctxt->input->base;
9545 if (base < 0) return(0);
9546 if (ctxt->checkIndex > base)
9547 base = ctxt->checkIndex;
9548 buf = ctxt->input->buf->buffer->content;
9549 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9550 base++) {
9551 if (quote != 0) {
9552 if (buf[base] == quote)
9553 quote = 0;
9554 continue;
9555 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009556 if ((quote == 0) && (buf[base] == '<')) {
9557 int found = 0;
9558 /* special handling of comments */
9559 if (((unsigned int) base + 4 <
9560 ctxt->input->buf->buffer->use) &&
9561 (buf[base + 1] == '!') &&
9562 (buf[base + 2] == '-') &&
9563 (buf[base + 3] == '-')) {
9564 for (;(unsigned int) base + 3 <
9565 ctxt->input->buf->buffer->use; base++) {
9566 if ((buf[base] == '-') &&
9567 (buf[base + 1] == '-') &&
9568 (buf[base + 2] == '>')) {
9569 found = 1;
9570 base += 2;
9571 break;
9572 }
9573 }
9574 if (!found)
9575 break;
9576 continue;
9577 }
9578 }
Owen Taylor3473f882001-02-23 17:55:21 +00009579 if (buf[base] == '"') {
9580 quote = '"';
9581 continue;
9582 }
9583 if (buf[base] == '\'') {
9584 quote = '\'';
9585 continue;
9586 }
9587 if (buf[base] == ']') {
9588 if ((unsigned int) base +1 >=
9589 ctxt->input->buf->buffer->use)
9590 break;
9591 if (buf[base + 1] == ']') {
9592 /* conditional crap, skip both ']' ! */
9593 base++;
9594 continue;
9595 }
9596 for (i = 0;
9597 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9598 i++) {
9599 if (buf[base + i] == '>')
9600 goto found_end_int_subset;
9601 }
9602 break;
9603 }
9604 }
9605 /*
9606 * We didn't found the end of the Internal subset
9607 */
9608 if (quote == 0)
9609 ctxt->checkIndex = base;
9610#ifdef DEBUG_PUSH
9611 if (next == 0)
9612 xmlGenericError(xmlGenericErrorContext,
9613 "PP: lookup of int subset end filed\n");
9614#endif
9615 goto done;
9616
9617found_end_int_subset:
9618 xmlParseInternalSubset(ctxt);
9619 ctxt->inSubset = 2;
9620 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9621 (ctxt->sax->externalSubset != NULL))
9622 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9623 ctxt->extSubSystem, ctxt->extSubURI);
9624 ctxt->inSubset = 0;
9625 ctxt->instate = XML_PARSER_PROLOG;
9626 ctxt->checkIndex = 0;
9627#ifdef DEBUG_PUSH
9628 xmlGenericError(xmlGenericErrorContext,
9629 "PP: entering PROLOG\n");
9630#endif
9631 break;
9632 }
9633 case XML_PARSER_COMMENT:
9634 xmlGenericError(xmlGenericErrorContext,
9635 "PP: internal error, state == COMMENT\n");
9636 ctxt->instate = XML_PARSER_CONTENT;
9637#ifdef DEBUG_PUSH
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: entering CONTENT\n");
9640#endif
9641 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009642 case XML_PARSER_IGNORE:
9643 xmlGenericError(xmlGenericErrorContext,
9644 "PP: internal error, state == IGNORE");
9645 ctxt->instate = XML_PARSER_DTD;
9646#ifdef DEBUG_PUSH
9647 xmlGenericError(xmlGenericErrorContext,
9648 "PP: entering DTD\n");
9649#endif
9650 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009651 case XML_PARSER_PI:
9652 xmlGenericError(xmlGenericErrorContext,
9653 "PP: internal error, state == PI\n");
9654 ctxt->instate = XML_PARSER_CONTENT;
9655#ifdef DEBUG_PUSH
9656 xmlGenericError(xmlGenericErrorContext,
9657 "PP: entering CONTENT\n");
9658#endif
9659 break;
9660 case XML_PARSER_ENTITY_DECL:
9661 xmlGenericError(xmlGenericErrorContext,
9662 "PP: internal error, state == ENTITY_DECL\n");
9663 ctxt->instate = XML_PARSER_DTD;
9664#ifdef DEBUG_PUSH
9665 xmlGenericError(xmlGenericErrorContext,
9666 "PP: entering DTD\n");
9667#endif
9668 break;
9669 case XML_PARSER_ENTITY_VALUE:
9670 xmlGenericError(xmlGenericErrorContext,
9671 "PP: internal error, state == ENTITY_VALUE\n");
9672 ctxt->instate = XML_PARSER_CONTENT;
9673#ifdef DEBUG_PUSH
9674 xmlGenericError(xmlGenericErrorContext,
9675 "PP: entering DTD\n");
9676#endif
9677 break;
9678 case XML_PARSER_ATTRIBUTE_VALUE:
9679 xmlGenericError(xmlGenericErrorContext,
9680 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9681 ctxt->instate = XML_PARSER_START_TAG;
9682#ifdef DEBUG_PUSH
9683 xmlGenericError(xmlGenericErrorContext,
9684 "PP: entering START_TAG\n");
9685#endif
9686 break;
9687 case XML_PARSER_SYSTEM_LITERAL:
9688 xmlGenericError(xmlGenericErrorContext,
9689 "PP: internal error, state == SYSTEM_LITERAL\n");
9690 ctxt->instate = XML_PARSER_START_TAG;
9691#ifdef DEBUG_PUSH
9692 xmlGenericError(xmlGenericErrorContext,
9693 "PP: entering START_TAG\n");
9694#endif
9695 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009696 case XML_PARSER_PUBLIC_LITERAL:
9697 xmlGenericError(xmlGenericErrorContext,
9698 "PP: internal error, state == PUBLIC_LITERAL\n");
9699 ctxt->instate = XML_PARSER_START_TAG;
9700#ifdef DEBUG_PUSH
9701 xmlGenericError(xmlGenericErrorContext,
9702 "PP: entering START_TAG\n");
9703#endif
9704 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009705 }
9706 }
9707done:
9708#ifdef DEBUG_PUSH
9709 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9710#endif
9711 return(ret);
9712}
9713
9714/**
Owen Taylor3473f882001-02-23 17:55:21 +00009715 * xmlParseChunk:
9716 * @ctxt: an XML parser context
9717 * @chunk: an char array
9718 * @size: the size in byte of the chunk
9719 * @terminate: last chunk indicator
9720 *
9721 * Parse a Chunk of memory
9722 *
9723 * Returns zero if no error, the xmlParserErrors otherwise.
9724 */
9725int
9726xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9727 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009728 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9729 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009730 if (ctxt->instate == XML_PARSER_START)
9731 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009732 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9733 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9734 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9735 int cur = ctxt->input->cur - ctxt->input->base;
9736
9737 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9738 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9739 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009740 ctxt->input->end =
9741 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009742#ifdef DEBUG_PUSH
9743 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9744#endif
9745
Owen Taylor3473f882001-02-23 17:55:21 +00009746 } else if (ctxt->instate != XML_PARSER_EOF) {
9747 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9748 xmlParserInputBufferPtr in = ctxt->input->buf;
9749 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9750 (in->raw != NULL)) {
9751 int nbchars;
9752
9753 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9754 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009755 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009756 xmlGenericError(xmlGenericErrorContext,
9757 "xmlParseChunk: encoder error\n");
9758 return(XML_ERR_INVALID_ENCODING);
9759 }
9760 }
9761 }
9762 }
9763 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009764 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9765 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009766 if (terminate) {
9767 /*
9768 * Check for termination
9769 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009770 int avail = 0;
9771 if (ctxt->input->buf == NULL)
9772 avail = ctxt->input->length -
9773 (ctxt->input->cur - ctxt->input->base);
9774 else
9775 avail = ctxt->input->buf->buffer->use -
9776 (ctxt->input->cur - ctxt->input->base);
9777
Owen Taylor3473f882001-02-23 17:55:21 +00009778 if ((ctxt->instate != XML_PARSER_EOF) &&
9779 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009780 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009781 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009782 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009783 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009784 }
Owen Taylor3473f882001-02-23 17:55:21 +00009785 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009786 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009787 ctxt->sax->endDocument(ctxt->userData);
9788 }
9789 ctxt->instate = XML_PARSER_EOF;
9790 }
9791 return((xmlParserErrors) ctxt->errNo);
9792}
9793
9794/************************************************************************
9795 * *
9796 * I/O front end functions to the parser *
9797 * *
9798 ************************************************************************/
9799
9800/**
9801 * xmlStopParser:
9802 * @ctxt: an XML parser context
9803 *
9804 * Blocks further parser processing
9805 */
9806void
9807xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009808 if (ctxt == NULL)
9809 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009810 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009811 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009812 if (ctxt->input != NULL)
9813 ctxt->input->cur = BAD_CAST"";
9814}
9815
9816/**
9817 * xmlCreatePushParserCtxt:
9818 * @sax: a SAX handler
9819 * @user_data: The user data returned on SAX callbacks
9820 * @chunk: a pointer to an array of chars
9821 * @size: number of chars in the array
9822 * @filename: an optional file name or URI
9823 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009824 * Create a parser context for using the XML parser in push mode.
 * If @chunk is non-NULL and @size is positive, the data is used to detect
9826 * the encoding. The remaining characters will be parsed so they
9827 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009828 * To allow content encoding detection, @size should be >= 4
9829 * The value of @filename is used for fetching external entities
9830 * and error/warning reports.
9831 *
9832 * Returns the new parser context or NULL
9833 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009834
Owen Taylor3473f882001-02-23 17:55:21 +00009835xmlParserCtxtPtr
9836xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9837 const char *chunk, int size, const char *filename) {
9838 xmlParserCtxtPtr ctxt;
9839 xmlParserInputPtr inputStream;
9840 xmlParserInputBufferPtr buf;
9841 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9842
9843 /*
9844 * plug some encoding conversion routines
9845 */
9846 if ((chunk != NULL) && (size >= 4))
9847 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9848
9849 buf = xmlAllocParserInputBuffer(enc);
9850 if (buf == NULL) return(NULL);
9851
9852 ctxt = xmlNewParserCtxt();
9853 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009854 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009855 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009856 return(NULL);
9857 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009858 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9859 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009860 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009861 xmlFreeParserInputBuffer(buf);
9862 xmlFreeParserCtxt(ctxt);
9863 return(NULL);
9864 }
Owen Taylor3473f882001-02-23 17:55:21 +00009865 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009866#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009867 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009868#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009869 xmlFree(ctxt->sax);
9870 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9871 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009872 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009873 xmlFreeParserInputBuffer(buf);
9874 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009875 return(NULL);
9876 }
9877 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9878 if (user_data != NULL)
9879 ctxt->userData = user_data;
9880 }
9881 if (filename == NULL) {
9882 ctxt->directory = NULL;
9883 } else {
9884 ctxt->directory = xmlParserGetDirectory(filename);
9885 }
9886
9887 inputStream = xmlNewInputStream(ctxt);
9888 if (inputStream == NULL) {
9889 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009890 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009891 return(NULL);
9892 }
9893
9894 if (filename == NULL)
9895 inputStream->filename = NULL;
9896 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009897 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009898 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009899 inputStream->buf = buf;
9900 inputStream->base = inputStream->buf->buffer->content;
9901 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009902 inputStream->end =
9903 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009904
9905 inputPush(ctxt, inputStream);
9906
9907 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9908 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009909 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9910 int cur = ctxt->input->cur - ctxt->input->base;
9911
Owen Taylor3473f882001-02-23 17:55:21 +00009912 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009913
9914 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9915 ctxt->input->cur = ctxt->input->base + cur;
9916 ctxt->input->end =
9917 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009918#ifdef DEBUG_PUSH
9919 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9920#endif
9921 }
9922
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009923 if (enc != XML_CHAR_ENCODING_NONE) {
9924 xmlSwitchEncoding(ctxt, enc);
9925 }
9926
Owen Taylor3473f882001-02-23 17:55:21 +00009927 return(ctxt);
9928}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009929#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009930
9931/**
9932 * xmlCreateIOParserCtxt:
9933 * @sax: a SAX handler
9934 * @user_data: The user data returned on SAX callbacks
9935 * @ioread: an I/O read function
9936 * @ioclose: an I/O close function
9937 * @ioctx: an I/O handler
9938 * @enc: the charset encoding if known
9939 *
9940 * Create a parser context for using the XML parser with an existing
9941 * I/O stream
9942 *
9943 * Returns the new parser context or NULL
9944 */
9945xmlParserCtxtPtr
9946xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9947 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9948 void *ioctx, xmlCharEncoding enc) {
9949 xmlParserCtxtPtr ctxt;
9950 xmlParserInputPtr inputStream;
9951 xmlParserInputBufferPtr buf;
9952
9953 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9954 if (buf == NULL) return(NULL);
9955
9956 ctxt = xmlNewParserCtxt();
9957 if (ctxt == NULL) {
9958 xmlFree(buf);
9959 return(NULL);
9960 }
9961 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009962#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009963 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009964#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009965 xmlFree(ctxt->sax);
9966 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9967 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009968 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009969 xmlFree(ctxt);
9970 return(NULL);
9971 }
9972 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9973 if (user_data != NULL)
9974 ctxt->userData = user_data;
9975 }
9976
9977 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9978 if (inputStream == NULL) {
9979 xmlFreeParserCtxt(ctxt);
9980 return(NULL);
9981 }
9982 inputPush(ctxt, inputStream);
9983
9984 return(ctxt);
9985}
9986
Daniel Veillard4432df22003-09-28 18:58:27 +00009987#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009988/************************************************************************
9989 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009990 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009991 * *
9992 ************************************************************************/
9993
9994/**
9995 * xmlIOParseDTD:
9996 * @sax: the SAX handler block or NULL
9997 * @input: an Input Buffer
9998 * @enc: the charset encoding if known
9999 *
10000 * Load and parse a DTD
10001 *
10002 * Returns the resulting xmlDtdPtr or NULL in case of error.
10003 * @input will be freed at parsing end.
10004 */
10005
10006xmlDtdPtr
10007xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10008 xmlCharEncoding enc) {
10009 xmlDtdPtr ret = NULL;
10010 xmlParserCtxtPtr ctxt;
10011 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010012 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010013
10014 if (input == NULL)
10015 return(NULL);
10016
10017 ctxt = xmlNewParserCtxt();
10018 if (ctxt == NULL) {
10019 return(NULL);
10020 }
10021
10022 /*
10023 * Set-up the SAX context
10024 */
10025 if (sax != NULL) {
10026 if (ctxt->sax != NULL)
10027 xmlFree(ctxt->sax);
10028 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010029 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010030 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010031 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010032
10033 /*
10034 * generate a parser input from the I/O handler
10035 */
10036
Daniel Veillard43caefb2003-12-07 19:32:22 +000010037 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010038 if (pinput == NULL) {
10039 if (sax != NULL) ctxt->sax = NULL;
10040 xmlFreeParserCtxt(ctxt);
10041 return(NULL);
10042 }
10043
10044 /*
10045 * plug some encoding conversion routines here.
10046 */
10047 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010048 if (enc != XML_CHAR_ENCODING_NONE) {
10049 xmlSwitchEncoding(ctxt, enc);
10050 }
Owen Taylor3473f882001-02-23 17:55:21 +000010051
10052 pinput->filename = NULL;
10053 pinput->line = 1;
10054 pinput->col = 1;
10055 pinput->base = ctxt->input->cur;
10056 pinput->cur = ctxt->input->cur;
10057 pinput->free = NULL;
10058
10059 /*
10060 * let's parse that entity knowing it's an external subset.
10061 */
10062 ctxt->inSubset = 2;
10063 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10064 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10065 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010066
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010067 if ((enc == XML_CHAR_ENCODING_NONE) &&
10068 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010069 /*
10070 * Get the 4 first bytes and decode the charset
10071 * if enc != XML_CHAR_ENCODING_NONE
10072 * plug some encoding conversion routines.
10073 */
10074 start[0] = RAW;
10075 start[1] = NXT(1);
10076 start[2] = NXT(2);
10077 start[3] = NXT(3);
10078 enc = xmlDetectCharEncoding(start, 4);
10079 if (enc != XML_CHAR_ENCODING_NONE) {
10080 xmlSwitchEncoding(ctxt, enc);
10081 }
10082 }
10083
Owen Taylor3473f882001-02-23 17:55:21 +000010084 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10085
10086 if (ctxt->myDoc != NULL) {
10087 if (ctxt->wellFormed) {
10088 ret = ctxt->myDoc->extSubset;
10089 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010090 if (ret != NULL) {
10091 xmlNodePtr tmp;
10092
10093 ret->doc = NULL;
10094 tmp = ret->children;
10095 while (tmp != NULL) {
10096 tmp->doc = NULL;
10097 tmp = tmp->next;
10098 }
10099 }
Owen Taylor3473f882001-02-23 17:55:21 +000010100 } else {
10101 ret = NULL;
10102 }
10103 xmlFreeDoc(ctxt->myDoc);
10104 ctxt->myDoc = NULL;
10105 }
10106 if (sax != NULL) ctxt->sax = NULL;
10107 xmlFreeParserCtxt(ctxt);
10108
10109 return(ret);
10110}
10111
10112/**
10113 * xmlSAXParseDTD:
10114 * @sax: the SAX handler block
10115 * @ExternalID: a NAME* containing the External ID of the DTD
10116 * @SystemID: a NAME* containing the URL to the DTD
10117 *
10118 * Load and parse an external subset.
10119 *
10120 * Returns the resulting xmlDtdPtr or NULL in case of error.
10121 */
10122
10123xmlDtdPtr
10124xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10125 const xmlChar *SystemID) {
10126 xmlDtdPtr ret = NULL;
10127 xmlParserCtxtPtr ctxt;
10128 xmlParserInputPtr input = NULL;
10129 xmlCharEncoding enc;
10130
10131 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10132
10133 ctxt = xmlNewParserCtxt();
10134 if (ctxt == NULL) {
10135 return(NULL);
10136 }
10137
10138 /*
10139 * Set-up the SAX context
10140 */
10141 if (sax != NULL) {
10142 if (ctxt->sax != NULL)
10143 xmlFree(ctxt->sax);
10144 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010145 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010146 }
10147
10148 /*
10149 * Ask the Entity resolver to load the damn thing
10150 */
10151
10152 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010153 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010154 if (input == NULL) {
10155 if (sax != NULL) ctxt->sax = NULL;
10156 xmlFreeParserCtxt(ctxt);
10157 return(NULL);
10158 }
10159
10160 /*
10161 * plug some encoding conversion routines here.
10162 */
10163 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010164 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10165 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10166 xmlSwitchEncoding(ctxt, enc);
10167 }
Owen Taylor3473f882001-02-23 17:55:21 +000010168
10169 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010170 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010171 input->line = 1;
10172 input->col = 1;
10173 input->base = ctxt->input->cur;
10174 input->cur = ctxt->input->cur;
10175 input->free = NULL;
10176
10177 /*
10178 * let's parse that entity knowing it's an external subset.
10179 */
10180 ctxt->inSubset = 2;
10181 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10182 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10183 ExternalID, SystemID);
10184 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10185
10186 if (ctxt->myDoc != NULL) {
10187 if (ctxt->wellFormed) {
10188 ret = ctxt->myDoc->extSubset;
10189 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010190 if (ret != NULL) {
10191 xmlNodePtr tmp;
10192
10193 ret->doc = NULL;
10194 tmp = ret->children;
10195 while (tmp != NULL) {
10196 tmp->doc = NULL;
10197 tmp = tmp->next;
10198 }
10199 }
Owen Taylor3473f882001-02-23 17:55:21 +000010200 } else {
10201 ret = NULL;
10202 }
10203 xmlFreeDoc(ctxt->myDoc);
10204 ctxt->myDoc = NULL;
10205 }
10206 if (sax != NULL) ctxt->sax = NULL;
10207 xmlFreeParserCtxt(ctxt);
10208
10209 return(ret);
10210}
10211
Daniel Veillard4432df22003-09-28 18:58:27 +000010212
Owen Taylor3473f882001-02-23 17:55:21 +000010213/**
10214 * xmlParseDTD:
10215 * @ExternalID: a NAME* containing the External ID of the DTD
10216 * @SystemID: a NAME* containing the URL to the DTD
10217 *
10218 * Load and parse an external subset.
10219 *
10220 * Returns the resulting xmlDtdPtr or NULL in case of error.
10221 */
10222
10223xmlDtdPtr
10224xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10225 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10226}
Daniel Veillard4432df22003-09-28 18:58:27 +000010227#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010228
10229/************************************************************************
10230 * *
10231 * Front ends when parsing an Entity *
10232 * *
10233 ************************************************************************/
10234
10235/**
Owen Taylor3473f882001-02-23 17:55:21 +000010236 * xmlParseCtxtExternalEntity:
10237 * @ctx: the existing parsing context
10238 * @URL: the URL for the entity to load
10239 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010240 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010241 *
10242 * Parse an external general entity within an existing parsing context
10243 * An external general parsed entity is well-formed if it matches the
10244 * production labeled extParsedEnt.
10245 *
10246 * [78] extParsedEnt ::= TextDecl? content
10247 *
10248 * Returns 0 if the entity is well formed, -1 in case of args problem and
10249 * the parser error code otherwise
10250 */
10251
10252int
10253xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010254 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010255 xmlParserCtxtPtr ctxt;
10256 xmlDocPtr newDoc;
10257 xmlSAXHandlerPtr oldsax = NULL;
10258 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010259 xmlChar start[4];
10260 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010261
10262 if (ctx->depth > 40) {
10263 return(XML_ERR_ENTITY_LOOP);
10264 }
10265
Daniel Veillardcda96922001-08-21 10:56:31 +000010266 if (lst != NULL)
10267 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010268 if ((URL == NULL) && (ID == NULL))
10269 return(-1);
10270 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10271 return(-1);
10272
10273
10274 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10275 if (ctxt == NULL) return(-1);
10276 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010277 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010278 oldsax = ctxt->sax;
10279 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010280 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010281 newDoc = xmlNewDoc(BAD_CAST "1.0");
10282 if (newDoc == NULL) {
10283 xmlFreeParserCtxt(ctxt);
10284 return(-1);
10285 }
10286 if (ctx->myDoc != NULL) {
10287 newDoc->intSubset = ctx->myDoc->intSubset;
10288 newDoc->extSubset = ctx->myDoc->extSubset;
10289 }
10290 if (ctx->myDoc->URL != NULL) {
10291 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10292 }
10293 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10294 if (newDoc->children == NULL) {
10295 ctxt->sax = oldsax;
10296 xmlFreeParserCtxt(ctxt);
10297 newDoc->intSubset = NULL;
10298 newDoc->extSubset = NULL;
10299 xmlFreeDoc(newDoc);
10300 return(-1);
10301 }
10302 nodePush(ctxt, newDoc->children);
10303 if (ctx->myDoc == NULL) {
10304 ctxt->myDoc = newDoc;
10305 } else {
10306 ctxt->myDoc = ctx->myDoc;
10307 newDoc->children->doc = ctx->myDoc;
10308 }
10309
Daniel Veillard87a764e2001-06-20 17:41:10 +000010310 /*
10311 * Get the 4 first bytes and decode the charset
10312 * if enc != XML_CHAR_ENCODING_NONE
10313 * plug some encoding conversion routines.
10314 */
10315 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010316 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10317 start[0] = RAW;
10318 start[1] = NXT(1);
10319 start[2] = NXT(2);
10320 start[3] = NXT(3);
10321 enc = xmlDetectCharEncoding(start, 4);
10322 if (enc != XML_CHAR_ENCODING_NONE) {
10323 xmlSwitchEncoding(ctxt, enc);
10324 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010325 }
10326
Owen Taylor3473f882001-02-23 17:55:21 +000010327 /*
10328 * Parse a possible text declaration first
10329 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010330 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010331 xmlParseTextDecl(ctxt);
10332 }
10333
10334 /*
10335 * Doing validity checking on chunk doesn't make sense
10336 */
10337 ctxt->instate = XML_PARSER_CONTENT;
10338 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010339 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010340 ctxt->loadsubset = ctx->loadsubset;
10341 ctxt->depth = ctx->depth + 1;
10342 ctxt->replaceEntities = ctx->replaceEntities;
10343 if (ctxt->validate) {
10344 ctxt->vctxt.error = ctx->vctxt.error;
10345 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010346 } else {
10347 ctxt->vctxt.error = NULL;
10348 ctxt->vctxt.warning = NULL;
10349 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010350 ctxt->vctxt.nodeTab = NULL;
10351 ctxt->vctxt.nodeNr = 0;
10352 ctxt->vctxt.nodeMax = 0;
10353 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010354 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10355 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010356 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10357 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10358 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010359 ctxt->dictNames = ctx->dictNames;
10360 ctxt->attsDefault = ctx->attsDefault;
10361 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010362
10363 xmlParseContent(ctxt);
10364
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010365 ctx->validate = ctxt->validate;
10366 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010367 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010368 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010369 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010370 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010371 }
10372 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010373 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010374 }
10375
10376 if (!ctxt->wellFormed) {
10377 if (ctxt->errNo == 0)
10378 ret = 1;
10379 else
10380 ret = ctxt->errNo;
10381 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010382 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010383 xmlNodePtr cur;
10384
10385 /*
10386 * Return the newly created nodeset after unlinking it from
10387 * they pseudo parent.
10388 */
10389 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010390 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010391 while (cur != NULL) {
10392 cur->parent = NULL;
10393 cur = cur->next;
10394 }
10395 newDoc->children->children = NULL;
10396 }
10397 ret = 0;
10398 }
10399 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010400 ctxt->dict = NULL;
10401 ctxt->attsDefault = NULL;
10402 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010403 xmlFreeParserCtxt(ctxt);
10404 newDoc->intSubset = NULL;
10405 newDoc->extSubset = NULL;
10406 xmlFreeDoc(newDoc);
10407
10408 return(ret);
10409}
10410
10411/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010412 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010413 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010414 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010415 * @sax: the SAX handler bloc (possibly NULL)
10416 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10417 * @depth: Used for loop detection, use 0
10418 * @URL: the URL for the entity to load
10419 * @ID: the System ID for the entity to load
10420 * @list: the return value for the set of parsed nodes
10421 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010422 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010423 *
10424 * Returns 0 if the entity is well formed, -1 in case of args problem and
10425 * the parser error code otherwise
10426 */
10427
Daniel Veillard7d515752003-09-26 19:12:37 +000010428static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010429xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10430 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010431 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010432 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010433 xmlParserCtxtPtr ctxt;
10434 xmlDocPtr newDoc;
10435 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010436 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010437 xmlChar start[4];
10438 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010439
10440 if (depth > 40) {
10441 return(XML_ERR_ENTITY_LOOP);
10442 }
10443
10444
10445
10446 if (list != NULL)
10447 *list = NULL;
10448 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010449 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010450 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010451 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010452
10453
10454 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010455 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010456 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010457 if (oldctxt != NULL) {
10458 ctxt->_private = oldctxt->_private;
10459 ctxt->loadsubset = oldctxt->loadsubset;
10460 ctxt->validate = oldctxt->validate;
10461 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010462 ctxt->record_info = oldctxt->record_info;
10463 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10464 ctxt->node_seq.length = oldctxt->node_seq.length;
10465 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010466 } else {
10467 /*
10468 * Doing validity checking on chunk without context
10469 * doesn't make sense
10470 */
10471 ctxt->_private = NULL;
10472 ctxt->validate = 0;
10473 ctxt->external = 2;
10474 ctxt->loadsubset = 0;
10475 }
Owen Taylor3473f882001-02-23 17:55:21 +000010476 if (sax != NULL) {
10477 oldsax = ctxt->sax;
10478 ctxt->sax = sax;
10479 if (user_data != NULL)
10480 ctxt->userData = user_data;
10481 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010482 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010483 newDoc = xmlNewDoc(BAD_CAST "1.0");
10484 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010485 ctxt->node_seq.maximum = 0;
10486 ctxt->node_seq.length = 0;
10487 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010488 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010489 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010490 }
10491 if (doc != NULL) {
10492 newDoc->intSubset = doc->intSubset;
10493 newDoc->extSubset = doc->extSubset;
10494 }
10495 if (doc->URL != NULL) {
10496 newDoc->URL = xmlStrdup(doc->URL);
10497 }
10498 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10499 if (newDoc->children == NULL) {
10500 if (sax != NULL)
10501 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010502 ctxt->node_seq.maximum = 0;
10503 ctxt->node_seq.length = 0;
10504 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010505 xmlFreeParserCtxt(ctxt);
10506 newDoc->intSubset = NULL;
10507 newDoc->extSubset = NULL;
10508 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010509 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010510 }
10511 nodePush(ctxt, newDoc->children);
10512 if (doc == NULL) {
10513 ctxt->myDoc = newDoc;
10514 } else {
10515 ctxt->myDoc = doc;
10516 newDoc->children->doc = doc;
10517 }
10518
Daniel Veillard87a764e2001-06-20 17:41:10 +000010519 /*
10520 * Get the 4 first bytes and decode the charset
10521 * if enc != XML_CHAR_ENCODING_NONE
10522 * plug some encoding conversion routines.
10523 */
10524 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010525 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10526 start[0] = RAW;
10527 start[1] = NXT(1);
10528 start[2] = NXT(2);
10529 start[3] = NXT(3);
10530 enc = xmlDetectCharEncoding(start, 4);
10531 if (enc != XML_CHAR_ENCODING_NONE) {
10532 xmlSwitchEncoding(ctxt, enc);
10533 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010534 }
10535
Owen Taylor3473f882001-02-23 17:55:21 +000010536 /*
10537 * Parse a possible text declaration first
10538 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010539 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010540 xmlParseTextDecl(ctxt);
10541 }
10542
Owen Taylor3473f882001-02-23 17:55:21 +000010543 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010544 ctxt->depth = depth;
10545
10546 xmlParseContent(ctxt);
10547
Daniel Veillard561b7f82002-03-20 21:55:57 +000010548 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010549 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010550 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010551 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010552 }
10553 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010554 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010555 }
10556
10557 if (!ctxt->wellFormed) {
10558 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010559 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010560 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010561 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010562 } else {
10563 if (list != NULL) {
10564 xmlNodePtr cur;
10565
10566 /*
10567 * Return the newly created nodeset after unlinking it from
10568 * they pseudo parent.
10569 */
10570 cur = newDoc->children->children;
10571 *list = cur;
10572 while (cur != NULL) {
10573 cur->parent = NULL;
10574 cur = cur->next;
10575 }
10576 newDoc->children->children = NULL;
10577 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010578 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010579 }
10580 if (sax != NULL)
10581 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010582 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10583 oldctxt->node_seq.length = ctxt->node_seq.length;
10584 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010585 ctxt->node_seq.maximum = 0;
10586 ctxt->node_seq.length = 0;
10587 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010588 xmlFreeParserCtxt(ctxt);
10589 newDoc->intSubset = NULL;
10590 newDoc->extSubset = NULL;
10591 xmlFreeDoc(newDoc);
10592
10593 return(ret);
10594}
10595
Daniel Veillard81273902003-09-30 00:43:48 +000010596#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010597/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010598 * xmlParseExternalEntity:
10599 * @doc: the document the chunk pertains to
10600 * @sax: the SAX handler bloc (possibly NULL)
10601 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10602 * @depth: Used for loop detection, use 0
10603 * @URL: the URL for the entity to load
10604 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010605 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010606 *
10607 * Parse an external general entity
10608 * An external general parsed entity is well-formed if it matches the
10609 * production labeled extParsedEnt.
10610 *
10611 * [78] extParsedEnt ::= TextDecl? content
10612 *
10613 * Returns 0 if the entity is well formed, -1 in case of args problem and
10614 * the parser error code otherwise
10615 */
10616
10617int
10618xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010619 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010620 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010621 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010622}
10623
10624/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010625 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010626 * @doc: the document the chunk pertains to
10627 * @sax: the SAX handler bloc (possibly NULL)
10628 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10629 * @depth: Used for loop detection, use 0
10630 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010631 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010632 *
10633 * Parse a well-balanced chunk of an XML document
10634 * called by the parser
10635 * The allowed sequence for the Well Balanced Chunk is the one defined by
10636 * the content production in the XML grammar:
10637 *
10638 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10639 *
10640 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10641 * the parser error code otherwise
10642 */
10643
10644int
10645xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010646 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010647 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10648 depth, string, lst, 0 );
10649}
Daniel Veillard81273902003-09-30 00:43:48 +000010650#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010651
10652/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010653 * xmlParseBalancedChunkMemoryInternal:
10654 * @oldctxt: the existing parsing context
10655 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10656 * @user_data: the user data field for the parser context
10657 * @lst: the return value for the set of parsed nodes
10658 *
10659 *
10660 * Parse a well-balanced chunk of an XML document
10661 * called by the parser
10662 * The allowed sequence for the Well Balanced Chunk is the one defined by
10663 * the content production in the XML grammar:
10664 *
10665 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10666 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010667 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10668 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010669 *
10670 * In case recover is set to 1, the nodelist will not be empty even if
10671 * the parsed chunk is not well balanced.
10672 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010673static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010674xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10675 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10676 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010677 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010678 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010679 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010680 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010681 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010682
10683 if (oldctxt->depth > 40) {
10684 return(XML_ERR_ENTITY_LOOP);
10685 }
10686
10687
10688 if (lst != NULL)
10689 *lst = NULL;
10690 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010691 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010692
10693 size = xmlStrlen(string);
10694
10695 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010696 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010697 if (user_data != NULL)
10698 ctxt->userData = user_data;
10699 else
10700 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010701 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10702 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010703 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10704 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10705 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010706
10707 oldsax = ctxt->sax;
10708 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010709 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010710 ctxt->replaceEntities = oldctxt->replaceEntities;
10711 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010712
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010713 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010714 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010715 newDoc = xmlNewDoc(BAD_CAST "1.0");
10716 if (newDoc == NULL) {
10717 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010718 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010719 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010720 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010721 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010722 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010723 } else {
10724 ctxt->myDoc = oldctxt->myDoc;
10725 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010726 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010727 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010728 BAD_CAST "pseudoroot", NULL);
10729 if (ctxt->myDoc->children == NULL) {
10730 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010731 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010732 xmlFreeParserCtxt(ctxt);
10733 if (newDoc != NULL)
10734 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010735 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010736 }
10737 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010738 ctxt->instate = XML_PARSER_CONTENT;
10739 ctxt->depth = oldctxt->depth + 1;
10740
Daniel Veillard328f48c2002-11-15 15:24:34 +000010741 ctxt->validate = 0;
10742 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010743 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10744 /*
10745 * ID/IDREF registration will be done in xmlValidateElement below
10746 */
10747 ctxt->loadsubset |= XML_SKIP_IDS;
10748 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010749 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010750 ctxt->attsDefault = oldctxt->attsDefault;
10751 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010752
Daniel Veillard68e9e742002-11-16 15:35:11 +000010753 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010754 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010755 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010756 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010757 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010758 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010759 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010760 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010761 }
10762
10763 if (!ctxt->wellFormed) {
10764 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010765 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010766 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010767 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010768 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010769 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010770 }
10771
William M. Brack7b9154b2003-09-27 19:23:50 +000010772 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010773 xmlNodePtr cur;
10774
10775 /*
10776 * Return the newly created nodeset after unlinking it from
10777 * they pseudo parent.
10778 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010779 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010780 *lst = cur;
10781 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010782#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010783 if (oldctxt->validate && oldctxt->wellFormed &&
10784 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10785 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10786 oldctxt->myDoc, cur);
10787 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010788#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010789 cur->parent = NULL;
10790 cur = cur->next;
10791 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010792 ctxt->myDoc->children->children = NULL;
10793 }
10794 if (ctxt->myDoc != NULL) {
10795 xmlFreeNode(ctxt->myDoc->children);
10796 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010797 }
10798
10799 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010800 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010801 ctxt->attsDefault = NULL;
10802 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010803 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010804 if (newDoc != NULL)
10805 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010806
10807 return(ret);
10808}
10809
Daniel Veillard81273902003-09-30 00:43:48 +000010810#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010811/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010812 * xmlParseBalancedChunkMemoryRecover:
10813 * @doc: the document the chunk pertains to
10814 * @sax: the SAX handler bloc (possibly NULL)
10815 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10816 * @depth: Used for loop detection, use 0
10817 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10818 * @lst: the return value for the set of parsed nodes
10819 * @recover: return nodes even if the data is broken (use 0)
10820 *
10821 *
10822 * Parse a well-balanced chunk of an XML document
10823 * called by the parser
10824 * The allowed sequence for the Well Balanced Chunk is the one defined by
10825 * the content production in the XML grammar:
10826 *
10827 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10828 *
10829 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10830 * the parser error code otherwise
10831 *
10832 * In case recover is set to 1, the nodelist will not be empty even if
10833 * the parsed chunk is not well balanced.
10834 */
10835int
10836xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10837 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10838 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010839 xmlParserCtxtPtr ctxt;
10840 xmlDocPtr newDoc;
10841 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010842 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010843 int size;
10844 int ret = 0;
10845
10846 if (depth > 40) {
10847 return(XML_ERR_ENTITY_LOOP);
10848 }
10849
10850
Daniel Veillardcda96922001-08-21 10:56:31 +000010851 if (lst != NULL)
10852 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010853 if (string == NULL)
10854 return(-1);
10855
10856 size = xmlStrlen(string);
10857
10858 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10859 if (ctxt == NULL) return(-1);
10860 ctxt->userData = ctxt;
10861 if (sax != NULL) {
10862 oldsax = ctxt->sax;
10863 ctxt->sax = sax;
10864 if (user_data != NULL)
10865 ctxt->userData = user_data;
10866 }
10867 newDoc = xmlNewDoc(BAD_CAST "1.0");
10868 if (newDoc == NULL) {
10869 xmlFreeParserCtxt(ctxt);
10870 return(-1);
10871 }
10872 if (doc != NULL) {
10873 newDoc->intSubset = doc->intSubset;
10874 newDoc->extSubset = doc->extSubset;
10875 }
10876 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10877 if (newDoc->children == NULL) {
10878 if (sax != NULL)
10879 ctxt->sax = oldsax;
10880 xmlFreeParserCtxt(ctxt);
10881 newDoc->intSubset = NULL;
10882 newDoc->extSubset = NULL;
10883 xmlFreeDoc(newDoc);
10884 return(-1);
10885 }
10886 nodePush(ctxt, newDoc->children);
10887 if (doc == NULL) {
10888 ctxt->myDoc = newDoc;
10889 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010890 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010891 newDoc->children->doc = doc;
10892 }
10893 ctxt->instate = XML_PARSER_CONTENT;
10894 ctxt->depth = depth;
10895
10896 /*
10897 * Doing validity checking on chunk doesn't make sense
10898 */
10899 ctxt->validate = 0;
10900 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010901 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010902
Daniel Veillardb39bc392002-10-26 19:29:51 +000010903 if ( doc != NULL ){
10904 content = doc->children;
10905 doc->children = NULL;
10906 xmlParseContent(ctxt);
10907 doc->children = content;
10908 }
10909 else {
10910 xmlParseContent(ctxt);
10911 }
Owen Taylor3473f882001-02-23 17:55:21 +000010912 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010913 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010914 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010915 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010916 }
10917 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010918 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010919 }
10920
10921 if (!ctxt->wellFormed) {
10922 if (ctxt->errNo == 0)
10923 ret = 1;
10924 else
10925 ret = ctxt->errNo;
10926 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010927 ret = 0;
10928 }
10929
10930 if (lst != NULL && (ret == 0 || recover == 1)) {
10931 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010932
10933 /*
10934 * Return the newly created nodeset after unlinking it from
10935 * they pseudo parent.
10936 */
10937 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010938 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010939 while (cur != NULL) {
10940 cur->parent = NULL;
10941 cur = cur->next;
10942 }
10943 newDoc->children->children = NULL;
10944 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010945
Owen Taylor3473f882001-02-23 17:55:21 +000010946 if (sax != NULL)
10947 ctxt->sax = oldsax;
10948 xmlFreeParserCtxt(ctxt);
10949 newDoc->intSubset = NULL;
10950 newDoc->extSubset = NULL;
10951 xmlFreeDoc(newDoc);
10952
10953 return(ret);
10954}
10955
10956/**
10957 * xmlSAXParseEntity:
10958 * @sax: the SAX handler block
10959 * @filename: the filename
10960 *
10961 * parse an XML external entity out of context and build a tree.
10962 * It use the given SAX function block to handle the parsing callback.
10963 * If sax is NULL, fallback to the default DOM tree building routines.
10964 *
10965 * [78] extParsedEnt ::= TextDecl? content
10966 *
10967 * This correspond to a "Well Balanced" chunk
10968 *
10969 * Returns the resulting document tree
10970 */
10971
10972xmlDocPtr
10973xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10974 xmlDocPtr ret;
10975 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010976
10977 ctxt = xmlCreateFileParserCtxt(filename);
10978 if (ctxt == NULL) {
10979 return(NULL);
10980 }
10981 if (sax != NULL) {
10982 if (ctxt->sax != NULL)
10983 xmlFree(ctxt->sax);
10984 ctxt->sax = sax;
10985 ctxt->userData = NULL;
10986 }
10987
Owen Taylor3473f882001-02-23 17:55:21 +000010988 xmlParseExtParsedEnt(ctxt);
10989
10990 if (ctxt->wellFormed)
10991 ret = ctxt->myDoc;
10992 else {
10993 ret = NULL;
10994 xmlFreeDoc(ctxt->myDoc);
10995 ctxt->myDoc = NULL;
10996 }
10997 if (sax != NULL)
10998 ctxt->sax = NULL;
10999 xmlFreeParserCtxt(ctxt);
11000
11001 return(ret);
11002}
11003
11004/**
11005 * xmlParseEntity:
11006 * @filename: the filename
11007 *
11008 * parse an XML external entity out of context and build a tree.
11009 *
11010 * [78] extParsedEnt ::= TextDecl? content
11011 *
11012 * This correspond to a "Well Balanced" chunk
11013 *
11014 * Returns the resulting document tree
11015 */
11016
11017xmlDocPtr
11018xmlParseEntity(const char *filename) {
11019 return(xmlSAXParseEntity(NULL, filename));
11020}
Daniel Veillard81273902003-09-30 00:43:48 +000011021#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011022
11023/**
11024 * xmlCreateEntityParserCtxt:
11025 * @URL: the entity URL
11026 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011027 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011028 *
11029 * Create a parser context for an external entity
11030 * Automatic support for ZLIB/Compress compressed document is provided
11031 * by default if found at compile-time.
11032 *
11033 * Returns the new parser context or NULL
11034 */
11035xmlParserCtxtPtr
11036xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11037 const xmlChar *base) {
11038 xmlParserCtxtPtr ctxt;
11039 xmlParserInputPtr inputStream;
11040 char *directory = NULL;
11041 xmlChar *uri;
11042
11043 ctxt = xmlNewParserCtxt();
11044 if (ctxt == NULL) {
11045 return(NULL);
11046 }
11047
11048 uri = xmlBuildURI(URL, base);
11049
11050 if (uri == NULL) {
11051 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11052 if (inputStream == NULL) {
11053 xmlFreeParserCtxt(ctxt);
11054 return(NULL);
11055 }
11056
11057 inputPush(ctxt, inputStream);
11058
11059 if ((ctxt->directory == NULL) && (directory == NULL))
11060 directory = xmlParserGetDirectory((char *)URL);
11061 if ((ctxt->directory == NULL) && (directory != NULL))
11062 ctxt->directory = directory;
11063 } else {
11064 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11065 if (inputStream == NULL) {
11066 xmlFree(uri);
11067 xmlFreeParserCtxt(ctxt);
11068 return(NULL);
11069 }
11070
11071 inputPush(ctxt, inputStream);
11072
11073 if ((ctxt->directory == NULL) && (directory == NULL))
11074 directory = xmlParserGetDirectory((char *)uri);
11075 if ((ctxt->directory == NULL) && (directory != NULL))
11076 ctxt->directory = directory;
11077 xmlFree(uri);
11078 }
Owen Taylor3473f882001-02-23 17:55:21 +000011079 return(ctxt);
11080}
11081
11082/************************************************************************
11083 * *
11084 * Front ends when parsing from a file *
11085 * *
11086 ************************************************************************/
11087
11088/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011089 * xmlCreateURLParserCtxt:
11090 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011091 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011092 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011093 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011094 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011095 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011096 *
11097 * Returns the new parser context or NULL
11098 */
11099xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011100xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011101{
11102 xmlParserCtxtPtr ctxt;
11103 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011104 char *directory = NULL;
11105
Owen Taylor3473f882001-02-23 17:55:21 +000011106 ctxt = xmlNewParserCtxt();
11107 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011108 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011109 return(NULL);
11110 }
11111
Daniel Veillard61b93382003-11-03 14:28:31 +000011112 if (options != 0)
11113 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011114
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011115 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011116 if (inputStream == NULL) {
11117 xmlFreeParserCtxt(ctxt);
11118 return(NULL);
11119 }
11120
Owen Taylor3473f882001-02-23 17:55:21 +000011121 inputPush(ctxt, inputStream);
11122 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011123 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011124 if ((ctxt->directory == NULL) && (directory != NULL))
11125 ctxt->directory = directory;
11126
11127 return(ctxt);
11128}
11129
Daniel Veillard61b93382003-11-03 14:28:31 +000011130/**
11131 * xmlCreateFileParserCtxt:
11132 * @filename: the filename
11133 *
11134 * Create a parser context for a file content.
11135 * Automatic support for ZLIB/Compress compressed document is provided
11136 * by default if found at compile-time.
11137 *
11138 * Returns the new parser context or NULL
11139 */
11140xmlParserCtxtPtr
11141xmlCreateFileParserCtxt(const char *filename)
11142{
11143 return(xmlCreateURLParserCtxt(filename, 0));
11144}
11145
Daniel Veillard81273902003-09-30 00:43:48 +000011146#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011147/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011148 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011149 * @sax: the SAX handler block
11150 * @filename: the filename
11151 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11152 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011153 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011154 *
11155 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11156 * compressed document is provided by default if found at compile-time.
11157 * It use the given SAX function block to handle the parsing callback.
11158 * If sax is NULL, fallback to the default DOM tree building routines.
11159 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011160 * User data (void *) is stored within the parser context in the
11161 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011162 *
Owen Taylor3473f882001-02-23 17:55:21 +000011163 * Returns the resulting document tree
11164 */
11165
11166xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011167xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11168 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011169 xmlDocPtr ret;
11170 xmlParserCtxtPtr ctxt;
11171 char *directory = NULL;
11172
Daniel Veillard635ef722001-10-29 11:48:19 +000011173 xmlInitParser();
11174
Owen Taylor3473f882001-02-23 17:55:21 +000011175 ctxt = xmlCreateFileParserCtxt(filename);
11176 if (ctxt == NULL) {
11177 return(NULL);
11178 }
11179 if (sax != NULL) {
11180 if (ctxt->sax != NULL)
11181 xmlFree(ctxt->sax);
11182 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011183 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011184 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011185 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011186 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011187 }
Owen Taylor3473f882001-02-23 17:55:21 +000011188
11189 if ((ctxt->directory == NULL) && (directory == NULL))
11190 directory = xmlParserGetDirectory(filename);
11191 if ((ctxt->directory == NULL) && (directory != NULL))
11192 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11193
Daniel Veillarddad3f682002-11-17 16:47:27 +000011194 ctxt->recovery = recovery;
11195
Owen Taylor3473f882001-02-23 17:55:21 +000011196 xmlParseDocument(ctxt);
11197
William M. Brackc07329e2003-09-08 01:57:30 +000011198 if ((ctxt->wellFormed) || recovery) {
11199 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011200 if (ret != NULL) {
11201 if (ctxt->input->buf->compressed > 0)
11202 ret->compression = 9;
11203 else
11204 ret->compression = ctxt->input->buf->compressed;
11205 }
William M. Brackc07329e2003-09-08 01:57:30 +000011206 }
Owen Taylor3473f882001-02-23 17:55:21 +000011207 else {
11208 ret = NULL;
11209 xmlFreeDoc(ctxt->myDoc);
11210 ctxt->myDoc = NULL;
11211 }
11212 if (sax != NULL)
11213 ctxt->sax = NULL;
11214 xmlFreeParserCtxt(ctxt);
11215
11216 return(ret);
11217}
11218
11219/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011220 * xmlSAXParseFile:
11221 * @sax: the SAX handler block
11222 * @filename: the filename
11223 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11224 * documents
11225 *
11226 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11227 * compressed document is provided by default if found at compile-time.
11228 * It use the given SAX function block to handle the parsing callback.
11229 * If sax is NULL, fallback to the default DOM tree building routines.
11230 *
11231 * Returns the resulting document tree
11232 */
11233
11234xmlDocPtr
11235xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11236 int recovery) {
11237 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11238}
11239
11240/**
Owen Taylor3473f882001-02-23 17:55:21 +000011241 * xmlRecoverDoc:
11242 * @cur: a pointer to an array of xmlChar
11243 *
11244 * parse an XML in-memory document and build a tree.
11245 * In the case the document is not Well Formed, a tree is built anyway
11246 *
11247 * Returns the resulting document tree
11248 */
11249
11250xmlDocPtr
11251xmlRecoverDoc(xmlChar *cur) {
11252 return(xmlSAXParseDoc(NULL, cur, 1));
11253}
11254
11255/**
11256 * xmlParseFile:
11257 * @filename: the filename
11258 *
11259 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11260 * compressed document is provided by default if found at compile-time.
11261 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011262 * Returns the resulting document tree if the file was wellformed,
11263 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011264 */
11265
11266xmlDocPtr
11267xmlParseFile(const char *filename) {
11268 return(xmlSAXParseFile(NULL, filename, 0));
11269}
11270
11271/**
11272 * xmlRecoverFile:
11273 * @filename: the filename
11274 *
11275 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11276 * compressed document is provided by default if found at compile-time.
11277 * In the case the document is not Well Formed, a tree is built anyway
11278 *
11279 * Returns the resulting document tree
11280 */
11281
11282xmlDocPtr
11283xmlRecoverFile(const char *filename) {
11284 return(xmlSAXParseFile(NULL, filename, 1));
11285}
11286
11287
11288/**
11289 * xmlSetupParserForBuffer:
11290 * @ctxt: an XML parser context
11291 * @buffer: a xmlChar * buffer
11292 * @filename: a file name
11293 *
11294 * Setup the parser context to parse a new buffer; Clears any prior
11295 * contents from the parser context. The buffer parameter must not be
11296 * NULL, but the filename parameter can be
11297 */
11298void
11299xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11300 const char* filename)
11301{
11302 xmlParserInputPtr input;
11303
11304 input = xmlNewInputStream(ctxt);
11305 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011306 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011307 xmlFree(ctxt);
11308 return;
11309 }
11310
11311 xmlClearParserCtxt(ctxt);
11312 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011313 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011314 input->base = buffer;
11315 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011316 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011317 inputPush(ctxt, input);
11318}
11319
11320/**
11321 * xmlSAXUserParseFile:
11322 * @sax: a SAX handler
11323 * @user_data: The user data returned on SAX callbacks
11324 * @filename: a file name
11325 *
11326 * parse an XML file and call the given SAX handler routines.
11327 * Automatic support for ZLIB/Compress compressed document is provided
11328 *
11329 * Returns 0 in case of success or a error number otherwise
11330 */
11331int
11332xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11333 const char *filename) {
11334 int ret = 0;
11335 xmlParserCtxtPtr ctxt;
11336
11337 ctxt = xmlCreateFileParserCtxt(filename);
11338 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011339#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011340 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011341#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011342 xmlFree(ctxt->sax);
11343 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011344 xmlDetectSAX2(ctxt);
11345
Owen Taylor3473f882001-02-23 17:55:21 +000011346 if (user_data != NULL)
11347 ctxt->userData = user_data;
11348
11349 xmlParseDocument(ctxt);
11350
11351 if (ctxt->wellFormed)
11352 ret = 0;
11353 else {
11354 if (ctxt->errNo != 0)
11355 ret = ctxt->errNo;
11356 else
11357 ret = -1;
11358 }
11359 if (sax != NULL)
11360 ctxt->sax = NULL;
11361 xmlFreeParserCtxt(ctxt);
11362
11363 return ret;
11364}
Daniel Veillard81273902003-09-30 00:43:48 +000011365#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011366
11367/************************************************************************
11368 * *
11369 * Front ends when parsing from memory *
11370 * *
11371 ************************************************************************/
11372
11373/**
11374 * xmlCreateMemoryParserCtxt:
11375 * @buffer: a pointer to a char array
11376 * @size: the size of the array
11377 *
11378 * Create a parser context for an XML in-memory document.
11379 *
11380 * Returns the new parser context or NULL
11381 */
11382xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011383xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011384 xmlParserCtxtPtr ctxt;
11385 xmlParserInputPtr input;
11386 xmlParserInputBufferPtr buf;
11387
11388 if (buffer == NULL)
11389 return(NULL);
11390 if (size <= 0)
11391 return(NULL);
11392
11393 ctxt = xmlNewParserCtxt();
11394 if (ctxt == NULL)
11395 return(NULL);
11396
Daniel Veillard53350552003-09-18 13:35:51 +000011397 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011398 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011399 if (buf == NULL) {
11400 xmlFreeParserCtxt(ctxt);
11401 return(NULL);
11402 }
Owen Taylor3473f882001-02-23 17:55:21 +000011403
11404 input = xmlNewInputStream(ctxt);
11405 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011406 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011407 xmlFreeParserCtxt(ctxt);
11408 return(NULL);
11409 }
11410
11411 input->filename = NULL;
11412 input->buf = buf;
11413 input->base = input->buf->buffer->content;
11414 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011415 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011416
11417 inputPush(ctxt, input);
11418 return(ctxt);
11419}
11420
Daniel Veillard81273902003-09-30 00:43:48 +000011421#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011422/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011423 * xmlSAXParseMemoryWithData:
11424 * @sax: the SAX handler block
11425 * @buffer: an pointer to a char array
11426 * @size: the size of the array
11427 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11428 * documents
11429 * @data: the userdata
11430 *
11431 * parse an XML in-memory block and use the given SAX function block
11432 * to handle the parsing callback. If sax is NULL, fallback to the default
11433 * DOM tree building routines.
11434 *
11435 * User data (void *) is stored within the parser context in the
11436 * context's _private member, so it is available nearly everywhere in libxml
11437 *
11438 * Returns the resulting document tree
11439 */
11440
11441xmlDocPtr
11442xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11443 int size, int recovery, void *data) {
11444 xmlDocPtr ret;
11445 xmlParserCtxtPtr ctxt;
11446
11447 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11448 if (ctxt == NULL) return(NULL);
11449 if (sax != NULL) {
11450 if (ctxt->sax != NULL)
11451 xmlFree(ctxt->sax);
11452 ctxt->sax = sax;
11453 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011454 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011455 if (data!=NULL) {
11456 ctxt->_private=data;
11457 }
11458
Daniel Veillardadba5f12003-04-04 16:09:01 +000011459 ctxt->recovery = recovery;
11460
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011461 xmlParseDocument(ctxt);
11462
11463 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11464 else {
11465 ret = NULL;
11466 xmlFreeDoc(ctxt->myDoc);
11467 ctxt->myDoc = NULL;
11468 }
11469 if (sax != NULL)
11470 ctxt->sax = NULL;
11471 xmlFreeParserCtxt(ctxt);
11472
11473 return(ret);
11474}
11475
11476/**
Owen Taylor3473f882001-02-23 17:55:21 +000011477 * xmlSAXParseMemory:
11478 * @sax: the SAX handler block
11479 * @buffer: an pointer to a char array
11480 * @size: the size of the array
11481 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11482 * documents
11483 *
11484 * parse an XML in-memory block and use the given SAX function block
11485 * to handle the parsing callback. If sax is NULL, fallback to the default
11486 * DOM tree building routines.
11487 *
11488 * Returns the resulting document tree
11489 */
11490xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011491xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11492 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011493 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011494}
11495
11496/**
11497 * xmlParseMemory:
11498 * @buffer: an pointer to a char array
11499 * @size: the size of the array
11500 *
11501 * parse an XML in-memory block and build a tree.
11502 *
11503 * Returns the resulting document tree
11504 */
11505
Daniel Veillard50822cb2001-07-26 20:05:51 +000011506xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011507 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11508}
11509
11510/**
11511 * xmlRecoverMemory:
11512 * @buffer: an pointer to a char array
11513 * @size: the size of the array
11514 *
11515 * parse an XML in-memory block and build a tree.
11516 * In the case the document is not Well Formed, a tree is built anyway
11517 *
11518 * Returns the resulting document tree
11519 */
11520
Daniel Veillard50822cb2001-07-26 20:05:51 +000011521xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011522 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11523}
11524
11525/**
11526 * xmlSAXUserParseMemory:
11527 * @sax: a SAX handler
11528 * @user_data: The user data returned on SAX callbacks
11529 * @buffer: an in-memory XML document input
11530 * @size: the length of the XML document in bytes
11531 *
11532 * A better SAX parsing routine.
11533 * parse an XML in-memory buffer and call the given SAX handler routines.
11534 *
11535 * Returns 0 in case of success or a error number otherwise
11536 */
11537int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011538 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011539 int ret = 0;
11540 xmlParserCtxtPtr ctxt;
11541 xmlSAXHandlerPtr oldsax = NULL;
11542
Daniel Veillard9e923512002-08-14 08:48:52 +000011543 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011544 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11545 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011546 oldsax = ctxt->sax;
11547 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011548 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011549 if (user_data != NULL)
11550 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011551
11552 xmlParseDocument(ctxt);
11553
11554 if (ctxt->wellFormed)
11555 ret = 0;
11556 else {
11557 if (ctxt->errNo != 0)
11558 ret = ctxt->errNo;
11559 else
11560 ret = -1;
11561 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011562 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011563 xmlFreeParserCtxt(ctxt);
11564
11565 return ret;
11566}
Daniel Veillard81273902003-09-30 00:43:48 +000011567#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011568
11569/**
11570 * xmlCreateDocParserCtxt:
11571 * @cur: a pointer to an array of xmlChar
11572 *
11573 * Creates a parser context for an XML in-memory document.
11574 *
11575 * Returns the new parser context or NULL
11576 */
11577xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011578xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011579 int len;
11580
11581 if (cur == NULL)
11582 return(NULL);
11583 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011584 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011585}
11586
Daniel Veillard81273902003-09-30 00:43:48 +000011587#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011588/**
11589 * xmlSAXParseDoc:
11590 * @sax: the SAX handler block
11591 * @cur: a pointer to an array of xmlChar
11592 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11593 * documents
11594 *
11595 * parse an XML in-memory document and build a tree.
11596 * It use the given SAX function block to handle the parsing callback.
11597 * If sax is NULL, fallback to the default DOM tree building routines.
11598 *
11599 * Returns the resulting document tree
11600 */
11601
11602xmlDocPtr
11603xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11604 xmlDocPtr ret;
11605 xmlParserCtxtPtr ctxt;
11606
11607 if (cur == NULL) return(NULL);
11608
11609
11610 ctxt = xmlCreateDocParserCtxt(cur);
11611 if (ctxt == NULL) return(NULL);
11612 if (sax != NULL) {
11613 ctxt->sax = sax;
11614 ctxt->userData = NULL;
11615 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011616 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011617
11618 xmlParseDocument(ctxt);
11619 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11620 else {
11621 ret = NULL;
11622 xmlFreeDoc(ctxt->myDoc);
11623 ctxt->myDoc = NULL;
11624 }
11625 if (sax != NULL)
11626 ctxt->sax = NULL;
11627 xmlFreeParserCtxt(ctxt);
11628
11629 return(ret);
11630}
11631
11632/**
11633 * xmlParseDoc:
11634 * @cur: a pointer to an array of xmlChar
11635 *
11636 * parse an XML in-memory document and build a tree.
11637 *
11638 * Returns the resulting document tree
11639 */
11640
11641xmlDocPtr
11642xmlParseDoc(xmlChar *cur) {
11643 return(xmlSAXParseDoc(NULL, cur, 0));
11644}
Daniel Veillard81273902003-09-30 00:43:48 +000011645#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011646
Daniel Veillard81273902003-09-30 00:43:48 +000011647#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011648/************************************************************************
11649 * *
11650 * Specific function to keep track of entities references *
11651 * and used by the XSLT debugger *
11652 * *
11653 ************************************************************************/
11654
11655static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11656
11657/**
11658 * xmlAddEntityReference:
11659 * @ent : A valid entity
11660 * @firstNode : A valid first node for children of entity
11661 * @lastNode : A valid last node of children entity
11662 *
11663 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11664 */
11665static void
11666xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11667 xmlNodePtr lastNode)
11668{
11669 if (xmlEntityRefFunc != NULL) {
11670 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11671 }
11672}
11673
11674
11675/**
11676 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011677 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011678 *
11679 * Set the function to call call back when a xml reference has been made
11680 */
11681void
11682xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11683{
11684 xmlEntityRefFunc = func;
11685}
Daniel Veillard81273902003-09-30 00:43:48 +000011686#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011687
11688/************************************************************************
11689 * *
11690 * Miscellaneous *
11691 * *
11692 ************************************************************************/
11693
11694#ifdef LIBXML_XPATH_ENABLED
11695#include <libxml/xpath.h>
11696#endif
11697
/* Generic error handler that xmlInitParser() compares xmlGenericError to. */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 by xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
11700
11701/**
11702 * xmlInitParser:
11703 *
11704 * Initialization function for the XML parser.
11705 * This is not reentrant. Call once before processing in case of
11706 * use in multithreaded programs.
11707 */
11708
11709void
11710xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011711 if (xmlParserInitialized != 0)
11712 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011713
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011714 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11715 (xmlGenericError == NULL))
11716 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011717 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011718 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011719 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011720 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011721 xmlDefaultSAXHandlerInit();
11722 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011723#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011724 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011725#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011726#ifdef LIBXML_HTML_ENABLED
11727 htmlInitAutoClose();
11728 htmlDefaultSAXHandlerInit();
11729#endif
11730#ifdef LIBXML_XPATH_ENABLED
11731 xmlXPathInit();
11732#endif
11733 xmlParserInitialized = 1;
11734}
11735
11736/**
11737 * xmlCleanupParser:
11738 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011739 * Cleanup function for the XML library. It tries to reclaim all
11740 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011741 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011742 * function should not prevent reusing the library but one should
11743 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011744 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011745 */
11746
11747void
11748xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011749 if (!xmlParserInitialized)
11750 return;
11751
Owen Taylor3473f882001-02-23 17:55:21 +000011752 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011753#ifdef LIBXML_CATALOG_ENABLED
11754 xmlCatalogCleanup();
11755#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011756 xmlCleanupInputCallbacks();
11757#ifdef LIBXML_OUTPUT_ENABLED
11758 xmlCleanupOutputCallbacks();
11759#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011760 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011761 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011762 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011763 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011764 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011765}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011766
11767/************************************************************************
11768 * *
11769 * New set (2.6.0) of simpler and more flexible APIs *
11770 * *
11771 ************************************************************************/
11772
11773/**
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011774 * DICT_FREE:
11775 * @str: a string
11776 *
11777 * Free a string if it is not owned by the "dict" dictionnary in the
11778 * current scope
11779 */
11780#define DICT_FREE(str) \
11781 if ((str) && ((!dict) || \
11782 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
11783 xmlFree((char *)(str));
11784
11785/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011786 * xmlCtxtReset:
11787 * @ctxt: an XML parser context
11788 *
11789 * Reset a parser context
11790 */
11791void
11792xmlCtxtReset(xmlParserCtxtPtr ctxt)
11793{
11794 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011795 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011796
11797 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11798 xmlFreeInputStream(input);
11799 }
11800 ctxt->inputNr = 0;
11801 ctxt->input = NULL;
11802
11803 ctxt->spaceNr = 0;
11804 ctxt->spaceTab[0] = -1;
11805 ctxt->space = &ctxt->spaceTab[0];
11806
11807
11808 ctxt->nodeNr = 0;
11809 ctxt->node = NULL;
11810
11811 ctxt->nameNr = 0;
11812 ctxt->name = NULL;
11813
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011814 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011815 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011816 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011817 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011818 DICT_FREE(ctxt->directory);
11819 ctxt->directory = NULL;
11820 DICT_FREE(ctxt->extSubURI);
11821 ctxt->extSubURI = NULL;
11822 DICT_FREE(ctxt->extSubSystem);
11823 ctxt->extSubSystem = NULL;
11824 if (ctxt->myDoc != NULL)
11825 xmlFreeDoc(ctxt->myDoc);
11826 ctxt->myDoc = NULL;
11827
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011828 ctxt->standalone = -1;
11829 ctxt->hasExternalSubset = 0;
11830 ctxt->hasPErefs = 0;
11831 ctxt->html = 0;
11832 ctxt->external = 0;
11833 ctxt->instate = XML_PARSER_START;
11834 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011835
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011836 ctxt->wellFormed = 1;
11837 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000011838 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011839 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011840#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011841 ctxt->vctxt.userData = ctxt;
11842 ctxt->vctxt.error = xmlParserValidityError;
11843 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011844#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011845 ctxt->record_info = 0;
11846 ctxt->nbChars = 0;
11847 ctxt->checkIndex = 0;
11848 ctxt->inSubset = 0;
11849 ctxt->errNo = XML_ERR_OK;
11850 ctxt->depth = 0;
11851 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11852 ctxt->catalogs = NULL;
11853 xmlInitNodeInfoSeq(&ctxt->node_seq);
11854
11855 if (ctxt->attsDefault != NULL) {
11856 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11857 ctxt->attsDefault = NULL;
11858 }
11859 if (ctxt->attsSpecial != NULL) {
11860 xmlHashFree(ctxt->attsSpecial, NULL);
11861 ctxt->attsSpecial = NULL;
11862 }
11863
Daniel Veillard4432df22003-09-28 18:58:27 +000011864#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011865 if (ctxt->catalogs != NULL)
11866 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011867#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011868 if (ctxt->lastError.code != XML_ERR_OK)
11869 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011870}
11871
11872/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011873 * xmlCtxtResetPush:
11874 * @ctxt: an XML parser context
11875 * @chunk: a pointer to an array of chars
11876 * @size: number of chars in the array
11877 * @filename: an optional file name or URI
11878 * @encoding: the document encoding, or NULL
11879 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011880 * Reset a push parser context
11881 *
11882 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011883 */
11884int
11885xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11886 int size, const char *filename, const char *encoding)
11887{
11888 xmlParserInputPtr inputStream;
11889 xmlParserInputBufferPtr buf;
11890 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11891
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011892 if (ctxt == NULL)
11893 return(1);
11894
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011895 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11896 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11897
11898 buf = xmlAllocParserInputBuffer(enc);
11899 if (buf == NULL)
11900 return(1);
11901
11902 if (ctxt == NULL) {
11903 xmlFreeParserInputBuffer(buf);
11904 return(1);
11905 }
11906
11907 xmlCtxtReset(ctxt);
11908
11909 if (ctxt->pushTab == NULL) {
11910 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11911 sizeof(xmlChar *));
11912 if (ctxt->pushTab == NULL) {
11913 xmlErrMemory(ctxt, NULL);
11914 xmlFreeParserInputBuffer(buf);
11915 return(1);
11916 }
11917 }
11918
11919 if (filename == NULL) {
11920 ctxt->directory = NULL;
11921 } else {
11922 ctxt->directory = xmlParserGetDirectory(filename);
11923 }
11924
11925 inputStream = xmlNewInputStream(ctxt);
11926 if (inputStream == NULL) {
11927 xmlFreeParserInputBuffer(buf);
11928 return(1);
11929 }
11930
11931 if (filename == NULL)
11932 inputStream->filename = NULL;
11933 else
11934 inputStream->filename = (char *)
11935 xmlCanonicPath((const xmlChar *) filename);
11936 inputStream->buf = buf;
11937 inputStream->base = inputStream->buf->buffer->content;
11938 inputStream->cur = inputStream->buf->buffer->content;
11939 inputStream->end =
11940 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11941
11942 inputPush(ctxt, inputStream);
11943
11944 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11945 (ctxt->input->buf != NULL)) {
11946 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11947 int cur = ctxt->input->cur - ctxt->input->base;
11948
11949 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11950
11951 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11952 ctxt->input->cur = ctxt->input->base + cur;
11953 ctxt->input->end =
11954 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11955 use];
11956#ifdef DEBUG_PUSH
11957 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11958#endif
11959 }
11960
11961 if (encoding != NULL) {
11962 xmlCharEncodingHandlerPtr hdlr;
11963
11964 hdlr = xmlFindCharEncodingHandler(encoding);
11965 if (hdlr != NULL) {
11966 xmlSwitchToEncoding(ctxt, hdlr);
11967 } else {
11968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11969 "Unsupported encoding %s\n", BAD_CAST encoding);
11970 }
11971 } else if (enc != XML_CHAR_ENCODING_NONE) {
11972 xmlSwitchEncoding(ctxt, enc);
11973 }
11974
11975 return(0);
11976}
11977
11978/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011979 * xmlCtxtUseOptions:
11980 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011981 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011982 *
11983 * Applies the options to the parser context
11984 *
11985 * Returns 0 in case of success, the set of unknown or unimplemented options
11986 * in case of error.
11987 */
11988int
11989xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11990{
11991 if (options & XML_PARSE_RECOVER) {
11992 ctxt->recovery = 1;
11993 options -= XML_PARSE_RECOVER;
11994 } else
11995 ctxt->recovery = 0;
11996 if (options & XML_PARSE_DTDLOAD) {
11997 ctxt->loadsubset = XML_DETECT_IDS;
11998 options -= XML_PARSE_DTDLOAD;
11999 } else
12000 ctxt->loadsubset = 0;
12001 if (options & XML_PARSE_DTDATTR) {
12002 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12003 options -= XML_PARSE_DTDATTR;
12004 }
12005 if (options & XML_PARSE_NOENT) {
12006 ctxt->replaceEntities = 1;
12007 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12008 options -= XML_PARSE_NOENT;
12009 } else
12010 ctxt->replaceEntities = 0;
12011 if (options & XML_PARSE_NOWARNING) {
12012 ctxt->sax->warning = NULL;
12013 options -= XML_PARSE_NOWARNING;
12014 }
12015 if (options & XML_PARSE_NOERROR) {
12016 ctxt->sax->error = NULL;
12017 ctxt->sax->fatalError = NULL;
12018 options -= XML_PARSE_NOERROR;
12019 }
12020 if (options & XML_PARSE_PEDANTIC) {
12021 ctxt->pedantic = 1;
12022 options -= XML_PARSE_PEDANTIC;
12023 } else
12024 ctxt->pedantic = 0;
12025 if (options & XML_PARSE_NOBLANKS) {
12026 ctxt->keepBlanks = 0;
12027 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12028 options -= XML_PARSE_NOBLANKS;
12029 } else
12030 ctxt->keepBlanks = 1;
12031 if (options & XML_PARSE_DTDVALID) {
12032 ctxt->validate = 1;
12033 if (options & XML_PARSE_NOWARNING)
12034 ctxt->vctxt.warning = NULL;
12035 if (options & XML_PARSE_NOERROR)
12036 ctxt->vctxt.error = NULL;
12037 options -= XML_PARSE_DTDVALID;
12038 } else
12039 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012040#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012041 if (options & XML_PARSE_SAX1) {
12042 ctxt->sax->startElement = xmlSAX2StartElement;
12043 ctxt->sax->endElement = xmlSAX2EndElement;
12044 ctxt->sax->startElementNs = NULL;
12045 ctxt->sax->endElementNs = NULL;
12046 ctxt->sax->initialized = 1;
12047 options -= XML_PARSE_SAX1;
12048 }
Daniel Veillard81273902003-09-30 00:43:48 +000012049#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012050 if (options & XML_PARSE_NODICT) {
12051 ctxt->dictNames = 0;
12052 options -= XML_PARSE_NODICT;
12053 } else {
12054 ctxt->dictNames = 1;
12055 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012056 if (options & XML_PARSE_NOCDATA) {
12057 ctxt->sax->cdataBlock = NULL;
12058 options -= XML_PARSE_NOCDATA;
12059 }
12060 if (options & XML_PARSE_NSCLEAN) {
12061 ctxt->options |= XML_PARSE_NSCLEAN;
12062 options -= XML_PARSE_NSCLEAN;
12063 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012064 if (options & XML_PARSE_NONET) {
12065 ctxt->options |= XML_PARSE_NONET;
12066 options -= XML_PARSE_NONET;
12067 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012068 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012069 return (options);
12070}
12071
12072/**
12073 * xmlDoRead:
12074 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012075 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012076 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012077 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012078 * @reuse: keep the context for reuse
12079 *
12080 * Common front-end for the xmlRead functions
12081 *
12082 * Returns the resulting document tree or NULL
12083 */
12084static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012085xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12086 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012087{
12088 xmlDocPtr ret;
12089
12090 xmlCtxtUseOptions(ctxt, options);
12091 if (encoding != NULL) {
12092 xmlCharEncodingHandlerPtr hdlr;
12093
12094 hdlr = xmlFindCharEncodingHandler(encoding);
12095 if (hdlr != NULL)
12096 xmlSwitchToEncoding(ctxt, hdlr);
12097 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012098 if ((URL != NULL) && (ctxt->input != NULL) &&
12099 (ctxt->input->filename == NULL))
12100 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012101 xmlParseDocument(ctxt);
12102 if ((ctxt->wellFormed) || ctxt->recovery)
12103 ret = ctxt->myDoc;
12104 else {
12105 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012106 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012107 xmlFreeDoc(ctxt->myDoc);
12108 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012109 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012110 ctxt->myDoc = NULL;
12111 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012112 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012113 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012114
12115 return (ret);
12116}
12117
12118/**
12119 * xmlReadDoc:
12120 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012121 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012122 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012123 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012124 *
12125 * parse an XML in-memory document and build a tree.
12126 *
12127 * Returns the resulting document tree
12128 */
12129xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012130xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012131{
12132 xmlParserCtxtPtr ctxt;
12133
12134 if (cur == NULL)
12135 return (NULL);
12136
12137 ctxt = xmlCreateDocParserCtxt(cur);
12138 if (ctxt == NULL)
12139 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012140 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012141}
12142
12143/**
12144 * xmlReadFile:
12145 * @filename: a file or URL
12146 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012147 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012148 *
12149 * parse an XML file from the filesystem or the network.
12150 *
12151 * Returns the resulting document tree
12152 */
12153xmlDocPtr
12154xmlReadFile(const char *filename, const char *encoding, int options)
12155{
12156 xmlParserCtxtPtr ctxt;
12157
Daniel Veillard61b93382003-11-03 14:28:31 +000012158 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012159 if (ctxt == NULL)
12160 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012161 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012162}
12163
12164/**
12165 * xmlReadMemory:
12166 * @buffer: a pointer to a char array
12167 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012168 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012169 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012170 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012171 *
12172 * parse an XML in-memory document and build a tree.
12173 *
12174 * Returns the resulting document tree
12175 */
12176xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012177xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012178{
12179 xmlParserCtxtPtr ctxt;
12180
12181 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12182 if (ctxt == NULL)
12183 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012184 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012185}
12186
12187/**
12188 * xmlReadFd:
12189 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012190 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012191 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012192 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012193 *
12194 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012195 * NOTE that the file descriptor will not be closed when the
12196 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012197 *
12198 * Returns the resulting document tree
12199 */
12200xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012201xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012202{
12203 xmlParserCtxtPtr ctxt;
12204 xmlParserInputBufferPtr input;
12205 xmlParserInputPtr stream;
12206
12207 if (fd < 0)
12208 return (NULL);
12209
12210 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12211 if (input == NULL)
12212 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012213 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012214 ctxt = xmlNewParserCtxt();
12215 if (ctxt == NULL) {
12216 xmlFreeParserInputBuffer(input);
12217 return (NULL);
12218 }
12219 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12220 if (stream == NULL) {
12221 xmlFreeParserInputBuffer(input);
12222 xmlFreeParserCtxt(ctxt);
12223 return (NULL);
12224 }
12225 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012226 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012227}
12228
12229/**
12230 * xmlReadIO:
12231 * @ioread: an I/O read function
12232 * @ioclose: an I/O close function
12233 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012234 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012235 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012236 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012237 *
12238 * parse an XML document from I/O functions and source and build a tree.
12239 *
12240 * Returns the resulting document tree
12241 */
12242xmlDocPtr
12243xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012244 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012245{
12246 xmlParserCtxtPtr ctxt;
12247 xmlParserInputBufferPtr input;
12248 xmlParserInputPtr stream;
12249
12250 if (ioread == NULL)
12251 return (NULL);
12252
12253 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12254 XML_CHAR_ENCODING_NONE);
12255 if (input == NULL)
12256 return (NULL);
12257 ctxt = xmlNewParserCtxt();
12258 if (ctxt == NULL) {
12259 xmlFreeParserInputBuffer(input);
12260 return (NULL);
12261 }
12262 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12263 if (stream == NULL) {
12264 xmlFreeParserInputBuffer(input);
12265 xmlFreeParserCtxt(ctxt);
12266 return (NULL);
12267 }
12268 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012269 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012270}
12271
12272/**
12273 * xmlCtxtReadDoc:
12274 * @ctxt: an XML parser context
12275 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012276 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012277 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012278 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012279 *
12280 * parse an XML in-memory document and build a tree.
12281 * This reuses the existing @ctxt parser context
12282 *
12283 * Returns the resulting document tree
12284 */
12285xmlDocPtr
12286xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012287 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012288{
12289 xmlParserInputPtr stream;
12290
12291 if (cur == NULL)
12292 return (NULL);
12293 if (ctxt == NULL)
12294 return (NULL);
12295
12296 xmlCtxtReset(ctxt);
12297
12298 stream = xmlNewStringInputStream(ctxt, cur);
12299 if (stream == NULL) {
12300 return (NULL);
12301 }
12302 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012303 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012304}
12305
12306/**
12307 * xmlCtxtReadFile:
12308 * @ctxt: an XML parser context
12309 * @filename: a file or URL
12310 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012311 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012312 *
12313 * parse an XML file from the filesystem or the network.
12314 * This reuses the existing @ctxt parser context
12315 *
12316 * Returns the resulting document tree
12317 */
12318xmlDocPtr
12319xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12320 const char *encoding, int options)
12321{
12322 xmlParserInputPtr stream;
12323
12324 if (filename == NULL)
12325 return (NULL);
12326 if (ctxt == NULL)
12327 return (NULL);
12328
12329 xmlCtxtReset(ctxt);
12330
12331 stream = xmlNewInputFromFile(ctxt, filename);
12332 if (stream == NULL) {
12333 return (NULL);
12334 }
12335 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012336 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012337}
12338
12339/**
12340 * xmlCtxtReadMemory:
12341 * @ctxt: an XML parser context
12342 * @buffer: a pointer to a char array
12343 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012344 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012345 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012346 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012347 *
12348 * parse an XML in-memory document and build a tree.
12349 * This reuses the existing @ctxt parser context
12350 *
12351 * Returns the resulting document tree
12352 */
12353xmlDocPtr
12354xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012355 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012356{
12357 xmlParserInputBufferPtr input;
12358 xmlParserInputPtr stream;
12359
12360 if (ctxt == NULL)
12361 return (NULL);
12362 if (buffer == NULL)
12363 return (NULL);
12364
12365 xmlCtxtReset(ctxt);
12366
12367 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12368 if (input == NULL) {
12369 return(NULL);
12370 }
12371
12372 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12373 if (stream == NULL) {
12374 xmlFreeParserInputBuffer(input);
12375 return(NULL);
12376 }
12377
12378 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012379 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012380}
12381
12382/**
12383 * xmlCtxtReadFd:
12384 * @ctxt: an XML parser context
12385 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012386 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012387 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012388 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012389 *
12390 * parse an XML from a file descriptor and build a tree.
12391 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012392 * NOTE that the file descriptor will not be closed when the
12393 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012394 *
12395 * Returns the resulting document tree
12396 */
12397xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012398xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12399 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012400{
12401 xmlParserInputBufferPtr input;
12402 xmlParserInputPtr stream;
12403
12404 if (fd < 0)
12405 return (NULL);
12406 if (ctxt == NULL)
12407 return (NULL);
12408
12409 xmlCtxtReset(ctxt);
12410
12411
12412 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12413 if (input == NULL)
12414 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012415 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012416 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12417 if (stream == NULL) {
12418 xmlFreeParserInputBuffer(input);
12419 return (NULL);
12420 }
12421 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012422 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012423}
12424
12425/**
12426 * xmlCtxtReadIO:
12427 * @ctxt: an XML parser context
12428 * @ioread: an I/O read function
12429 * @ioclose: an I/O close function
12430 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012431 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012432 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012433 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012434 *
12435 * parse an XML document from I/O functions and source and build a tree.
12436 * This reuses the existing @ctxt parser context
12437 *
12438 * Returns the resulting document tree
12439 */
12440xmlDocPtr
12441xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12442 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012443 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012444 const char *encoding, int options)
12445{
12446 xmlParserInputBufferPtr input;
12447 xmlParserInputPtr stream;
12448
12449 if (ioread == NULL)
12450 return (NULL);
12451 if (ctxt == NULL)
12452 return (NULL);
12453
12454 xmlCtxtReset(ctxt);
12455
12456 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12457 XML_CHAR_ENCODING_NONE);
12458 if (input == NULL)
12459 return (NULL);
12460 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12461 if (stream == NULL) {
12462 xmlFreeParserInputBuffer(input);
12463 return (NULL);
12464 }
12465 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012466 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012467}