blob: 22e2b018abfeb49ea9fec5f42966ae72f574cb9d [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree is built from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000080/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000081 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000082 *
83 * arbitrary depth limit for the XML documents that we allow to
84 * process. This is not a limitation of the parser but a safety
85 * boundary feature.
86 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000087unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/* Compile-time flag defined to 1. NOTE(review): its consumers are not
 * visible in this part of the file -- confirm what it gates. */
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
/* Two buffer size constants. NOTE(review): presumably sizes of parser
 * scratch buffers; their users are outside this section -- confirm. */
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
/* Marker string wrapped in BAD_CAST. NOTE(review): where it is compared or
 * stored is not visible here -- confirm against its users. */
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
Owen Taylor3473f882001-02-23 17:55:21 +000096/*
Owen Taylor3473f882001-02-23 17:55:21 +000097 * List of XML prefixed PI allowed by W3C specs
98 */
99
/* The table is terminated by a NULL entry. */
Daniel Veillardb44025c2001-10-11 22:55:55 +0000100static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000101 "xml-stylesheet",
102 NULL
103};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
/* Forward declaration (non-static); the definition is not in this section. */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
/* Forward declaration of a file-local helper returning an xmlParserErrors
 * code; the definition is not in this section. */
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
/* Only declared when LIBXML_LEGACY_ENABLED is defined. */
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
/* Forward declaration of a file-local helper returning an xmlParserErrors
 * code; the definition is not in this section. */
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000857 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1231
/*
 * Raw accessors on the current input of the local variable ctxt.
 * RAW and CUR expand to the same expression.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s are c1..cn.
 * Bytes are compared left to right and the comparison stops at the first
 * mismatch.  The argument s is expanded once per compared byte, so it
 * must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1254
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * the character count and to the column (no newline handling).  A '%'
 * at the new position triggers xmlParserHandlePEReference(); a 0 there
 * triggers an attempt to read more input and, failing that,
 * xmlPopInput().
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that line
 * and column are kept up to date across newlines.  val is re-evaluated
 * on every iteration and must not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1277
Daniel Veillarda880b122003-04-21 21:36:41 +00001278#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001279 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1280 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001281 xmlSHRINK (ctxt);
1282
1283static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1284 xmlParserInputShrink(ctxt->input);
1285 if ((*ctxt->input->cur == 0) &&
1286 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1287 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001288 }
Owen Taylor3473f882001-02-23 17:55:21 +00001289
Daniel Veillarda880b122003-04-21 21:36:41 +00001290#define GROW if ((ctxt->progressive == 0) && \
1291 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001292 xmlGROW (ctxt);
1293
1294static void xmlGROW (xmlParserCtxtPtr ctxt) {
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 if ((*ctxt->input->cur == 0) &&
1297 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1298 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001299}
Owen Taylor3473f882001-02-23 17:55:21 +00001300
/* Skip blanks on the current input, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping column and character
 * count, and try to read more input if a 0 byte is reached.  Expands to
 * a brace-enclosed block, not a do/while.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance past the current character, which is l xmlChars
 * long, updating line/column, then handle a PE reference if the new
 * position holds a '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Decode the current character; l must be an int lvalue receiving its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v at b[i] and advance i, using a
 * plain byte store when l is 1 and xmlCopyCharMultiByte() otherwise.
 * Expands to an if/else statement without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001327
1328/**
1329 * xmlSkipBlankChars:
1330 * @ctxt: the XML parser context
1331 *
1332 * skip all blanks character found at that point in the input streams.
1333 * It pops up finished entities in the process if allowable at that point.
1334 *
1335 * Returns the number of space chars skipped
1336 */
1337
1338int
1339xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001340 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001341
1342 /*
1343 * It's Okay to use CUR/NEXT here since all the blanks are on
1344 * the ASCII range.
1345 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001346 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1347 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001348 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001349 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001350 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001351 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001352 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001353 if (*cur == '\n') {
1354 ctxt->input->line++; ctxt->input->col = 1;
1355 }
1356 cur++;
1357 res++;
1358 if (*cur == 0) {
1359 ctxt->input->cur = cur;
1360 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1361 cur = ctxt->input->cur;
1362 }
1363 }
1364 ctxt->input->cur = cur;
1365 } else {
1366 int cur;
1367 do {
1368 cur = CUR;
1369 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1370 NEXT;
1371 cur = CUR;
1372 res++;
1373 }
1374 while ((cur == 0) && (ctxt->inputNr > 1) &&
1375 (ctxt->instate != XML_PARSER_COMMENT)) {
1376 xmlPopInput(ctxt);
1377 cur = CUR;
1378 }
1379 /*
1380 * Need to handle support of entities branching here
1381 */
1382 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1383 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1384 }
Owen Taylor3473f882001-02-23 17:55:21 +00001385 return(res);
1386}
1387
1388/************************************************************************
1389 * *
1390 * Commodity functions to handle entities *
1391 * *
1392 ************************************************************************/
1393
1394/**
1395 * xmlPopInput:
1396 * @ctxt: an XML parser context
1397 *
1398 * xmlPopInput: the current input pointed by ctxt->input came to an end
1399 * pop it and return the next char.
1400 *
1401 * Returns the current xmlChar in the parser context
1402 */
1403xmlChar
1404xmlPopInput(xmlParserCtxtPtr ctxt) {
1405 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1406 if (xmlParserDebugEntities)
1407 xmlGenericError(xmlGenericErrorContext,
1408 "Popping input %d\n", ctxt->inputNr);
1409 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001410 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001411 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1412 return(xmlPopInput(ctxt));
1413 return(CUR);
1414}
1415
1416/**
1417 * xmlPushInput:
1418 * @ctxt: an XML parser context
1419 * @input: an XML parser input fragment (entity, XML fragment ...).
1420 *
1421 * xmlPushInput: switch to a new input stream which is stacked on top
1422 * of the previous one(s).
1423 */
1424void
1425xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1426 if (input == NULL) return;
1427
1428 if (xmlParserDebugEntities) {
1429 if ((ctxt->input != NULL) && (ctxt->input->filename))
1430 xmlGenericError(xmlGenericErrorContext,
1431 "%s(%d): ", ctxt->input->filename,
1432 ctxt->input->line);
1433 xmlGenericError(xmlGenericErrorContext,
1434 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1435 }
1436 inputPush(ctxt, input);
1437 GROW;
1438}
1439
1440/**
1441 * xmlParseCharRef:
1442 * @ctxt: an XML parser context
1443 *
1444 * parse Reference declarations
1445 *
1446 * [66] CharRef ::= '&#' [0-9]+ ';' |
1447 * '&#x' [0-9a-fA-F]+ ';'
1448 *
1449 * [ WFC: Legal Character ]
1450 * Characters referred to using character references must match the
1451 * production for Char.
1452 *
1453 * Returns the value parsed (as an int), 0 in case of error
1454 */
1455int
1456xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001457 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001458 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001459 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001460
Owen Taylor3473f882001-02-23 17:55:21 +00001461 /*
1462 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1463 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001464 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001465 (NXT(2) == 'x')) {
1466 SKIP(3);
1467 GROW;
1468 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001469 if (count++ > 20) {
1470 count = 0;
1471 GROW;
1472 }
1473 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001474 val = val * 16 + (CUR - '0');
1475 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1476 val = val * 16 + (CUR - 'a') + 10;
1477 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1478 val = val * 16 + (CUR - 'A') + 10;
1479 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001480 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001481 val = 0;
1482 break;
1483 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001484 if (val > 0x10FFFF)
1485 outofrange = val;
1486
Owen Taylor3473f882001-02-23 17:55:21 +00001487 NEXT;
1488 count++;
1489 }
1490 if (RAW == ';') {
1491 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001492 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001493 ctxt->nbChars ++;
1494 ctxt->input->cur++;
1495 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001496 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001497 SKIP(2);
1498 GROW;
1499 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001500 if (count++ > 20) {
1501 count = 0;
1502 GROW;
1503 }
1504 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001505 val = val * 10 + (CUR - '0');
1506 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001507 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001508 val = 0;
1509 break;
1510 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001511 if (val > 0x10FFFF)
1512 outofrange = val;
1513
Owen Taylor3473f882001-02-23 17:55:21 +00001514 NEXT;
1515 count++;
1516 }
1517 if (RAW == ';') {
1518 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001519 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001520 ctxt->nbChars ++;
1521 ctxt->input->cur++;
1522 }
1523 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001524 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001525 }
1526
1527 /*
1528 * [ WFC: Legal Character ]
1529 * Characters referred to using character references must match the
1530 * production for Char.
1531 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001532 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001533 return(val);
1534 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001535 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1536 "xmlParseCharRef: invalid xmlChar value %d\n",
1537 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001538 }
1539 return(0);
1540}
1541
1542/**
1543 * xmlParseStringCharRef:
1544 * @ctxt: an XML parser context
1545 * @str: a pointer to an index in the string
1546 *
1547 * parse Reference declarations, variant parsing from a string rather
1548 * than an an input flow.
1549 *
1550 * [66] CharRef ::= '&#' [0-9]+ ';' |
1551 * '&#x' [0-9a-fA-F]+ ';'
1552 *
1553 * [ WFC: Legal Character ]
1554 * Characters referred to using character references must match the
1555 * production for Char.
1556 *
1557 * Returns the value parsed (as an int), 0 in case of error, str will be
1558 * updated to the current value of the index
1559 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001560static int
Owen Taylor3473f882001-02-23 17:55:21 +00001561xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1562 const xmlChar *ptr;
1563 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001564 unsigned int val = 0;
1565 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001566
1567 if ((str == NULL) || (*str == NULL)) return(0);
1568 ptr = *str;
1569 cur = *ptr;
1570 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1571 ptr += 3;
1572 cur = *ptr;
1573 while (cur != ';') { /* Non input consuming loop */
1574 if ((cur >= '0') && (cur <= '9'))
1575 val = val * 16 + (cur - '0');
1576 else if ((cur >= 'a') && (cur <= 'f'))
1577 val = val * 16 + (cur - 'a') + 10;
1578 else if ((cur >= 'A') && (cur <= 'F'))
1579 val = val * 16 + (cur - 'A') + 10;
1580 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001581 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001582 val = 0;
1583 break;
1584 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001585 if (val > 0x10FFFF)
1586 outofrange = val;
1587
Owen Taylor3473f882001-02-23 17:55:21 +00001588 ptr++;
1589 cur = *ptr;
1590 }
1591 if (cur == ';')
1592 ptr++;
1593 } else if ((cur == '&') && (ptr[1] == '#')){
1594 ptr += 2;
1595 cur = *ptr;
1596 while (cur != ';') { /* Non input consuming loops */
1597 if ((cur >= '0') && (cur <= '9'))
1598 val = val * 10 + (cur - '0');
1599 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001600 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001601 val = 0;
1602 break;
1603 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001604 if (val > 0x10FFFF)
1605 outofrange = val;
1606
Owen Taylor3473f882001-02-23 17:55:21 +00001607 ptr++;
1608 cur = *ptr;
1609 }
1610 if (cur == ';')
1611 ptr++;
1612 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001613 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001614 return(0);
1615 }
1616 *str = ptr;
1617
1618 /*
1619 * [ WFC: Legal Character ]
1620 * Characters referred to using character references must match the
1621 * production for Char.
1622 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001623 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001624 return(val);
1625 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001626 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1627 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1628 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001629 }
1630 return(0);
1631}
1632
1633/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001634 * xmlNewBlanksWrapperInputStream:
1635 * @ctxt: an XML parser context
1636 * @entity: an Entity pointer
1637 *
1638 * Create a new input stream for wrapping
1639 * blanks around a PEReference
1640 *
1641 * Returns the new input stream or NULL
1642 */
1643
1644static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1645
Daniel Veillardf4862f02002-09-10 11:13:43 +00001646static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001647xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1648 xmlParserInputPtr input;
1649 xmlChar *buffer;
1650 size_t length;
1651 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001652 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1653 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001654 return(NULL);
1655 }
1656 if (xmlParserDebugEntities)
1657 xmlGenericError(xmlGenericErrorContext,
1658 "new blanks wrapper for entity: %s\n", entity->name);
1659 input = xmlNewInputStream(ctxt);
1660 if (input == NULL) {
1661 return(NULL);
1662 }
1663 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001664 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001665 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001666 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001667 return(NULL);
1668 }
1669 buffer [0] = ' ';
1670 buffer [1] = '%';
1671 buffer [length-3] = ';';
1672 buffer [length-2] = ' ';
1673 buffer [length-1] = 0;
1674 memcpy(buffer + 2, entity->name, length - 5);
1675 input->free = deallocblankswrapper;
1676 input->base = buffer;
1677 input->cur = buffer;
1678 input->length = length;
1679 input->end = &buffer[length];
1680 return(input);
1681}
1682
1683/**
Owen Taylor3473f882001-02-23 17:55:21 +00001684 * xmlParserHandlePEReference:
1685 * @ctxt: the parser context
1686 *
1687 * [69] PEReference ::= '%' Name ';'
1688 *
1689 * [ WFC: No Recursion ]
1690 * A parsed entity must not contain a recursive
1691 * reference to itself, either directly or indirectly.
1692 *
1693 * [ WFC: Entity Declared ]
1694 * In a document without any DTD, a document with only an internal DTD
1695 * subset which contains no parameter entity references, or a document
1696 * with "standalone='yes'", ... ... The declaration of a parameter
1697 * entity must precede any reference to it...
1698 *
1699 * [ VC: Entity Declared ]
1700 * In a document with an external subset or external parameter entities
1701 * with "standalone='no'", ... ... The declaration of a parameter entity
1702 * must precede any reference to it...
1703 *
1704 * [ WFC: In DTD ]
1705 * Parameter-entity references may only appear in the DTD.
1706 * NOTE: misleading but this is handled.
1707 *
1708 * A PEReference may have been detected in the current input stream
1709 * the handling is done accordingly to
1710 * http://www.w3.org/TR/REC-xml#entproc
1711 * i.e.
1712 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001713 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001714 */
1715void
1716xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001717 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 xmlEntityPtr entity = NULL;
1719 xmlParserInputPtr input;
1720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 if (RAW != '%') return;
1722 switch(ctxt->instate) {
1723 case XML_PARSER_CDATA_SECTION:
1724 return;
1725 case XML_PARSER_COMMENT:
1726 return;
1727 case XML_PARSER_START_TAG:
1728 return;
1729 case XML_PARSER_END_TAG:
1730 return;
1731 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001732 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001733 return;
1734 case XML_PARSER_PROLOG:
1735 case XML_PARSER_START:
1736 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001738 return;
1739 case XML_PARSER_ENTITY_DECL:
1740 case XML_PARSER_CONTENT:
1741 case XML_PARSER_ATTRIBUTE_VALUE:
1742 case XML_PARSER_PI:
1743 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001744 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001745 /* we just ignore it there */
1746 return;
1747 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001748 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001749 return;
1750 case XML_PARSER_ENTITY_VALUE:
1751 /*
1752 * NOTE: in the case of entity values, we don't do the
1753 * substitution here since we need the literal
1754 * entity value to be able to save the internal
1755 * subset of the document.
1756 * This will be handled by xmlStringDecodeEntities
1757 */
1758 return;
1759 case XML_PARSER_DTD:
1760 /*
1761 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1762 * In the internal DTD subset, parameter-entity references
1763 * can occur only where markup declarations can occur, not
1764 * within markup declarations.
1765 * In that case this is handled in xmlParseMarkupDecl
1766 */
1767 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1768 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001769 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001770 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001771 break;
1772 case XML_PARSER_IGNORE:
1773 return;
1774 }
1775
1776 NEXT;
1777 name = xmlParseName(ctxt);
1778 if (xmlParserDebugEntities)
1779 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001780 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001781 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001782 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001783 } else {
1784 if (RAW == ';') {
1785 NEXT;
1786 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1787 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1788 if (entity == NULL) {
1789
1790 /*
1791 * [ WFC: Entity Declared ]
1792 * In a document without any DTD, a document with only an
1793 * internal DTD subset which contains no parameter entity
1794 * references, or a document with "standalone='yes'", ...
1795 * ... The declaration of a parameter entity must precede
1796 * any reference to it...
1797 */
1798 if ((ctxt->standalone == 1) ||
1799 ((ctxt->hasExternalSubset == 0) &&
1800 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001801 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001802 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 } else {
1804 /*
1805 * [ VC: Entity Declared ]
1806 * In a document with an external subset or external
1807 * parameter entities with "standalone='no'", ...
1808 * ... The declaration of a parameter entity must precede
1809 * any reference to it...
1810 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001811 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1812 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1813 "PEReference: %%%s; not found\n",
1814 name);
1815 } else
1816 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1817 "PEReference: %%%s; not found\n",
1818 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001819 ctxt->valid = 0;
1820 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001821 } else if (ctxt->input->free != deallocblankswrapper) {
1822 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1823 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001824 } else {
1825 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1826 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001827 xmlChar start[4];
1828 xmlCharEncoding enc;
1829
Owen Taylor3473f882001-02-23 17:55:21 +00001830 /*
1831 * handle the extra spaces added before and after
1832 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001833 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001834 */
1835 input = xmlNewEntityInputStream(ctxt, entity);
1836 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001837
1838 /*
1839 * Get the 4 first bytes and decode the charset
1840 * if enc != XML_CHAR_ENCODING_NONE
1841 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001842 * Note that, since we may have some non-UTF8
1843 * encoding (like UTF16, bug 135229), the 'length'
1844 * is not known, but we can calculate based upon
1845 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001846 */
1847 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001848 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001849 start[0] = RAW;
1850 start[1] = NXT(1);
1851 start[2] = NXT(2);
1852 start[3] = NXT(3);
1853 enc = xmlDetectCharEncoding(start, 4);
1854 if (enc != XML_CHAR_ENCODING_NONE) {
1855 xmlSwitchEncoding(ctxt, enc);
1856 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001857 }
1858
Owen Taylor3473f882001-02-23 17:55:21 +00001859 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001860 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1861 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001862 xmlParseTextDecl(ctxt);
1863 }
Owen Taylor3473f882001-02-23 17:55:21 +00001864 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001865 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1866 "PEReference: %s is not a parameter entity\n",
1867 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001868 }
1869 }
1870 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001871 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001872 }
Owen Taylor3473f882001-02-23 17:55:21 +00001873 }
1874}
1875
/*
 * growBuffer:
 * Double the capacity of @buffer, whose size is tracked in the companion
 * variable <buffer>_size, by reallocating it with xmlRealloc.
 * On reallocation failure control jumps to the mem_error label of the
 * enclosing function; @buffer is then left pointing at the old block
 * while <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size = buffer##_size * 2;					\
    grown = (xmlChar *)							\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (grown == NULL)							\
	goto mem_error;							\
    buffer = grown;							\
}
1887
1888/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001889 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001890 * @ctxt: the parser context
1891 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001892 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001893 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1894 * @end: an end marker xmlChar, 0 if none
1895 * @end2: an end marker xmlChar, 0 if none
1896 * @end3: an end marker xmlChar, 0 if none
1897 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001898 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001899 *
1900 * [67] Reference ::= EntityRef | CharRef
1901 *
1902 * [69] PEReference ::= '%' Name ';'
1903 *
1904 * Returns A newly allocated string with the substitution done. The caller
1905 * must deallocate it !
1906 */
1907xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001908xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1909 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001910 xmlChar *buffer = NULL;
1911 int buffer_size = 0;
1912
1913 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001915 xmlEntityPtr ent;
1916 int c,l;
1917 int nbchars = 0;
1918
Daniel Veillarde57ec792003-09-10 10:50:59 +00001919 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001920 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001921 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001922
1923 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001924 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001925 return(NULL);
1926 }
1927
1928 /*
1929 * allocate a translation buffer.
1930 */
1931 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001932 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001933 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001934
1935 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001936 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001937 * we are operating on already parsed values.
1938 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001939 if (str < last)
1940 c = CUR_SCHAR(str, l);
1941 else
1942 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001943 while ((c != 0) && (c != end) && /* non input consuming loop */
1944 (c != end2) && (c != end3)) {
1945
1946 if (c == 0) break;
1947 if ((c == '&') && (str[1] == '#')) {
1948 int val = xmlParseStringCharRef(ctxt, &str);
1949 if (val != 0) {
1950 COPY_BUF(0,buffer,nbchars,val);
1951 }
1952 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1953 if (xmlParserDebugEntities)
1954 xmlGenericError(xmlGenericErrorContext,
1955 "String decoding Entity Reference: %.30s\n",
1956 str);
1957 ent = xmlParseStringEntityRef(ctxt, &str);
1958 if ((ent != NULL) &&
1959 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1960 if (ent->content != NULL) {
1961 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1962 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001963 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1964 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001965 }
1966 } else if ((ent != NULL) && (ent->content != NULL)) {
1967 xmlChar *rep;
1968
1969 ctxt->depth++;
1970 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1971 0, 0, 0);
1972 ctxt->depth--;
1973 if (rep != NULL) {
1974 current = rep;
1975 while (*current != 0) { /* non input consuming loop */
1976 buffer[nbchars++] = *current++;
1977 if (nbchars >
1978 buffer_size - XML_PARSER_BUFFER_SIZE) {
1979 growBuffer(buffer);
1980 }
1981 }
1982 xmlFree(rep);
1983 }
1984 } else if (ent != NULL) {
1985 int i = xmlStrlen(ent->name);
1986 const xmlChar *cur = ent->name;
1987
1988 buffer[nbchars++] = '&';
1989 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1990 growBuffer(buffer);
1991 }
1992 for (;i > 0;i--)
1993 buffer[nbchars++] = *cur++;
1994 buffer[nbchars++] = ';';
1995 }
1996 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1997 if (xmlParserDebugEntities)
1998 xmlGenericError(xmlGenericErrorContext,
1999 "String decoding PE Reference: %.30s\n", str);
2000 ent = xmlParseStringPEReference(ctxt, &str);
2001 if (ent != NULL) {
2002 xmlChar *rep;
2003
2004 ctxt->depth++;
2005 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2006 0, 0, 0);
2007 ctxt->depth--;
2008 if (rep != NULL) {
2009 current = rep;
2010 while (*current != 0) { /* non input consuming loop */
2011 buffer[nbchars++] = *current++;
2012 if (nbchars >
2013 buffer_size - XML_PARSER_BUFFER_SIZE) {
2014 growBuffer(buffer);
2015 }
2016 }
2017 xmlFree(rep);
2018 }
2019 }
2020 } else {
2021 COPY_BUF(l,buffer,nbchars,c);
2022 str += l;
2023 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2024 growBuffer(buffer);
2025 }
2026 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002027 if (str < last)
2028 c = CUR_SCHAR(str, l);
2029 else
2030 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 }
2032 buffer[nbchars++] = 0;
2033 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002034
2035mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002036 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002037 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002038}
2039
Daniel Veillarde57ec792003-09-10 10:50:59 +00002040/**
2041 * xmlStringDecodeEntities:
2042 * @ctxt: the parser context
2043 * @str: the input string
2044 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2045 * @end: an end marker xmlChar, 0 if none
2046 * @end2: an end marker xmlChar, 0 if none
2047 * @end3: an end marker xmlChar, 0 if none
2048 *
2049 * Takes a entity string content and process to do the adequate substitutions.
2050 *
2051 * [67] Reference ::= EntityRef | CharRef
2052 *
2053 * [69] PEReference ::= '%' Name ';'
2054 *
2055 * Returns A newly allocated string with the substitution done. The caller
2056 * must deallocate it !
2057 */
2058xmlChar *
2059xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2060 xmlChar end, xmlChar end2, xmlChar end3) {
2061 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2062 end, end2, end3));
2063}
Owen Taylor3473f882001-02-23 17:55:21 +00002064
2065/************************************************************************
2066 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002067 * Commodity functions, cleanup needed ? *
2068 * *
2069 ************************************************************************/
2070
2071/**
2072 * areBlanks:
2073 * @ctxt: an XML parser context
2074 * @str: a xmlChar *
2075 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002076 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002077 *
2078 * Is this a sequence of blank chars that one can ignore ?
2079 *
2080 * Returns 1 if ignorable 0 otherwise.
2081 */
2082
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002083static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2084 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002085 int i, ret;
2086 xmlNodePtr lastChild;
2087
Daniel Veillard05c13a22001-09-09 08:38:09 +00002088 /*
2089 * Don't spend time trying to differentiate them, the same callback is
2090 * used !
2091 */
2092 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002093 return(0);
2094
Owen Taylor3473f882001-02-23 17:55:21 +00002095 /*
2096 * Check for xml:space value.
2097 */
2098 if (*(ctxt->space) == 1)
2099 return(0);
2100
2101 /*
2102 * Check that the string is made of blanks
2103 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002104 if (blank_chars == 0) {
2105 for (i = 0;i < len;i++)
2106 if (!(IS_BLANK_CH(str[i]))) return(0);
2107 }
Owen Taylor3473f882001-02-23 17:55:21 +00002108
2109 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002110 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002111 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002112 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002113 if (ctxt->myDoc != NULL) {
2114 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2115 if (ret == 0) return(1);
2116 if (ret == 1) return(0);
2117 }
2118
2119 /*
2120 * Otherwise, heuristic :-\
2121 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002122 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002123 if ((ctxt->node->children == NULL) &&
2124 (RAW == '<') && (NXT(1) == '/')) return(0);
2125
2126 lastChild = xmlGetLastChild(ctxt->node);
2127 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002128 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2129 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002130 } else if (xmlNodeIsText(lastChild))
2131 return(0);
2132 else if ((ctxt->node->children != NULL) &&
2133 (xmlNodeIsText(ctxt->node->children)))
2134 return(0);
2135 return(1);
2136}
2137
Owen Taylor3473f882001-02-23 17:55:21 +00002138/************************************************************************
2139 * *
2140 * Extra stuff for namespace support *
2141 * Relates to http://www.w3.org/TR/WD-xml-names *
2142 * *
2143 ************************************************************************/
2144
2145/**
2146 * xmlSplitQName:
2147 * @ctxt: an XML parser context
2148 * @name: an XML parser context
2149 * @prefix: a xmlChar **
2150 *
2151 * parse an UTF8 encoded XML qualified name string
2152 *
2153 * [NS 5] QName ::= (Prefix ':')? LocalPart
2154 *
2155 * [NS 6] Prefix ::= NCName
2156 *
2157 * [NS 7] LocalPart ::= NCName
2158 *
2159 * Returns the local part, and prefix is updated
2160 * to get the Prefix if any.
2161 */
2162
2163xmlChar *
2164xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2165 xmlChar buf[XML_MAX_NAMELEN + 5];
2166 xmlChar *buffer = NULL;
2167 int len = 0;
2168 int max = XML_MAX_NAMELEN;
2169 xmlChar *ret = NULL;
2170 const xmlChar *cur = name;
2171 int c;
2172
2173 *prefix = NULL;
2174
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002175 if (cur == NULL) return(NULL);
2176
Owen Taylor3473f882001-02-23 17:55:21 +00002177#ifndef XML_XML_NAMESPACE
2178 /* xml: prefix is not really a namespace */
2179 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2180 (cur[2] == 'l') && (cur[3] == ':'))
2181 return(xmlStrdup(name));
2182#endif
2183
Daniel Veillard597bc482003-07-24 16:08:28 +00002184 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002185 if (cur[0] == ':')
2186 return(xmlStrdup(name));
2187
2188 c = *cur++;
2189 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2190 buf[len++] = c;
2191 c = *cur++;
2192 }
2193 if (len >= max) {
2194 /*
2195 * Okay someone managed to make a huge name, so he's ready to pay
2196 * for the processing speed.
2197 */
2198 max = len * 2;
2199
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002200 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002201 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002202 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
2205 memcpy(buffer, buf, len);
2206 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2207 if (len + 10 > max) {
2208 max *= 2;
2209 buffer = (xmlChar *) xmlRealloc(buffer,
2210 max * sizeof(xmlChar));
2211 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002212 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002213 return(NULL);
2214 }
2215 }
2216 buffer[len++] = c;
2217 c = *cur++;
2218 }
2219 buffer[len] = 0;
2220 }
2221
Daniel Veillard597bc482003-07-24 16:08:28 +00002222 /* nasty but well=formed
2223 if ((c == ':') && (*cur == 0)) {
2224 return(xmlStrdup(name));
2225 } */
2226
Owen Taylor3473f882001-02-23 17:55:21 +00002227 if (buffer == NULL)
2228 ret = xmlStrndup(buf, len);
2229 else {
2230 ret = buffer;
2231 buffer = NULL;
2232 max = XML_MAX_NAMELEN;
2233 }
2234
2235
2236 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002237 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002238 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002239 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002240 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002241 }
Owen Taylor3473f882001-02-23 17:55:21 +00002242 len = 0;
2243
Daniel Veillardbb284f42002-10-16 18:02:47 +00002244 /*
2245 * Check that the first character is proper to start
2246 * a new name
2247 */
2248 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2249 ((c >= 0x41) && (c <= 0x5A)) ||
2250 (c == '_') || (c == ':'))) {
2251 int l;
2252 int first = CUR_SCHAR(cur, l);
2253
2254 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002255 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002256 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002257 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002258 }
2259 }
2260 cur++;
2261
Owen Taylor3473f882001-02-23 17:55:21 +00002262 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2263 buf[len++] = c;
2264 c = *cur++;
2265 }
2266 if (len >= max) {
2267 /*
2268 * Okay someone managed to make a huge name, so he's ready to pay
2269 * for the processing speed.
2270 */
2271 max = len * 2;
2272
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002273 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002274 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002275 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002276 return(NULL);
2277 }
2278 memcpy(buffer, buf, len);
2279 while (c != 0) { /* tested bigname2.xml */
2280 if (len + 10 > max) {
2281 max *= 2;
2282 buffer = (xmlChar *) xmlRealloc(buffer,
2283 max * sizeof(xmlChar));
2284 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002285 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002286 return(NULL);
2287 }
2288 }
2289 buffer[len++] = c;
2290 c = *cur++;
2291 }
2292 buffer[len] = 0;
2293 }
2294
2295 if (buffer == NULL)
2296 ret = xmlStrndup(buf, len);
2297 else {
2298 ret = buffer;
2299 }
2300 }
2301
2302 return(ret);
2303}
2304
2305/************************************************************************
2306 * *
2307 * The parser itself *
2308 * Relates to http://www.w3.org/TR/REC-xml *
2309 * *
2310 ************************************************************************/
2311
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002312static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002313static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002314 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002315
Owen Taylor3473f882001-02-23 17:55:21 +00002316/**
2317 * xmlParseName:
2318 * @ctxt: an XML parser context
2319 *
2320 * parse an XML name.
2321 *
2322 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2323 * CombiningChar | Extender
2324 *
2325 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2326 *
2327 * [6] Names ::= Name (S Name)*
2328 *
2329 * Returns the Name parsed or NULL
2330 */
2331
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002332const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002333xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002334 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002335 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002336 int count = 0;
2337
2338 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002339
2340 /*
2341 * Accelerator for simple ASCII names
2342 */
2343 in = ctxt->input->cur;
2344 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2345 ((*in >= 0x41) && (*in <= 0x5A)) ||
2346 (*in == '_') || (*in == ':')) {
2347 in++;
2348 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2349 ((*in >= 0x41) && (*in <= 0x5A)) ||
2350 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002351 (*in == '_') || (*in == '-') ||
2352 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002353 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002354 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002355 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002356 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002357 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002358 ctxt->nbChars += count;
2359 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002360 if (ret == NULL)
2361 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002362 return(ret);
2363 }
2364 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002365 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002366}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002367
Daniel Veillard46de64e2002-05-29 08:21:33 +00002368/**
2369 * xmlParseNameAndCompare:
2370 * @ctxt: an XML parser context
2371 *
2372 * parse an XML name and compares for match
2373 * (specialized for endtag parsing)
2374 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002375 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2376 * and the name for mismatch
2377 */
2378
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002379static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002380xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002381 register const xmlChar *cmp = other;
2382 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002383 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002384
2385 GROW;
2386
2387 in = ctxt->input->cur;
2388 while (*in != 0 && *in == *cmp) {
2389 ++in;
2390 ++cmp;
2391 }
William M. Brack76e95df2003-10-18 16:20:14 +00002392 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002393 /* success */
2394 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002395 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002396 }
2397 /* failure (or end of input buffer), check with full function */
2398 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002399 /* strings coming from the dictionnary direct compare possible */
2400 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002401 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002402 }
2403 return ret;
2404}
2405
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002406static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002407xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002408 int len = 0, l;
2409 int c;
2410 int count = 0;
2411
2412 /*
2413 * Handler for more complex cases
2414 */
2415 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002416 c = CUR_CHAR(l);
2417 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2418 (!IS_LETTER(c) && (c != '_') &&
2419 (c != ':'))) {
2420 return(NULL);
2421 }
2422
2423 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002424 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002425 (c == '.') || (c == '-') ||
2426 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002427 (IS_COMBINING(c)) ||
2428 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002429 if (count++ > 100) {
2430 count = 0;
2431 GROW;
2432 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002433 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002434 NEXTL(l);
2435 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002436 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002437 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002438}
2439
2440/**
2441 * xmlParseStringName:
2442 * @ctxt: an XML parser context
2443 * @str: a pointer to the string pointer (IN/OUT)
2444 *
2445 * parse an XML name.
2446 *
2447 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2448 * CombiningChar | Extender
2449 *
2450 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2451 *
2452 * [6] Names ::= Name (S Name)*
2453 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002454 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002455 * is updated to the current location in the string.
2456 */
2457
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002458static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002459xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2460 xmlChar buf[XML_MAX_NAMELEN + 5];
2461 const xmlChar *cur = *str;
2462 int len = 0, l;
2463 int c;
2464
2465 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002466 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002467 (c != ':')) {
2468 return(NULL);
2469 }
2470
William M. Brack871611b2003-10-18 04:53:14 +00002471 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002472 (c == '.') || (c == '-') ||
2473 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002474 (IS_COMBINING(c)) ||
2475 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002476 COPY_BUF(l,buf,len,c);
2477 cur += l;
2478 c = CUR_SCHAR(cur, l);
2479 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2480 /*
2481 * Okay someone managed to make a huge name, so he's ready to pay
2482 * for the processing speed.
2483 */
2484 xmlChar *buffer;
2485 int max = len * 2;
2486
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002487 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002488 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002489 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002490 return(NULL);
2491 }
2492 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002493 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002494 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002495 (c == '.') || (c == '-') ||
2496 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002497 (IS_COMBINING(c)) ||
2498 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002499 if (len + 10 > max) {
2500 max *= 2;
2501 buffer = (xmlChar *) xmlRealloc(buffer,
2502 max * sizeof(xmlChar));
2503 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002504 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002505 return(NULL);
2506 }
2507 }
2508 COPY_BUF(l,buffer,len,c);
2509 cur += l;
2510 c = CUR_SCHAR(cur, l);
2511 }
2512 buffer[len] = 0;
2513 *str = cur;
2514 return(buffer);
2515 }
2516 }
2517 *str = cur;
2518 return(xmlStrndup(buf, len));
2519}
2520
2521/**
2522 * xmlParseNmtoken:
2523 * @ctxt: an XML parser context
2524 *
2525 * parse an XML Nmtoken.
2526 *
2527 * [7] Nmtoken ::= (NameChar)+
2528 *
2529 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2530 *
2531 * Returns the Nmtoken parsed or NULL
2532 */
2533
2534xmlChar *
2535xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2536 xmlChar buf[XML_MAX_NAMELEN + 5];
2537 int len = 0, l;
2538 int c;
2539 int count = 0;
2540
2541 GROW;
2542 c = CUR_CHAR(l);
2543
William M. Brack871611b2003-10-18 04:53:14 +00002544 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002545 (c == '.') || (c == '-') ||
2546 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002547 (IS_COMBINING(c)) ||
2548 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (count++ > 100) {
2550 count = 0;
2551 GROW;
2552 }
2553 COPY_BUF(l,buf,len,c);
2554 NEXTL(l);
2555 c = CUR_CHAR(l);
2556 if (len >= XML_MAX_NAMELEN) {
2557 /*
2558 * Okay someone managed to make a huge token, so he's ready to pay
2559 * for the processing speed.
2560 */
2561 xmlChar *buffer;
2562 int max = len * 2;
2563
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002564 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002565 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002566 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002567 return(NULL);
2568 }
2569 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002570 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002571 (c == '.') || (c == '-') ||
2572 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002573 (IS_COMBINING(c)) ||
2574 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002575 if (count++ > 100) {
2576 count = 0;
2577 GROW;
2578 }
2579 if (len + 10 > max) {
2580 max *= 2;
2581 buffer = (xmlChar *) xmlRealloc(buffer,
2582 max * sizeof(xmlChar));
2583 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002584 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002585 return(NULL);
2586 }
2587 }
2588 COPY_BUF(l,buffer,len,c);
2589 NEXTL(l);
2590 c = CUR_CHAR(l);
2591 }
2592 buffer[len] = 0;
2593 return(buffer);
2594 }
2595 }
2596 if (len == 0)
2597 return(NULL);
2598 return(xmlStrndup(buf, len));
2599}
2600
2601/**
2602 * xmlParseEntityValue:
2603 * @ctxt: an XML parser context
2604 * @orig: if non-NULL store a copy of the original entity value
2605 *
2606 * parse a value for ENTITY declarations
2607 *
2608 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2609 * "'" ([^%&'] | PEReference | Reference)* "'"
2610 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002611 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002612 */
2613
2614xmlChar *
2615xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2616 xmlChar *buf = NULL;
2617 int len = 0;
2618 int size = XML_PARSER_BUFFER_SIZE;
2619 int c, l;
2620 xmlChar stop;
2621 xmlChar *ret = NULL;
2622 const xmlChar *cur = NULL;
2623 xmlParserInputPtr input;
2624
2625 if (RAW == '"') stop = '"';
2626 else if (RAW == '\'') stop = '\'';
2627 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002628 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002629 return(NULL);
2630 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002631 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002632 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002633 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002634 return(NULL);
2635 }
2636
2637 /*
2638 * The content of the entity definition is copied in a buffer.
2639 */
2640
2641 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2642 input = ctxt->input;
2643 GROW;
2644 NEXT;
2645 c = CUR_CHAR(l);
2646 /*
2647 * NOTE: 4.4.5 Included in Literal
2648 * When a parameter entity reference appears in a literal entity
2649 * value, ... a single or double quote character in the replacement
2650 * text is always treated as a normal data character and will not
2651 * terminate the literal.
2652 * In practice it means we stop the loop only when back at parsing
2653 * the initial entity and the quote is found
2654 */
William M. Brack871611b2003-10-18 04:53:14 +00002655 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002656 (ctxt->input != input))) {
2657 if (len + 5 >= size) {
2658 size *= 2;
2659 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2660 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002661 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002662 return(NULL);
2663 }
2664 }
2665 COPY_BUF(l,buf,len,c);
2666 NEXTL(l);
2667 /*
2668 * Pop-up of finished entities.
2669 */
2670 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2671 xmlPopInput(ctxt);
2672
2673 GROW;
2674 c = CUR_CHAR(l);
2675 if (c == 0) {
2676 GROW;
2677 c = CUR_CHAR(l);
2678 }
2679 }
2680 buf[len] = 0;
2681
2682 /*
2683 * Raise problem w.r.t. '&' and '%' being used in non-entities
2684 * reference constructs. Note Charref will be handled in
2685 * xmlStringDecodeEntities()
2686 */
2687 cur = buf;
2688 while (*cur != 0) { /* non input consuming */
2689 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2690 xmlChar *name;
2691 xmlChar tmp = *cur;
2692
2693 cur++;
2694 name = xmlParseStringName(ctxt, &cur);
2695 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002696 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002697 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002698 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002699 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002700 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2701 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002702 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002703 }
2704 if (name != NULL)
2705 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002706 if (*cur == 0)
2707 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002708 }
2709 cur++;
2710 }
2711
2712 /*
2713 * Then PEReference entities are substituted.
2714 */
2715 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002716 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002717 xmlFree(buf);
2718 } else {
2719 NEXT;
2720 /*
2721 * NOTE: 4.4.7 Bypassed
2722 * When a general entity reference appears in the EntityValue in
2723 * an entity declaration, it is bypassed and left as is.
2724 * so XML_SUBSTITUTE_REF is not set here.
2725 */
2726 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2727 0, 0, 0);
2728 if (orig != NULL)
2729 *orig = buf;
2730 else
2731 xmlFree(buf);
2732 }
2733
2734 return(ret);
2735}
2736
2737/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002738 * xmlParseAttValueComplex:
2739 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002740 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002741 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002742 *
2743 * parse a value for an attribute, this is the fallback function
2744 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002745 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002746 *
2747 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2748 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002749static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002750xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002751 xmlChar limit = 0;
2752 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002753 int len = 0;
2754 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002755 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002756 xmlChar *current = NULL;
2757 xmlEntityPtr ent;
2758
Owen Taylor3473f882001-02-23 17:55:21 +00002759 if (NXT(0) == '"') {
2760 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2761 limit = '"';
2762 NEXT;
2763 } else if (NXT(0) == '\'') {
2764 limit = '\'';
2765 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2766 NEXT;
2767 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002768 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002769 return(NULL);
2770 }
2771
2772 /*
2773 * allocate a translation buffer.
2774 */
2775 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002776 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002777 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002778
2779 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002780 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002781 */
2782 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002783 while ((NXT(0) != limit) && /* checked */
2784 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002785 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002786 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002787 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002788 if (NXT(1) == '#') {
2789 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002790
Owen Taylor3473f882001-02-23 17:55:21 +00002791 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002792 if (ctxt->replaceEntities) {
2793 if (len > buf_size - 10) {
2794 growBuffer(buf);
2795 }
2796 buf[len++] = '&';
2797 } else {
2798 /*
2799 * The reparsing will be done in xmlStringGetNodeList()
2800 * called by the attribute() function in SAX.c
2801 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002802 if (len > buf_size - 10) {
2803 growBuffer(buf);
2804 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002805 buf[len++] = '&';
2806 buf[len++] = '#';
2807 buf[len++] = '3';
2808 buf[len++] = '8';
2809 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002810 }
2811 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002812 if (len > buf_size - 10) {
2813 growBuffer(buf);
2814 }
Owen Taylor3473f882001-02-23 17:55:21 +00002815 len += xmlCopyChar(0, &buf[len], val);
2816 }
2817 } else {
2818 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002819 if ((ent != NULL) &&
2820 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2821 if (len > buf_size - 10) {
2822 growBuffer(buf);
2823 }
2824 if ((ctxt->replaceEntities == 0) &&
2825 (ent->content[0] == '&')) {
2826 buf[len++] = '&';
2827 buf[len++] = '#';
2828 buf[len++] = '3';
2829 buf[len++] = '8';
2830 buf[len++] = ';';
2831 } else {
2832 buf[len++] = ent->content[0];
2833 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002834 } else if ((ent != NULL) &&
2835 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002836 xmlChar *rep;
2837
2838 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2839 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002840 XML_SUBSTITUTE_REF,
2841 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002842 if (rep != NULL) {
2843 current = rep;
2844 while (*current != 0) { /* non input consuming */
2845 buf[len++] = *current++;
2846 if (len > buf_size - 10) {
2847 growBuffer(buf);
2848 }
2849 }
2850 xmlFree(rep);
2851 }
2852 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002853 if (len > buf_size - 10) {
2854 growBuffer(buf);
2855 }
Owen Taylor3473f882001-02-23 17:55:21 +00002856 if (ent->content != NULL)
2857 buf[len++] = ent->content[0];
2858 }
2859 } else if (ent != NULL) {
2860 int i = xmlStrlen(ent->name);
2861 const xmlChar *cur = ent->name;
2862
2863 /*
2864 * This may look absurd but is needed to detect
2865 * entities problems
2866 */
2867 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2868 (ent->content != NULL)) {
2869 xmlChar *rep;
2870 rep = xmlStringDecodeEntities(ctxt, ent->content,
2871 XML_SUBSTITUTE_REF, 0, 0, 0);
2872 if (rep != NULL)
2873 xmlFree(rep);
2874 }
2875
2876 /*
2877 * Just output the reference
2878 */
2879 buf[len++] = '&';
2880 if (len > buf_size - i - 10) {
2881 growBuffer(buf);
2882 }
2883 for (;i > 0;i--)
2884 buf[len++] = *cur++;
2885 buf[len++] = ';';
2886 }
2887 }
2888 } else {
2889 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002890 if ((len != 0) || (!normalize)) {
2891 if ((!normalize) || (!in_space)) {
2892 COPY_BUF(l,buf,len,0x20);
2893 if (len > buf_size - 10) {
2894 growBuffer(buf);
2895 }
2896 }
2897 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002898 }
2899 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002900 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002901 COPY_BUF(l,buf,len,c);
2902 if (len > buf_size - 10) {
2903 growBuffer(buf);
2904 }
2905 }
2906 NEXTL(l);
2907 }
2908 GROW;
2909 c = CUR_CHAR(l);
2910 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002911 if ((in_space) && (normalize)) {
2912 while (buf[len - 1] == 0x20) len--;
2913 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002914 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002916 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002917 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002918 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2919 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002920 } else
2921 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002922 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002924
2925mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002926 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002927 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002928}
2929
2930/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002931 * xmlParseAttValue:
2932 * @ctxt: an XML parser context
2933 *
2934 * parse a value for an attribute
2935 * Note: the parser won't do substitution of entities here, this
2936 * will be handled later in xmlStringGetNodeList
2937 *
2938 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2939 * "'" ([^<&'] | Reference)* "'"
2940 *
2941 * 3.3.3 Attribute-Value Normalization:
2942 * Before the value of an attribute is passed to the application or
2943 * checked for validity, the XML processor must normalize it as follows:
2944 * - a character reference is processed by appending the referenced
2945 * character to the attribute value
2946 * - an entity reference is processed by recursively processing the
2947 * replacement text of the entity
2948 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2949 * appending #x20 to the normalized value, except that only a single
2950 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2951 * parsed entity or the literal entity value of an internal parsed entity
2952 * - other characters are processed by appending them to the normalized value
2953 * If the declared value is not CDATA, then the XML processor must further
2954 * process the normalized attribute value by discarding any leading and
2955 * trailing space (#x20) characters, and by replacing sequences of space
2956 * (#x20) characters by a single space (#x20) character.
2957 * All attributes for which no declaration has been read should be treated
2958 * by a non-validating parser as if declared CDATA.
2959 *
2960 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2961 */
2962
2963
2964xmlChar *
2965xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002966 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002967}
2968
2969/**
Owen Taylor3473f882001-02-23 17:55:21 +00002970 * xmlParseSystemLiteral:
2971 * @ctxt: an XML parser context
2972 *
2973 * parse an XML Literal
2974 *
2975 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2976 *
2977 * Returns the SystemLiteral parsed or NULL
2978 */
2979
2980xmlChar *
2981xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2982 xmlChar *buf = NULL;
2983 int len = 0;
2984 int size = XML_PARSER_BUFFER_SIZE;
2985 int cur, l;
2986 xmlChar stop;
2987 int state = ctxt->instate;
2988 int count = 0;
2989
2990 SHRINK;
2991 if (RAW == '"') {
2992 NEXT;
2993 stop = '"';
2994 } else if (RAW == '\'') {
2995 NEXT;
2996 stop = '\'';
2997 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002998 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002999 return(NULL);
3000 }
3001
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003002 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003003 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003004 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003005 return(NULL);
3006 }
3007 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3008 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003009 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003010 if (len + 5 >= size) {
3011 size *= 2;
3012 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3013 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003014 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003015 ctxt->instate = (xmlParserInputState) state;
3016 return(NULL);
3017 }
3018 }
3019 count++;
3020 if (count > 50) {
3021 GROW;
3022 count = 0;
3023 }
3024 COPY_BUF(l,buf,len,cur);
3025 NEXTL(l);
3026 cur = CUR_CHAR(l);
3027 if (cur == 0) {
3028 GROW;
3029 SHRINK;
3030 cur = CUR_CHAR(l);
3031 }
3032 }
3033 buf[len] = 0;
3034 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003035 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003036 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003037 } else {
3038 NEXT;
3039 }
3040 return(buf);
3041}
3042
3043/**
3044 * xmlParsePubidLiteral:
3045 * @ctxt: an XML parser context
3046 *
3047 * parse an XML public literal
3048 *
3049 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3050 *
3051 * Returns the PubidLiteral parsed or NULL.
3052 */
3053
3054xmlChar *
3055xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3056 xmlChar *buf = NULL;
3057 int len = 0;
3058 int size = XML_PARSER_BUFFER_SIZE;
3059 xmlChar cur;
3060 xmlChar stop;
3061 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003062 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003063
3064 SHRINK;
3065 if (RAW == '"') {
3066 NEXT;
3067 stop = '"';
3068 } else if (RAW == '\'') {
3069 NEXT;
3070 stop = '\'';
3071 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003073 return(NULL);
3074 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003075 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003076 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003077 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003078 return(NULL);
3079 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003080 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003082 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003083 if (len + 1 >= size) {
3084 size *= 2;
3085 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3086 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003087 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003088 return(NULL);
3089 }
3090 }
3091 buf[len++] = cur;
3092 count++;
3093 if (count > 50) {
3094 GROW;
3095 count = 0;
3096 }
3097 NEXT;
3098 cur = CUR;
3099 if (cur == 0) {
3100 GROW;
3101 SHRINK;
3102 cur = CUR;
3103 }
3104 }
3105 buf[len] = 0;
3106 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003107 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003108 } else {
3109 NEXT;
3110 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003111 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003112 return(buf);
3113}
3114
Daniel Veillard48b2f892001-02-25 16:11:03 +00003115void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003116/**
3117 * xmlParseCharData:
3118 * @ctxt: an XML parser context
3119 * @cdata: int indicating whether we are within a CDATA section
3120 *
3121 * parse a CharData section.
3122 * if we are within a CDATA section ']]>' marks an end of section.
3123 *
3124 * The right angle bracket (>) may be represented using the string "&gt;",
3125 * and must, for compatibility, be escaped using "&gt;" or a character
3126 * reference when it appears in the string "]]>" in content, when that
3127 * string is not marking the end of a CDATA section.
3128 *
3129 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3130 */
3131
3132void
3133xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003134 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003135 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003136 int line = ctxt->input->line;
3137 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003138
3139 SHRINK;
3140 GROW;
3141 /*
3142 * Accelerated common case where input don't need to be
3143 * modified before passing it to the handler.
3144 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003145 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003146 in = ctxt->input->cur;
3147 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003148get_more_space:
3149 while (*in == 0x20) in++;
3150 if (*in == 0xA) {
3151 ctxt->input->line++;
3152 in++;
3153 while (*in == 0xA) {
3154 ctxt->input->line++;
3155 in++;
3156 }
3157 goto get_more_space;
3158 }
3159 if (*in == '<') {
3160 nbchar = in - ctxt->input->cur;
3161 if (nbchar > 0) {
3162 const xmlChar *tmp = ctxt->input->cur;
3163 ctxt->input->cur = in;
3164
3165 if (ctxt->sax->ignorableWhitespace !=
3166 ctxt->sax->characters) {
3167 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3168 ctxt->sax->ignorableWhitespace(ctxt->userData,
3169 tmp, nbchar);
3170 } else if (ctxt->sax->characters != NULL)
3171 ctxt->sax->characters(ctxt->userData,
3172 tmp, nbchar);
3173 } else if (ctxt->sax->characters != NULL) {
3174 ctxt->sax->characters(ctxt->userData,
3175 tmp, nbchar);
3176 }
3177 }
3178 return;
3179 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003180get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003181 while (((*in > ']') && (*in <= 0x7F)) ||
3182 ((*in > '&') && (*in < '<')) ||
3183 ((*in > '<') && (*in < ']')) ||
3184 ((*in >= 0x20) && (*in < '&')) ||
3185 (*in == 0x09))
3186 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003187 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003188 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003189 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003190 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003191 ctxt->input->line++;
3192 in++;
3193 }
3194 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003195 }
3196 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003197 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003198 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003199 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003200 return;
3201 }
3202 in++;
3203 goto get_more;
3204 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003205 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003206 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003207 if ((ctxt->sax->ignorableWhitespace !=
3208 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003209 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003210 const xmlChar *tmp = ctxt->input->cur;
3211 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003212
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003213 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003214 ctxt->sax->ignorableWhitespace(ctxt->userData,
3215 tmp, nbchar);
3216 } else if (ctxt->sax->characters != NULL)
3217 ctxt->sax->characters(ctxt->userData,
3218 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003219 line = ctxt->input->line;
3220 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003221 } else {
3222 if (ctxt->sax->characters != NULL)
3223 ctxt->sax->characters(ctxt->userData,
3224 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003225 line = ctxt->input->line;
3226 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003227 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003228 }
3229 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003230 if (*in == 0xD) {
3231 in++;
3232 if (*in == 0xA) {
3233 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003234 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003235 ctxt->input->line++;
3236 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003237 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003238 in--;
3239 }
3240 if (*in == '<') {
3241 return;
3242 }
3243 if (*in == '&') {
3244 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003245 }
3246 SHRINK;
3247 GROW;
3248 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003249 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003250 nbchar = 0;
3251 }
Daniel Veillard50582112001-03-26 22:52:16 +00003252 ctxt->input->line = line;
3253 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003254 xmlParseCharDataComplex(ctxt, cdata);
3255}
3256
Daniel Veillard01c13b52002-12-10 15:19:08 +00003257/**
3258 * xmlParseCharDataComplex:
3259 * @ctxt: an XML parser context
3260 * @cdata: int indicating whether we are within a CDATA section
3261 *
3262 * parse a CharData section.this is the fallback function
3263 * of xmlParseCharData() when the parsing requires handling
3264 * of non-ASCII characters.
3265 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003266void
3267xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003268 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3269 int nbchar = 0;
3270 int cur, l;
3271 int count = 0;
3272
3273 SHRINK;
3274 GROW;
3275 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003276 while ((cur != '<') && /* checked */
3277 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003278 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003279 if ((cur == ']') && (NXT(1) == ']') &&
3280 (NXT(2) == '>')) {
3281 if (cdata) break;
3282 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003283 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003284 }
3285 }
3286 COPY_BUF(l,buf,nbchar,cur);
3287 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003288 buf[nbchar] = 0;
3289
Owen Taylor3473f882001-02-23 17:55:21 +00003290 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003291 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003292 */
3293 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003294 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003295 if (ctxt->sax->ignorableWhitespace != NULL)
3296 ctxt->sax->ignorableWhitespace(ctxt->userData,
3297 buf, nbchar);
3298 } else {
3299 if (ctxt->sax->characters != NULL)
3300 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3301 }
3302 }
3303 nbchar = 0;
3304 }
3305 count++;
3306 if (count > 50) {
3307 GROW;
3308 count = 0;
3309 }
3310 NEXTL(l);
3311 cur = CUR_CHAR(l);
3312 }
3313 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003314 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003315 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003316 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003317 */
3318 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003319 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003320 if (ctxt->sax->ignorableWhitespace != NULL)
3321 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3322 } else {
3323 if (ctxt->sax->characters != NULL)
3324 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3325 }
3326 }
3327 }
3328}
3329
3330/**
3331 * xmlParseExternalID:
3332 * @ctxt: an XML parser context
3333 * @publicID: a xmlChar** receiving PubidLiteral
3334 * @strict: indicate whether we should restrict parsing to only
3335 * production [75], see NOTE below
3336 *
3337 * Parse an External ID or a Public ID
3338 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003339 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003340 * 'PUBLIC' S PubidLiteral S SystemLiteral
3341 *
3342 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3343 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3344 *
3345 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3346 *
3347 * Returns the function returns SystemLiteral and in the second
3348 * case publicID receives PubidLiteral, is strict is off
3349 * it is possible to return NULL and have publicID set.
3350 */
3351
3352xmlChar *
3353xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3354 xmlChar *URI = NULL;
3355
3356 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003357
3358 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003359 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003360 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003361 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003362 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3363 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 SKIP_BLANKS;
3366 URI = xmlParseSystemLiteral(ctxt);
3367 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003368 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003369 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003370 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003371 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003372 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003373 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003374 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003375 }
3376 SKIP_BLANKS;
3377 *publicID = xmlParsePubidLiteral(ctxt);
3378 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003379 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003380 }
3381 if (strict) {
3382 /*
3383 * We don't handle [83] so "S SystemLiteral" is required.
3384 */
William M. Brack76e95df2003-10-18 16:20:14 +00003385 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003387 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003388 }
3389 } else {
3390 /*
3391 * We handle [83] so we return immediately, if
3392 * "S SystemLiteral" is not detected. From a purely parsing
3393 * point of view that's a nice mess.
3394 */
3395 const xmlChar *ptr;
3396 GROW;
3397
3398 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003399 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003400
William M. Brack76e95df2003-10-18 16:20:14 +00003401 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003402 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3403 }
3404 SKIP_BLANKS;
3405 URI = xmlParseSystemLiteral(ctxt);
3406 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003407 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003408 }
3409 }
3410 return(URI);
3411}
3412
3413/**
3414 * xmlParseComment:
3415 * @ctxt: an XML parser context
3416 *
3417 * Skip an XML (SGML) comment <!-- .... -->
3418 * The spec says that "For compatibility, the string "--" (double-hyphen)
3419 * must not occur within comments. "
3420 *
3421 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3422 */
3423void
3424xmlParseComment(xmlParserCtxtPtr ctxt) {
3425 xmlChar *buf = NULL;
3426 int len;
3427 int size = XML_PARSER_BUFFER_SIZE;
3428 int q, ql;
3429 int r, rl;
3430 int cur, l;
3431 xmlParserInputState state;
3432 xmlParserInputPtr input = ctxt->input;
3433 int count = 0;
3434
3435 /*
3436 * Check that there is a comment right here.
3437 */
3438 if ((RAW != '<') || (NXT(1) != '!') ||
3439 (NXT(2) != '-') || (NXT(3) != '-')) return;
3440
3441 state = ctxt->instate;
3442 ctxt->instate = XML_PARSER_COMMENT;
3443 SHRINK;
3444 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003445 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003446 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003447 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003448 ctxt->instate = state;
3449 return;
3450 }
3451 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003452 if (q == 0)
3453 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003454 NEXTL(ql);
3455 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003456 if (r == 0)
3457 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003458 NEXTL(rl);
3459 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003460 if (cur == 0)
3461 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003462 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003463 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003464 ((cur != '>') ||
3465 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003466 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003467 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 }
3469 if (len + 5 >= size) {
3470 size *= 2;
3471 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3472 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003473 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003474 ctxt->instate = state;
3475 return;
3476 }
3477 }
3478 COPY_BUF(ql,buf,len,q);
3479 q = r;
3480 ql = rl;
3481 r = cur;
3482 rl = l;
3483
3484 count++;
3485 if (count > 50) {
3486 GROW;
3487 count = 0;
3488 }
3489 NEXTL(l);
3490 cur = CUR_CHAR(l);
3491 if (cur == 0) {
3492 SHRINK;
3493 GROW;
3494 cur = CUR_CHAR(l);
3495 }
3496 }
3497 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003498 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003499 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003500 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003501 xmlFree(buf);
3502 } else {
3503 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003504 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3505 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003506 }
3507 NEXT;
3508 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3509 (!ctxt->disableSAX))
3510 ctxt->sax->comment(ctxt->userData, buf);
3511 xmlFree(buf);
3512 }
3513 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003514 return;
3515not_terminated:
3516 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3517 "Comment not terminated\n", NULL);
3518 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003519}
3520
3521/**
3522 * xmlParsePITarget:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse the name of a PI
3526 *
3527 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3528 *
3529 * Returns the PITarget name or NULL
3530 */
3531
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003532const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003533xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003534 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003535
3536 name = xmlParseName(ctxt);
3537 if ((name != NULL) &&
3538 ((name[0] == 'x') || (name[0] == 'X')) &&
3539 ((name[1] == 'm') || (name[1] == 'M')) &&
3540 ((name[2] == 'l') || (name[2] == 'L'))) {
3541 int i;
3542 if ((name[0] == 'x') && (name[1] == 'm') &&
3543 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003544 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003545 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003546 return(name);
3547 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003548 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003549 return(name);
3550 }
3551 for (i = 0;;i++) {
3552 if (xmlW3CPIs[i] == NULL) break;
3553 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3554 return(name);
3555 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003556 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3557 "xmlParsePITarget: invalid name prefix 'xml'\n",
3558 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003559 }
3560 return(name);
3561}
3562
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003563#ifdef LIBXML_CATALOG_ENABLED
3564/**
3565 * xmlParseCatalogPI:
3566 * @ctxt: an XML parser context
3567 * @catalog: the PI value string
3568 *
3569 * parse an XML Catalog Processing Instruction.
3570 *
3571 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3572 *
3573 * Occurs only if allowed by the user and if happening in the Misc
3574 * part of the document before any doctype informations
3575 * This will add the given catalog to the parsing context in order
3576 * to be used if there is a resolution need further down in the document
3577 */
3578
3579static void
3580xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3581 xmlChar *URL = NULL;
3582 const xmlChar *tmp, *base;
3583 xmlChar marker;
3584
3585 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00003586 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003587 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3588 goto error;
3589 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00003590 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003591 if (*tmp != '=') {
3592 return;
3593 }
3594 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003595 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003596 marker = *tmp;
3597 if ((marker != '\'') && (marker != '"'))
3598 goto error;
3599 tmp++;
3600 base = tmp;
3601 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3602 if (*tmp == 0)
3603 goto error;
3604 URL = xmlStrndup(base, tmp - base);
3605 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003606 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003607 if (*tmp != 0)
3608 goto error;
3609
3610 if (URL != NULL) {
3611 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3612 xmlFree(URL);
3613 }
3614 return;
3615
3616error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00003617 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
3618 "Catalog PI syntax error: %s\n",
3619 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003620 if (URL != NULL)
3621 xmlFree(URL);
3622}
3623#endif
3624
Owen Taylor3473f882001-02-23 17:55:21 +00003625/**
3626 * xmlParsePI:
3627 * @ctxt: an XML parser context
3628 *
3629 * parse an XML Processing Instruction.
3630 *
3631 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3632 *
3633 * The processing is transfered to SAX once parsed.
3634 */
3635
3636void
3637xmlParsePI(xmlParserCtxtPtr ctxt) {
3638 xmlChar *buf = NULL;
3639 int len = 0;
3640 int size = XML_PARSER_BUFFER_SIZE;
3641 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003642 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003643 xmlParserInputState state;
3644 int count = 0;
3645
3646 if ((RAW == '<') && (NXT(1) == '?')) {
3647 xmlParserInputPtr input = ctxt->input;
3648 state = ctxt->instate;
3649 ctxt->instate = XML_PARSER_PI;
3650 /*
3651 * this is a Processing Instruction.
3652 */
3653 SKIP(2);
3654 SHRINK;
3655
3656 /*
3657 * Parse the target name and check for special support like
3658 * namespace.
3659 */
3660 target = xmlParsePITarget(ctxt);
3661 if (target != NULL) {
3662 if ((RAW == '?') && (NXT(1) == '>')) {
3663 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003664 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3665 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003666 }
3667 SKIP(2);
3668
3669 /*
3670 * SAX: PI detected.
3671 */
3672 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3673 (ctxt->sax->processingInstruction != NULL))
3674 ctxt->sax->processingInstruction(ctxt->userData,
3675 target, NULL);
3676 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003677 return;
3678 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003679 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003680 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003681 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003682 ctxt->instate = state;
3683 return;
3684 }
3685 cur = CUR;
3686 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003687 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3688 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003689 }
3690 SKIP_BLANKS;
3691 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003692 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003693 ((cur != '?') || (NXT(1) != '>'))) {
3694 if (len + 5 >= size) {
3695 size *= 2;
3696 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3697 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003698 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003699 ctxt->instate = state;
3700 return;
3701 }
3702 }
3703 count++;
3704 if (count > 50) {
3705 GROW;
3706 count = 0;
3707 }
3708 COPY_BUF(l,buf,len,cur);
3709 NEXTL(l);
3710 cur = CUR_CHAR(l);
3711 if (cur == 0) {
3712 SHRINK;
3713 GROW;
3714 cur = CUR_CHAR(l);
3715 }
3716 }
3717 buf[len] = 0;
3718 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003719 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3720 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003721 } else {
3722 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003723 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3724 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003725 }
3726 SKIP(2);
3727
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003728#ifdef LIBXML_CATALOG_ENABLED
3729 if (((state == XML_PARSER_MISC) ||
3730 (state == XML_PARSER_START)) &&
3731 (xmlStrEqual(target, XML_CATALOG_PI))) {
3732 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3733 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3734 (allow == XML_CATA_ALLOW_ALL))
3735 xmlParseCatalogPI(ctxt, buf);
3736 }
3737#endif
3738
3739
Owen Taylor3473f882001-02-23 17:55:21 +00003740 /*
3741 * SAX: PI detected.
3742 */
3743 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3744 (ctxt->sax->processingInstruction != NULL))
3745 ctxt->sax->processingInstruction(ctxt->userData,
3746 target, buf);
3747 }
3748 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003749 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003750 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003751 }
3752 ctxt->instate = state;
3753 }
3754}
3755
3756/**
3757 * xmlParseNotationDecl:
3758 * @ctxt: an XML parser context
3759 *
3760 * parse a notation declaration
3761 *
3762 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3763 *
3764 * Hence there is actually 3 choices:
3765 * 'PUBLIC' S PubidLiteral
3766 * 'PUBLIC' S PubidLiteral S SystemLiteral
3767 * and 'SYSTEM' S SystemLiteral
3768 *
3769 * See the NOTE on xmlParseExternalID().
3770 */
3771
3772void
3773xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003774 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 xmlChar *Pubid;
3776 xmlChar *Systemid;
3777
Daniel Veillarda07050d2003-10-19 14:46:32 +00003778 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003779 xmlParserInputPtr input = ctxt->input;
3780 SHRINK;
3781 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003782 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003783 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3784 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003785 return;
3786 }
3787 SKIP_BLANKS;
3788
Daniel Veillard76d66f42001-05-16 21:05:17 +00003789 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003790 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003791 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003792 return;
3793 }
William M. Brack76e95df2003-10-18 16:20:14 +00003794 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003795 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003796 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003797 return;
3798 }
3799 SKIP_BLANKS;
3800
3801 /*
3802 * Parse the IDs.
3803 */
3804 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3805 SKIP_BLANKS;
3806
3807 if (RAW == '>') {
3808 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003809 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3810 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003811 }
3812 NEXT;
3813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3814 (ctxt->sax->notationDecl != NULL))
3815 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3816 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003817 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003818 }
Owen Taylor3473f882001-02-23 17:55:21 +00003819 if (Systemid != NULL) xmlFree(Systemid);
3820 if (Pubid != NULL) xmlFree(Pubid);
3821 }
3822}
3823
3824/**
3825 * xmlParseEntityDecl:
3826 * @ctxt: an XML parser context
3827 *
3828 * parse <!ENTITY declarations
3829 *
3830 * [70] EntityDecl ::= GEDecl | PEDecl
3831 *
3832 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3833 *
3834 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3835 *
3836 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3837 *
3838 * [74] PEDef ::= EntityValue | ExternalID
3839 *
3840 * [76] NDataDecl ::= S 'NDATA' S Name
3841 *
3842 * [ VC: Notation Declared ]
3843 * The Name must match the declared name of a notation.
3844 */
3845
3846void
3847xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003848 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003849 xmlChar *value = NULL;
3850 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003851 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003852 int isParameter = 0;
3853 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003854 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003855
3856 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003857 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003858 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003859 SHRINK;
3860 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003861 skipped = SKIP_BLANKS;
3862 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003863 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3864 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
Owen Taylor3473f882001-02-23 17:55:21 +00003866
3867 if (RAW == '%') {
3868 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003869 skipped = SKIP_BLANKS;
3870 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003871 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3872 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003873 }
Owen Taylor3473f882001-02-23 17:55:21 +00003874 isParameter = 1;
3875 }
3876
Daniel Veillard76d66f42001-05-16 21:05:17 +00003877 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003879 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3880 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003881 return;
3882 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003883 skipped = SKIP_BLANKS;
3884 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003885 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3886 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003887 }
Owen Taylor3473f882001-02-23 17:55:21 +00003888
Daniel Veillardf5582f12002-06-11 10:08:16 +00003889 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003890 /*
3891 * handle the various case of definitions...
3892 */
3893 if (isParameter) {
3894 if ((RAW == '"') || (RAW == '\'')) {
3895 value = xmlParseEntityValue(ctxt, &orig);
3896 if (value) {
3897 if ((ctxt->sax != NULL) &&
3898 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3899 ctxt->sax->entityDecl(ctxt->userData, name,
3900 XML_INTERNAL_PARAMETER_ENTITY,
3901 NULL, NULL, value);
3902 }
3903 } else {
3904 URI = xmlParseExternalID(ctxt, &literal, 1);
3905 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003906 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003907 }
3908 if (URI) {
3909 xmlURIPtr uri;
3910
3911 uri = xmlParseURI((const char *) URI);
3912 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003913 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3914 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003915 /*
3916 * This really ought to be a well formedness error
3917 * but the XML Core WG decided otherwise c.f. issue
3918 * E26 of the XML erratas.
3919 */
Owen Taylor3473f882001-02-23 17:55:21 +00003920 } else {
3921 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003922 /*
3923 * Okay this is foolish to block those but not
3924 * invalid URIs.
3925 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003926 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 } else {
3928 if ((ctxt->sax != NULL) &&
3929 (!ctxt->disableSAX) &&
3930 (ctxt->sax->entityDecl != NULL))
3931 ctxt->sax->entityDecl(ctxt->userData, name,
3932 XML_EXTERNAL_PARAMETER_ENTITY,
3933 literal, URI, NULL);
3934 }
3935 xmlFreeURI(uri);
3936 }
3937 }
3938 }
3939 } else {
3940 if ((RAW == '"') || (RAW == '\'')) {
3941 value = xmlParseEntityValue(ctxt, &orig);
3942 if ((ctxt->sax != NULL) &&
3943 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3944 ctxt->sax->entityDecl(ctxt->userData, name,
3945 XML_INTERNAL_GENERAL_ENTITY,
3946 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003947 /*
3948 * For expat compatibility in SAX mode.
3949 */
3950 if ((ctxt->myDoc == NULL) ||
3951 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3952 if (ctxt->myDoc == NULL) {
3953 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3954 }
3955 if (ctxt->myDoc->intSubset == NULL)
3956 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3957 BAD_CAST "fake", NULL, NULL);
3958
Daniel Veillard1af9a412003-08-20 22:54:39 +00003959 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3960 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003961 }
Owen Taylor3473f882001-02-23 17:55:21 +00003962 } else {
3963 URI = xmlParseExternalID(ctxt, &literal, 1);
3964 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003965 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003966 }
3967 if (URI) {
3968 xmlURIPtr uri;
3969
3970 uri = xmlParseURI((const char *)URI);
3971 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003972 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3973 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003974 /*
3975 * This really ought to be a well formedness error
3976 * but the XML Core WG decided otherwise c.f. issue
3977 * E26 of the XML erratas.
3978 */
Owen Taylor3473f882001-02-23 17:55:21 +00003979 } else {
3980 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003981 /*
3982 * Okay this is foolish to block those but not
3983 * invalid URIs.
3984 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003985 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003986 }
3987 xmlFreeURI(uri);
3988 }
3989 }
William M. Brack76e95df2003-10-18 16:20:14 +00003990 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003991 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3992 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003993 }
3994 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003995 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003996 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00003997 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3999 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004000 }
4001 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004002 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004003 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4004 (ctxt->sax->unparsedEntityDecl != NULL))
4005 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4006 literal, URI, ndata);
4007 } else {
4008 if ((ctxt->sax != NULL) &&
4009 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4010 ctxt->sax->entityDecl(ctxt->userData, name,
4011 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4012 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004013 /*
4014 * For expat compatibility in SAX mode.
4015 * assuming the entity replacement was asked for
4016 */
4017 if ((ctxt->replaceEntities != 0) &&
4018 ((ctxt->myDoc == NULL) ||
4019 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4020 if (ctxt->myDoc == NULL) {
4021 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4022 }
4023
4024 if (ctxt->myDoc->intSubset == NULL)
4025 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4026 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004027 xmlSAX2EntityDecl(ctxt, name,
4028 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4029 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004030 }
Owen Taylor3473f882001-02-23 17:55:21 +00004031 }
4032 }
4033 }
4034 SKIP_BLANKS;
4035 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004036 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004037 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004038 } else {
4039 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004040 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4041 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004042 }
4043 NEXT;
4044 }
4045 if (orig != NULL) {
4046 /*
4047 * Ugly mechanism to save the raw entity value.
4048 */
4049 xmlEntityPtr cur = NULL;
4050
4051 if (isParameter) {
4052 if ((ctxt->sax != NULL) &&
4053 (ctxt->sax->getParameterEntity != NULL))
4054 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4055 } else {
4056 if ((ctxt->sax != NULL) &&
4057 (ctxt->sax->getEntity != NULL))
4058 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004059 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004060 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004061 }
Owen Taylor3473f882001-02-23 17:55:21 +00004062 }
4063 if (cur != NULL) {
4064 if (cur->orig != NULL)
4065 xmlFree(orig);
4066 else
4067 cur->orig = orig;
4068 } else
4069 xmlFree(orig);
4070 }
Owen Taylor3473f882001-02-23 17:55:21 +00004071 if (value != NULL) xmlFree(value);
4072 if (URI != NULL) xmlFree(URI);
4073 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004074 }
4075}
4076
4077/**
4078 * xmlParseDefaultDecl:
4079 * @ctxt: an XML parser context
4080 * @value: Receive a possible fixed default value for the attribute
4081 *
4082 * Parse an attribute default declaration
4083 *
4084 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4085 *
4086 * [ VC: Required Attribute ]
4087 * if the default declaration is the keyword #REQUIRED, then the
4088 * attribute must be specified for all elements of the type in the
4089 * attribute-list declaration.
4090 *
4091 * [ VC: Attribute Default Legal ]
4092 * The declared default value must meet the lexical constraints of
4093 * the declared attribute type c.f. xmlValidateAttributeDecl()
4094 *
4095 * [ VC: Fixed Attribute Default ]
4096 * if an attribute has a default value declared with the #FIXED
4097 * keyword, instances of that attribute must match the default value.
4098 *
4099 * [ WFC: No < in Attribute Values ]
4100 * handled in xmlParseAttValue()
4101 *
4102 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4103 * or XML_ATTRIBUTE_FIXED.
4104 */
4105
4106int
4107xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4108 int val;
4109 xmlChar *ret;
4110
4111 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004112 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004113 SKIP(9);
4114 return(XML_ATTRIBUTE_REQUIRED);
4115 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004116 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004117 SKIP(8);
4118 return(XML_ATTRIBUTE_IMPLIED);
4119 }
4120 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004121 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004122 SKIP(6);
4123 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004124 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004125 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4126 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004127 }
4128 SKIP_BLANKS;
4129 }
4130 ret = xmlParseAttValue(ctxt);
4131 ctxt->instate = XML_PARSER_DTD;
4132 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004133 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004134 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004135 } else
4136 *value = ret;
4137 return(val);
4138}
4139
4140/**
4141 * xmlParseNotationType:
4142 * @ctxt: an XML parser context
4143 *
4144 * parse an Notation attribute type.
4145 *
4146 * Note: the leading 'NOTATION' S part has already being parsed...
4147 *
4148 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4149 *
4150 * [ VC: Notation Attributes ]
4151 * Values of this type must match one of the notation names included
4152 * in the declaration; all notation names in the declaration must be declared.
4153 *
4154 * Returns: the notation attribute tree built while parsing
4155 */
4156
4157xmlEnumerationPtr
4158xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004159 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004160 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4161
4162 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004163 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004164 return(NULL);
4165 }
4166 SHRINK;
4167 do {
4168 NEXT;
4169 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004170 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004171 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004172 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4173 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004174 return(ret);
4175 }
4176 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004177 if (cur == NULL) return(ret);
4178 if (last == NULL) ret = last = cur;
4179 else {
4180 last->next = cur;
4181 last = cur;
4182 }
4183 SKIP_BLANKS;
4184 } while (RAW == '|');
4185 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004186 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004187 if ((last != NULL) && (last != ret))
4188 xmlFreeEnumeration(last);
4189 return(ret);
4190 }
4191 NEXT;
4192 return(ret);
4193}
4194
4195/**
4196 * xmlParseEnumerationType:
4197 * @ctxt: an XML parser context
4198 *
4199 * parse an Enumeration attribute type.
4200 *
4201 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4202 *
4203 * [ VC: Enumeration ]
4204 * Values of this type must match one of the Nmtoken tokens in
4205 * the declaration
4206 *
4207 * Returns: the enumeration attribute tree built while parsing
4208 */
4209
4210xmlEnumerationPtr
4211xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4212 xmlChar *name;
4213 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4214
4215 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004216 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004217 return(NULL);
4218 }
4219 SHRINK;
4220 do {
4221 NEXT;
4222 SKIP_BLANKS;
4223 name = xmlParseNmtoken(ctxt);
4224 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004225 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 return(ret);
4227 }
4228 cur = xmlCreateEnumeration(name);
4229 xmlFree(name);
4230 if (cur == NULL) return(ret);
4231 if (last == NULL) ret = last = cur;
4232 else {
4233 last->next = cur;
4234 last = cur;
4235 }
4236 SKIP_BLANKS;
4237 } while (RAW == '|');
4238 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004239 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004240 return(ret);
4241 }
4242 NEXT;
4243 return(ret);
4244}
4245
4246/**
4247 * xmlParseEnumeratedType:
4248 * @ctxt: an XML parser context
4249 * @tree: the enumeration tree built while parsing
4250 *
4251 * parse an Enumerated attribute type.
4252 *
4253 * [57] EnumeratedType ::= NotationType | Enumeration
4254 *
4255 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4256 *
4257 *
4258 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4259 */
4260
4261int
4262xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004263 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004264 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004265 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4267 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004268 return(0);
4269 }
4270 SKIP_BLANKS;
4271 *tree = xmlParseNotationType(ctxt);
4272 if (*tree == NULL) return(0);
4273 return(XML_ATTRIBUTE_NOTATION);
4274 }
4275 *tree = xmlParseEnumerationType(ctxt);
4276 if (*tree == NULL) return(0);
4277 return(XML_ATTRIBUTE_ENUMERATION);
4278}
4279
4280/**
4281 * xmlParseAttributeType:
4282 * @ctxt: an XML parser context
4283 * @tree: the enumeration tree built while parsing
4284 *
4285 * parse the Attribute list def for an element
4286 *
4287 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4288 *
4289 * [55] StringType ::= 'CDATA'
4290 *
4291 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4292 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4293 *
4294 * Validity constraints for attribute values syntax are checked in
4295 * xmlValidateAttributeValue()
4296 *
4297 * [ VC: ID ]
4298 * Values of type ID must match the Name production. A name must not
4299 * appear more than once in an XML document as a value of this type;
4300 * i.e., ID values must uniquely identify the elements which bear them.
4301 *
4302 * [ VC: One ID per Element Type ]
4303 * No element type may have more than one ID attribute specified.
4304 *
4305 * [ VC: ID Attribute Default ]
4306 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4307 *
4308 * [ VC: IDREF ]
4309 * Values of type IDREF must match the Name production, and values
4310 * of type IDREFS must match Names; each IDREF Name must match the value
4311 * of an ID attribute on some element in the XML document; i.e. IDREF
4312 * values must match the value of some ID attribute.
4313 *
4314 * [ VC: Entity Name ]
4315 * Values of type ENTITY must match the Name production, values
4316 * of type ENTITIES must match Names; each Entity Name must match the
4317 * name of an unparsed entity declared in the DTD.
4318 *
4319 * [ VC: Name Token ]
4320 * Values of type NMTOKEN must match the Nmtoken production; values
4321 * of type NMTOKENS must match Nmtokens.
4322 *
4323 * Returns the attribute type
4324 */
4325int
4326xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4327 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004328 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004329 SKIP(5);
4330 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004331 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004332 SKIP(6);
4333 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004334 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004335 SKIP(5);
4336 return(XML_ATTRIBUTE_IDREF);
4337 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4338 SKIP(2);
4339 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004340 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004341 SKIP(6);
4342 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004343 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004344 SKIP(8);
4345 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004346 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004347 SKIP(8);
4348 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004349 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004350 SKIP(7);
4351 return(XML_ATTRIBUTE_NMTOKEN);
4352 }
4353 return(xmlParseEnumeratedType(ctxt, tree));
4354}
4355
4356/**
4357 * xmlParseAttributeListDecl:
4358 * @ctxt: an XML parser context
4359 *
4360 * : parse the Attribute list def for an element
4361 *
4362 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4363 *
4364 * [53] AttDef ::= S Name S AttType S DefaultDecl
4365 *
4366 */
4367void
4368xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004369 const xmlChar *elemName;
4370 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 xmlEnumerationPtr tree;
4372
Daniel Veillarda07050d2003-10-19 14:46:32 +00004373 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004374 xmlParserInputPtr input = ctxt->input;
4375
4376 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004377 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004378 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004379 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004380 }
4381 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004382 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004383 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004384 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4385 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004386 return;
4387 }
4388 SKIP_BLANKS;
4389 GROW;
4390 while (RAW != '>') {
4391 const xmlChar *check = CUR_PTR;
4392 int type;
4393 int def;
4394 xmlChar *defaultValue = NULL;
4395
4396 GROW;
4397 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004398 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004400 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4401 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004402 break;
4403 }
4404 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004405 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004406 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004407 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004408 if (defaultValue != NULL)
4409 xmlFree(defaultValue);
4410 break;
4411 }
4412 SKIP_BLANKS;
4413
4414 type = xmlParseAttributeType(ctxt, &tree);
4415 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004416 if (defaultValue != NULL)
4417 xmlFree(defaultValue);
4418 break;
4419 }
4420
4421 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004422 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004423 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4424 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004425 if (defaultValue != NULL)
4426 xmlFree(defaultValue);
4427 if (tree != NULL)
4428 xmlFreeEnumeration(tree);
4429 break;
4430 }
4431 SKIP_BLANKS;
4432
4433 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4434 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004435 if (defaultValue != NULL)
4436 xmlFree(defaultValue);
4437 if (tree != NULL)
4438 xmlFreeEnumeration(tree);
4439 break;
4440 }
4441
4442 GROW;
4443 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004444 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004445 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004446 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004447 if (defaultValue != NULL)
4448 xmlFree(defaultValue);
4449 if (tree != NULL)
4450 xmlFreeEnumeration(tree);
4451 break;
4452 }
4453 SKIP_BLANKS;
4454 }
4455 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004456 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4457 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004458 if (defaultValue != NULL)
4459 xmlFree(defaultValue);
4460 if (tree != NULL)
4461 xmlFreeEnumeration(tree);
4462 break;
4463 }
4464 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4465 (ctxt->sax->attributeDecl != NULL))
4466 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4467 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004468 else if (tree != NULL)
4469 xmlFreeEnumeration(tree);
4470
4471 if ((ctxt->sax2) && (defaultValue != NULL) &&
4472 (def != XML_ATTRIBUTE_IMPLIED) &&
4473 (def != XML_ATTRIBUTE_REQUIRED)) {
4474 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4475 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004476 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4477 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4478 }
Owen Taylor3473f882001-02-23 17:55:21 +00004479 if (defaultValue != NULL)
4480 xmlFree(defaultValue);
4481 GROW;
4482 }
4483 if (RAW == '>') {
4484 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004485 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4486 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004487 }
4488 NEXT;
4489 }
Owen Taylor3473f882001-02-23 17:55:21 +00004490 }
4491}
4492
4493/**
4494 * xmlParseElementMixedContentDecl:
4495 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004496 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004497 *
4498 * parse the declaration for a Mixed Element content
4499 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4500 *
4501 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4502 * '(' S? '#PCDATA' S? ')'
4503 *
4504 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4505 *
4506 * [ VC: No Duplicate Types ]
4507 * The same name must not appear more than once in a single
4508 * mixed-content declaration.
4509 *
4510 * returns: the list of the xmlElementContentPtr describing the element choices
4511 */
4512xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004513xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004514 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004515 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004516
4517 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004518 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004519 SKIP(7);
4520 SKIP_BLANKS;
4521 SHRINK;
4522 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004523 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004524 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4525"Element content declaration doesn't start and stop in the same entity\n",
4526 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004527 }
Owen Taylor3473f882001-02-23 17:55:21 +00004528 NEXT;
4529 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4530 if (RAW == '*') {
4531 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4532 NEXT;
4533 }
4534 return(ret);
4535 }
4536 if ((RAW == '(') || (RAW == '|')) {
4537 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4538 if (ret == NULL) return(NULL);
4539 }
4540 while (RAW == '|') {
4541 NEXT;
4542 if (elem == NULL) {
4543 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4544 if (ret == NULL) return(NULL);
4545 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004546 if (cur != NULL)
4547 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004548 cur = ret;
4549 } else {
4550 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4551 if (n == NULL) return(NULL);
4552 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004553 if (n->c1 != NULL)
4554 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004555 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004556 if (n != NULL)
4557 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004558 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004559 }
4560 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004561 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004562 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004563 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004564 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004565 xmlFreeElementContent(cur);
4566 return(NULL);
4567 }
4568 SKIP_BLANKS;
4569 GROW;
4570 }
4571 if ((RAW == ')') && (NXT(1) == '*')) {
4572 if (elem != NULL) {
4573 cur->c2 = xmlNewElementContent(elem,
4574 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004575 if (cur->c2 != NULL)
4576 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004577 }
4578 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004579 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004580 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4581"Element content declaration doesn't start and stop in the same entity\n",
4582 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004583 }
Owen Taylor3473f882001-02-23 17:55:21 +00004584 SKIP(2);
4585 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004586 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004587 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 return(NULL);
4589 }
4590
4591 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004592 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004593 }
4594 return(ret);
4595}
4596
4597/**
4598 * xmlParseElementChildrenContentDecl:
4599 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004600 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004601 *
4602 * parse the declaration for a Mixed Element content
4603 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4604 *
4605 *
4606 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4607 *
4608 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4609 *
4610 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4611 *
4612 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4613 *
4614 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4615 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004616 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004617 * opening or closing parentheses in a choice, seq, or Mixed
4618 * construct is contained in the replacement text for a parameter
4619 * entity, both must be contained in the same replacement text. For
4620 * interoperability, if a parameter-entity reference appears in a
4621 * choice, seq, or Mixed construct, its replacement text should not
4622 * be empty, and neither the first nor last non-blank character of
4623 * the replacement text should be a connector (| or ,).
4624 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004625 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004626 * hierarchy.
4627 */
4628xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004629xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004630 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004631 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004632 xmlChar type = 0;
4633
4634 SKIP_BLANKS;
4635 GROW;
4636 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004637 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004638
Owen Taylor3473f882001-02-23 17:55:21 +00004639 /* Recurse on first child */
4640 NEXT;
4641 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004642 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004643 SKIP_BLANKS;
4644 GROW;
4645 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004646 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004647 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004648 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004649 return(NULL);
4650 }
4651 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004652 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004653 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004654 return(NULL);
4655 }
Owen Taylor3473f882001-02-23 17:55:21 +00004656 GROW;
4657 if (RAW == '?') {
4658 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4659 NEXT;
4660 } else if (RAW == '*') {
4661 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4662 NEXT;
4663 } else if (RAW == '+') {
4664 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4665 NEXT;
4666 } else {
4667 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4668 }
Owen Taylor3473f882001-02-23 17:55:21 +00004669 GROW;
4670 }
4671 SKIP_BLANKS;
4672 SHRINK;
4673 while (RAW != ')') {
4674 /*
4675 * Each loop we parse one separator and one element.
4676 */
4677 if (RAW == ',') {
4678 if (type == 0) type = CUR;
4679
4680 /*
4681 * Detect "Name | Name , Name" error
4682 */
4683 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004684 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004685 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004686 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004687 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004688 xmlFreeElementContent(last);
4689 if (ret != NULL)
4690 xmlFreeElementContent(ret);
4691 return(NULL);
4692 }
4693 NEXT;
4694
4695 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4696 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004697 if ((last != NULL) && (last != ret))
4698 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004699 xmlFreeElementContent(ret);
4700 return(NULL);
4701 }
4702 if (last == NULL) {
4703 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004704 if (ret != NULL)
4705 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004706 ret = cur = op;
4707 } else {
4708 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004709 if (op != NULL)
4710 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004711 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004712 if (last != NULL)
4713 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004714 cur =op;
4715 last = NULL;
4716 }
4717 } else if (RAW == '|') {
4718 if (type == 0) type = CUR;
4719
4720 /*
4721 * Detect "Name , Name | Name" error
4722 */
4723 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004724 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004725 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004726 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004727 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004728 xmlFreeElementContent(last);
4729 if (ret != NULL)
4730 xmlFreeElementContent(ret);
4731 return(NULL);
4732 }
4733 NEXT;
4734
4735 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4736 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004737 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004738 xmlFreeElementContent(last);
4739 if (ret != NULL)
4740 xmlFreeElementContent(ret);
4741 return(NULL);
4742 }
4743 if (last == NULL) {
4744 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004745 if (ret != NULL)
4746 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004747 ret = cur = op;
4748 } else {
4749 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004750 if (op != NULL)
4751 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004752 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004753 if (last != NULL)
4754 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004755 cur =op;
4756 last = NULL;
4757 }
4758 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004759 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004760 if (ret != NULL)
4761 xmlFreeElementContent(ret);
4762 return(NULL);
4763 }
4764 GROW;
4765 SKIP_BLANKS;
4766 GROW;
4767 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004768 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004769 /* Recurse on second child */
4770 NEXT;
4771 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004772 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004773 SKIP_BLANKS;
4774 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004775 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004776 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004777 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004778 if (ret != NULL)
4779 xmlFreeElementContent(ret);
4780 return(NULL);
4781 }
4782 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004783 if (RAW == '?') {
4784 last->ocur = XML_ELEMENT_CONTENT_OPT;
4785 NEXT;
4786 } else if (RAW == '*') {
4787 last->ocur = XML_ELEMENT_CONTENT_MULT;
4788 NEXT;
4789 } else if (RAW == '+') {
4790 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4791 NEXT;
4792 } else {
4793 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4794 }
4795 }
4796 SKIP_BLANKS;
4797 GROW;
4798 }
4799 if ((cur != NULL) && (last != NULL)) {
4800 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004801 if (last != NULL)
4802 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004803 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004804 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004805 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4806"Element content declaration doesn't start and stop in the same entity\n",
4807 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004808 }
Owen Taylor3473f882001-02-23 17:55:21 +00004809 NEXT;
4810 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004811 if (ret != NULL) {
4812 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4813 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4814 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4815 else
4816 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4817 }
Owen Taylor3473f882001-02-23 17:55:21 +00004818 NEXT;
4819 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004820 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004821 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004822 cur = ret;
4823 /*
4824 * Some normalization:
4825 * (a | b* | c?)* == (a | b | c)*
4826 */
4827 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4828 if ((cur->c1 != NULL) &&
4829 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4830 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4831 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4832 if ((cur->c2 != NULL) &&
4833 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4834 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4835 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4836 cur = cur->c2;
4837 }
4838 }
Owen Taylor3473f882001-02-23 17:55:21 +00004839 NEXT;
4840 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004841 if (ret != NULL) {
4842 int found = 0;
4843
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004844 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4845 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4846 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004847 else
4848 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004849 /*
4850 * Some normalization:
4851 * (a | b*)+ == (a | b)*
4852 * (a | b?)+ == (a | b)*
4853 */
4854 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4855 if ((cur->c1 != NULL) &&
4856 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4857 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4858 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4859 found = 1;
4860 }
4861 if ((cur->c2 != NULL) &&
4862 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4863 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4864 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4865 found = 1;
4866 }
4867 cur = cur->c2;
4868 }
4869 if (found)
4870 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4871 }
Owen Taylor3473f882001-02-23 17:55:21 +00004872 NEXT;
4873 }
4874 return(ret);
4875}
4876
4877/**
4878 * xmlParseElementContentDecl:
4879 * @ctxt: an XML parser context
4880 * @name: the name of the element being defined.
4881 * @result: the Element Content pointer will be stored here if any
4882 *
4883 * parse the declaration for an Element content either Mixed or Children,
4884 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4885 *
4886 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4887 *
4888 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4889 */
4890
4891int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004892xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004893 xmlElementContentPtr *result) {
4894
4895 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004896 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 int res;
4898
4899 *result = NULL;
4900
4901 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004902 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004903 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004904 return(-1);
4905 }
4906 NEXT;
4907 GROW;
4908 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004909 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004910 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004911 res = XML_ELEMENT_TYPE_MIXED;
4912 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004913 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004914 res = XML_ELEMENT_TYPE_ELEMENT;
4915 }
Owen Taylor3473f882001-02-23 17:55:21 +00004916 SKIP_BLANKS;
4917 *result = tree;
4918 return(res);
4919}
4920
4921/**
4922 * xmlParseElementDecl:
4923 * @ctxt: an XML parser context
4924 *
4925 * parse an Element declaration.
4926 *
4927 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4928 *
4929 * [ VC: Unique Element Type Declaration ]
4930 * No element type may be declared more than once
4931 *
4932 * Returns the type of the element, or -1 in case of error
4933 */
4934int
4935xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004936 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004937 int ret = -1;
4938 xmlElementContentPtr content = NULL;
4939
4940 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004941 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004942 xmlParserInputPtr input = ctxt->input;
4943
4944 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004945 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4947 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004948 }
4949 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004950 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004951 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004952 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4953 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004954 return(-1);
4955 }
4956 while ((RAW == 0) && (ctxt->inputNr > 1))
4957 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004958 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4960 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004963 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004964 SKIP(5);
4965 /*
4966 * Element must always be empty.
4967 */
4968 ret = XML_ELEMENT_TYPE_EMPTY;
4969 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4970 (NXT(2) == 'Y')) {
4971 SKIP(3);
4972 /*
4973 * Element is a generic container.
4974 */
4975 ret = XML_ELEMENT_TYPE_ANY;
4976 } else if (RAW == '(') {
4977 ret = xmlParseElementContentDecl(ctxt, name, &content);
4978 } else {
4979 /*
4980 * [ WFC: PEs in Internal Subset ] error handling.
4981 */
4982 if ((RAW == '%') && (ctxt->external == 0) &&
4983 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004984 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004985 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004986 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004987 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004988 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4989 }
Owen Taylor3473f882001-02-23 17:55:21 +00004990 return(-1);
4991 }
4992
4993 SKIP_BLANKS;
4994 /*
4995 * Pop-up of finished entities.
4996 */
4997 while ((RAW == 0) && (ctxt->inputNr > 1))
4998 xmlPopInput(ctxt);
4999 SKIP_BLANKS;
5000
5001 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005002 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005003 } else {
5004 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005005 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5006 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005007 }
5008
5009 NEXT;
5010 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5011 (ctxt->sax->elementDecl != NULL))
5012 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5013 content);
5014 }
5015 if (content != NULL) {
5016 xmlFreeElementContent(content);
5017 }
Owen Taylor3473f882001-02-23 17:55:21 +00005018 }
5019 return(ret);
5020}
5021
5022/**
Owen Taylor3473f882001-02-23 17:55:21 +00005023 * xmlParseConditionalSections
5024 * @ctxt: an XML parser context
5025 *
5026 * [61] conditionalSect ::= includeSect | ignoreSect
5027 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5028 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5029 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5030 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5031 */
5032
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005033static void
Owen Taylor3473f882001-02-23 17:55:21 +00005034xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5035 SKIP(3);
5036 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005037 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005038 SKIP(7);
5039 SKIP_BLANKS;
5040 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005041 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005042 } else {
5043 NEXT;
5044 }
5045 if (xmlParserDebugEntities) {
5046 if ((ctxt->input != NULL) && (ctxt->input->filename))
5047 xmlGenericError(xmlGenericErrorContext,
5048 "%s(%d): ", ctxt->input->filename,
5049 ctxt->input->line);
5050 xmlGenericError(xmlGenericErrorContext,
5051 "Entering INCLUDE Conditional Section\n");
5052 }
5053
5054 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5055 (NXT(2) != '>'))) {
5056 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005057 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005058
5059 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5060 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005061 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005062 NEXT;
5063 } else if (RAW == '%') {
5064 xmlParsePEReference(ctxt);
5065 } else
5066 xmlParseMarkupDecl(ctxt);
5067
5068 /*
5069 * Pop-up of finished entities.
5070 */
5071 while ((RAW == 0) && (ctxt->inputNr > 1))
5072 xmlPopInput(ctxt);
5073
Daniel Veillardfdc91562002-07-01 21:52:03 +00005074 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005075 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005076 break;
5077 }
5078 }
5079 if (xmlParserDebugEntities) {
5080 if ((ctxt->input != NULL) && (ctxt->input->filename))
5081 xmlGenericError(xmlGenericErrorContext,
5082 "%s(%d): ", ctxt->input->filename,
5083 ctxt->input->line);
5084 xmlGenericError(xmlGenericErrorContext,
5085 "Leaving INCLUDE Conditional Section\n");
5086 }
5087
Daniel Veillarda07050d2003-10-19 14:46:32 +00005088 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005089 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005090 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 int depth = 0;
5092
5093 SKIP(6);
5094 SKIP_BLANKS;
5095 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005096 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005097 } else {
5098 NEXT;
5099 }
5100 if (xmlParserDebugEntities) {
5101 if ((ctxt->input != NULL) && (ctxt->input->filename))
5102 xmlGenericError(xmlGenericErrorContext,
5103 "%s(%d): ", ctxt->input->filename,
5104 ctxt->input->line);
5105 xmlGenericError(xmlGenericErrorContext,
5106 "Entering IGNORE Conditional Section\n");
5107 }
5108
5109 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005110 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005111 * But disable SAX event generating DTD building in the meantime
5112 */
5113 state = ctxt->disableSAX;
5114 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005115 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005116 ctxt->instate = XML_PARSER_IGNORE;
5117
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005118 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005119 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5120 depth++;
5121 SKIP(3);
5122 continue;
5123 }
5124 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5125 if (--depth >= 0) SKIP(3);
5126 continue;
5127 }
5128 NEXT;
5129 continue;
5130 }
5131
5132 ctxt->disableSAX = state;
5133 ctxt->instate = instate;
5134
5135 if (xmlParserDebugEntities) {
5136 if ((ctxt->input != NULL) && (ctxt->input->filename))
5137 xmlGenericError(xmlGenericErrorContext,
5138 "%s(%d): ", ctxt->input->filename,
5139 ctxt->input->line);
5140 xmlGenericError(xmlGenericErrorContext,
5141 "Leaving IGNORE Conditional Section\n");
5142 }
5143
5144 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005145 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005146 }
5147
5148 if (RAW == 0)
5149 SHRINK;
5150
5151 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005152 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 } else {
5154 SKIP(3);
5155 }
5156}
5157
5158/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005159 * xmlParseMarkupDecl:
5160 * @ctxt: an XML parser context
5161 *
5162 * parse Markup declarations
5163 *
5164 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5165 * NotationDecl | PI | Comment
5166 *
5167 * [ VC: Proper Declaration/PE Nesting ]
5168 * Parameter-entity replacement text must be properly nested with
5169 * markup declarations. That is to say, if either the first character
5170 * or the last character of a markup declaration (markupdecl above) is
5171 * contained in the replacement text for a parameter-entity reference,
5172 * both must be contained in the same replacement text.
5173 *
5174 * [ WFC: PEs in Internal Subset ]
5175 * In the internal DTD subset, parameter-entity references can occur
5176 * only where markup declarations can occur, not within markup declarations.
5177 * (This does not apply to references that occur in external parameter
5178 * entities or to the external subset.)
5179 */
5180void
5181xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5182 GROW;
5183 xmlParseElementDecl(ctxt);
5184 xmlParseAttributeListDecl(ctxt);
5185 xmlParseEntityDecl(ctxt);
5186 xmlParseNotationDecl(ctxt);
5187 xmlParsePI(ctxt);
5188 xmlParseComment(ctxt);
5189 /*
5190 * This is only for internal subset. On external entities,
5191 * the replacement is done before parsing stage
5192 */
5193 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5194 xmlParsePEReference(ctxt);
5195
5196 /*
5197 * Conditional sections are allowed from entities included
5198 * by PE References in the internal subset.
5199 */
5200 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5201 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5202 xmlParseConditionalSections(ctxt);
5203 }
5204 }
5205
5206 ctxt->instate = XML_PARSER_DTD;
5207}
5208
5209/**
5210 * xmlParseTextDecl:
5211 * @ctxt: an XML parser context
5212 *
5213 * parse an XML declaration header for external entities
5214 *
5215 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5216 *
5217 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5218 */
5219
5220void
5221xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5222 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005223 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005224
5225 /*
5226 * We know that '<?xml' is here.
5227 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005228 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005229 SKIP(5);
5230 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005231 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005232 return;
5233 }
5234
William M. Brack76e95df2003-10-18 16:20:14 +00005235 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005236 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5237 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005238 }
5239 SKIP_BLANKS;
5240
5241 /*
5242 * We may have the VersionInfo here.
5243 */
5244 version = xmlParseVersionInfo(ctxt);
5245 if (version == NULL)
5246 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005247 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005248 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005249 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5250 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005251 }
5252 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005253 ctxt->input->version = version;
5254
5255 /*
5256 * We must have the encoding declaration
5257 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005258 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005259 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5260 /*
5261 * The XML REC instructs us to stop parsing right here
5262 */
5263 return;
5264 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005265 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5266 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5267 "Missing encoding in text declaration\n");
5268 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005269
5270 SKIP_BLANKS;
5271 if ((RAW == '?') && (NXT(1) == '>')) {
5272 SKIP(2);
5273 } else if (RAW == '>') {
5274 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005275 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005276 NEXT;
5277 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005278 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005279 MOVETO_ENDTAG(CUR_PTR);
5280 NEXT;
5281 }
5282}
5283
5284/**
Owen Taylor3473f882001-02-23 17:55:21 +00005285 * xmlParseExternalSubset:
5286 * @ctxt: an XML parser context
5287 * @ExternalID: the external identifier
5288 * @SystemID: the system identifier (or URL)
5289 *
5290 * parse Markup declarations from an external subset
5291 *
5292 * [30] extSubset ::= textDecl? extSubsetDecl
5293 *
5294 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5295 */
5296void
5297xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5298 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005299 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005300 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005301 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005302 xmlParseTextDecl(ctxt);
5303 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5304 /*
5305 * The XML REC instructs us to stop parsing right here
5306 */
5307 ctxt->instate = XML_PARSER_EOF;
5308 return;
5309 }
5310 }
5311 if (ctxt->myDoc == NULL) {
5312 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5313 }
5314 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5315 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5316
5317 ctxt->instate = XML_PARSER_DTD;
5318 ctxt->external = 1;
5319 while (((RAW == '<') && (NXT(1) == '?')) ||
5320 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005321 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005322 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005323 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005324
5325 GROW;
5326 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5327 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005328 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005329 NEXT;
5330 } else if (RAW == '%') {
5331 xmlParsePEReference(ctxt);
5332 } else
5333 xmlParseMarkupDecl(ctxt);
5334
5335 /*
5336 * Pop-up of finished entities.
5337 */
5338 while ((RAW == 0) && (ctxt->inputNr > 1))
5339 xmlPopInput(ctxt);
5340
Daniel Veillardfdc91562002-07-01 21:52:03 +00005341 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005342 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005343 break;
5344 }
5345 }
5346
5347 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005348 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005349 }
5350
5351}
5352
5353/**
5354 * xmlParseReference:
5355 * @ctxt: an XML parser context
5356 *
 * Parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
5361 *
5362 * [67] Reference ::= EntityRef | CharRef
5363 */
5364void
5365xmlParseReference(xmlParserCtxtPtr ctxt) {
5366 xmlEntityPtr ent;
5367 xmlChar *val;
5368 if (RAW != '&') return;
5369
5370 if (NXT(1) == '#') {
5371 int i = 0;
5372 xmlChar out[10];
5373 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005374 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005375
5376 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5377 /*
5378 * So we are using non-UTF-8 buffers
5379 * Check that the char fit on 8bits, if not
5380 * generate a CharRef.
5381 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005382 if (value <= 0xFF) {
5383 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005384 out[1] = 0;
5385 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5386 (!ctxt->disableSAX))
5387 ctxt->sax->characters(ctxt->userData, out, 1);
5388 } else {
5389 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005390 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005391 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005392 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005393 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5394 (!ctxt->disableSAX))
5395 ctxt->sax->reference(ctxt->userData, out);
5396 }
5397 } else {
5398 /*
5399 * Just encode the value in UTF-8
5400 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005401 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005402 out[i] = 0;
5403 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5404 (!ctxt->disableSAX))
5405 ctxt->sax->characters(ctxt->userData, out, i);
5406 }
5407 } else {
5408 ent = xmlParseEntityRef(ctxt);
5409 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005410 if (!ctxt->wellFormed)
5411 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005412 if ((ent->name != NULL) &&
5413 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5414 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005415 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005416
5417
5418 /*
5419 * The first reference to the entity trigger a parsing phase
5420 * where the ent->children is filled with the result from
5421 * the parsing.
5422 */
5423 if (ent->children == NULL) {
5424 xmlChar *value;
5425 value = ent->content;
5426
5427 /*
5428 * Check that this entity is well formed
5429 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005430 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005431 (value[1] == 0) && (value[0] == '<') &&
5432 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5433 /*
5434 * DONE: get definite answer on this !!!
5435 * Lots of entity decls are used to declare a single
5436 * char
5437 * <!ENTITY lt "<">
5438 * Which seems to be valid since
5439 * 2.4: The ampersand character (&) and the left angle
5440 * bracket (<) may appear in their literal form only
5441 * when used ... They are also legal within the literal
5442 * entity value of an internal entity declaration;i
5443 * see "4.3.2 Well-Formed Parsed Entities".
5444 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5445 * Looking at the OASIS test suite and James Clark
5446 * tests, this is broken. However the XML REC uses
5447 * it. Is the XML REC not well-formed ????
5448 * This is a hack to avoid this problem
5449 *
5450 * ANSWER: since lt gt amp .. are already defined,
5451 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005452 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005453 * is lousy but acceptable.
5454 */
5455 list = xmlNewDocText(ctxt->myDoc, value);
5456 if (list != NULL) {
5457 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5458 (ent->children == NULL)) {
5459 ent->children = list;
5460 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005461 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005462 list->parent = (xmlNodePtr) ent;
5463 } else {
5464 xmlFreeNodeList(list);
5465 }
5466 } else if (list != NULL) {
5467 xmlFreeNodeList(list);
5468 }
5469 } else {
5470 /*
5471 * 4.3.2: An internal general parsed entity is well-formed
5472 * if its replacement text matches the production labeled
5473 * content.
5474 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005475
5476 void *user_data;
5477 /*
5478 * This is a bit hackish but this seems the best
5479 * way to make sure both SAX and DOM entity support
5480 * behaves okay.
5481 */
5482 if (ctxt->userData == ctxt)
5483 user_data = NULL;
5484 else
5485 user_data = ctxt->userData;
5486
Owen Taylor3473f882001-02-23 17:55:21 +00005487 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5488 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005489 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5490 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005491 ctxt->depth--;
5492 } else if (ent->etype ==
5493 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5494 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005495 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005496 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005497 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005498 ctxt->depth--;
5499 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005500 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005501 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5502 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005503 }
5504 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005505 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005506 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005507 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005508 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5509 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005510 (ent->children == NULL)) {
5511 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005512 if (ctxt->replaceEntities) {
5513 /*
5514 * Prune it directly in the generated document
5515 * except for single text nodes.
5516 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005517 if (((list->type == XML_TEXT_NODE) &&
5518 (list->next == NULL)) ||
5519 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005520 list->parent = (xmlNodePtr) ent;
5521 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005522 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005523 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005524 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005525 while (list != NULL) {
5526 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005527 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005528 if (list->next == NULL)
5529 ent->last = list;
5530 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005531 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005532 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005533#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005534 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5535 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005536#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005537 }
5538 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005539 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005540 while (list != NULL) {
5541 list->parent = (xmlNodePtr) ent;
5542 if (list->next == NULL)
5543 ent->last = list;
5544 list = list->next;
5545 }
Owen Taylor3473f882001-02-23 17:55:21 +00005546 }
5547 } else {
5548 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005549 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005550 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005551 } else if ((ret != XML_ERR_OK) &&
5552 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005553 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005554 } else if (list != NULL) {
5555 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005556 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005557 }
5558 }
5559 }
5560 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5561 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5562 /*
5563 * Create a node.
5564 */
5565 ctxt->sax->reference(ctxt->userData, ent->name);
5566 return;
5567 } else if (ctxt->replaceEntities) {
5568 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5569 /*
5570 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005571 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005572 * In the first occurrence list contains the replacement.
5573 * progressive == 2 means we are operating on the Reader
5574 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005575 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005576 if (((list == NULL) && (ent->owner == 0)) ||
5577 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005578 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005579
5580 /*
5581 * when operating on a reader, the entities definitions
5582 * are always owning the entities subtree.
5583 if (ctxt->parseMode == XML_PARSE_READER)
5584 ent->owner = 1;
5585 */
5586
Daniel Veillard62f313b2001-07-04 19:49:14 +00005587 cur = ent->children;
5588 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005589 nw = xmlCopyNode(cur, 1);
5590 if (nw != NULL) {
5591 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005592 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005593 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005594 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005595 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005596 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005597 if (cur == ent->last) {
5598 /*
5599 * needed to detect some strange empty
5600 * node cases in the reader tests
5601 */
5602 if ((ctxt->parseMode == XML_PARSE_READER) &&
5603 (nw->type == XML_ELEMENT_NODE) &&
5604 (nw->children == NULL))
5605 nw->extra = 1;
5606
Daniel Veillard62f313b2001-07-04 19:49:14 +00005607 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005608 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005609 cur = cur->next;
5610 }
Daniel Veillard81273902003-09-30 00:43:48 +00005611#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005612 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005613 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005614#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005615 } else if (list == NULL) {
5616 xmlNodePtr nw = NULL, cur, next, last,
5617 firstChild = NULL;
5618 /*
5619 * Copy the entity child list and make it the new
5620 * entity child list. The goal is to make sure any
5621 * ID or REF referenced will be the one from the
5622 * document content and not the entity copy.
5623 */
5624 cur = ent->children;
5625 ent->children = NULL;
5626 last = ent->last;
5627 ent->last = NULL;
5628 while (cur != NULL) {
5629 next = cur->next;
5630 cur->next = NULL;
5631 cur->parent = NULL;
5632 nw = xmlCopyNode(cur, 1);
5633 if (nw != NULL) {
5634 nw->_private = cur->_private;
5635 if (firstChild == NULL){
5636 firstChild = cur;
5637 }
5638 xmlAddChild((xmlNodePtr) ent, nw);
5639 xmlAddChild(ctxt->node, cur);
5640 }
5641 if (cur == last)
5642 break;
5643 cur = next;
5644 }
5645 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005646#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005647 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5648 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005649#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005650 } else {
5651 /*
5652 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005653 * node with a possible previous text one which
5654 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005655 */
5656 if (ent->children->type == XML_TEXT_NODE)
5657 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5658 if ((ent->last != ent->children) &&
5659 (ent->last->type == XML_TEXT_NODE))
5660 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5661 xmlAddChildList(ctxt->node, ent->children);
5662 }
5663
Owen Taylor3473f882001-02-23 17:55:21 +00005664 /*
5665 * This is to avoid a nasty side effect, see
5666 * characters() in SAX.c
5667 */
5668 ctxt->nodemem = 0;
5669 ctxt->nodelen = 0;
5670 return;
5671 } else {
5672 /*
5673 * Probably running in SAX mode
5674 */
5675 xmlParserInputPtr input;
5676
5677 input = xmlNewEntityInputStream(ctxt, ent);
5678 xmlPushInput(ctxt, input);
5679 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005680 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5681 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005682 xmlParseTextDecl(ctxt);
5683 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5684 /*
5685 * The XML REC instructs us to stop parsing right here
5686 */
5687 ctxt->instate = XML_PARSER_EOF;
5688 return;
5689 }
5690 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005691 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5692 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005693 }
5694 }
5695 return;
5696 }
5697 }
5698 } else {
5699 val = ent->content;
5700 if (val == NULL) return;
5701 /*
5702 * inline the entity.
5703 */
5704 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5705 (!ctxt->disableSAX))
5706 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5707 }
5708 }
5709}
5710
5711/**
5712 * xmlParseEntityRef:
5713 * @ctxt: an XML parser context
5714 *
5715 * parse ENTITY references declarations
5716 *
5717 * [68] EntityRef ::= '&' Name ';'
5718 *
5719 * [ WFC: Entity Declared ]
5720 * In a document without any DTD, a document with only an internal DTD
5721 * subset which contains no parameter entity references, or a document
5722 * with "standalone='yes'", the Name given in the entity reference
5723 * must match that in an entity declaration, except that well-formed
5724 * documents need not declare any of the following entities: amp, lt,
5725 * gt, apos, quot. The declaration of a parameter entity must precede
5726 * any reference to it. Similarly, the declaration of a general entity
5727 * must precede any reference to it which appears in a default value in an
5728 * attribute-list declaration. Note that if entities are declared in the
5729 * external subset or in external parameter entities, a non-validating
5730 * processor is not obligated to read and process their declarations;
5731 * for such documents, the rule that an entity must be declared is a
5732 * well-formedness constraint only if standalone='yes'.
5733 *
5734 * [ WFC: Parsed Entity ]
5735 * An entity reference must not contain the name of an unparsed entity
5736 *
5737 * Returns the xmlEntityPtr if found, or NULL otherwise.
5738 */
5739xmlEntityPtr
5740xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005741 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005742 xmlEntityPtr ent = NULL;
5743
5744 GROW;
5745
5746 if (RAW == '&') {
5747 NEXT;
5748 name = xmlParseName(ctxt);
5749 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005750 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5751 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005752 } else {
5753 if (RAW == ';') {
5754 NEXT;
5755 /*
5756 * Ask first SAX for entity resolution, otherwise try the
5757 * predefined set.
5758 */
5759 if (ctxt->sax != NULL) {
5760 if (ctxt->sax->getEntity != NULL)
5761 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005762 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005763 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005764 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5765 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005766 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005767 }
Owen Taylor3473f882001-02-23 17:55:21 +00005768 }
5769 /*
5770 * [ WFC: Entity Declared ]
5771 * In a document without any DTD, a document with only an
5772 * internal DTD subset which contains no parameter entity
5773 * references, or a document with "standalone='yes'", the
5774 * Name given in the entity reference must match that in an
5775 * entity declaration, except that well-formed documents
5776 * need not declare any of the following entities: amp, lt,
5777 * gt, apos, quot.
5778 * The declaration of a parameter entity must precede any
5779 * reference to it.
5780 * Similarly, the declaration of a general entity must
5781 * precede any reference to it which appears in a default
5782 * value in an attribute-list declaration. Note that if
5783 * entities are declared in the external subset or in
5784 * external parameter entities, a non-validating processor
5785 * is not obligated to read and process their declarations;
5786 * for such documents, the rule that an entity must be
5787 * declared is a well-formedness constraint only if
5788 * standalone='yes'.
5789 */
5790 if (ent == NULL) {
5791 if ((ctxt->standalone == 1) ||
5792 ((ctxt->hasExternalSubset == 0) &&
5793 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005794 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005795 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005796 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005797 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005798 "Entity '%s' not defined\n", name);
5799 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005800 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005801 }
5802
5803 /*
5804 * [ WFC: Parsed Entity ]
5805 * An entity reference must not contain the name of an
5806 * unparsed entity
5807 */
5808 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005809 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005810 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005811 }
5812
5813 /*
5814 * [ WFC: No External Entity References ]
5815 * Attribute values cannot contain direct or indirect
5816 * entity references to external entities.
5817 */
5818 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5819 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005820 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5821 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005822 }
5823 /*
5824 * [ WFC: No < in Attribute Values ]
5825 * The replacement text of any entity referred to directly or
5826 * indirectly in an attribute value (other than "&lt;") must
5827 * not contain a <.
5828 */
5829 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5830 (ent != NULL) &&
5831 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5832 (ent->content != NULL) &&
5833 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005834 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005835 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005836 }
5837
5838 /*
5839 * Internal check, no parameter entities here ...
5840 */
5841 else {
5842 switch (ent->etype) {
5843 case XML_INTERNAL_PARAMETER_ENTITY:
5844 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005845 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5846 "Attempt to reference the parameter entity '%s'\n",
5847 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005848 break;
5849 default:
5850 break;
5851 }
5852 }
5853
5854 /*
5855 * [ WFC: No Recursion ]
5856 * A parsed entity must not contain a recursive reference
5857 * to itself, either directly or indirectly.
5858 * Done somewhere else
5859 */
5860
5861 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005862 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005863 }
Owen Taylor3473f882001-02-23 17:55:21 +00005864 }
5865 }
5866 return(ent);
5867}
5868
5869/**
5870 * xmlParseStringEntityRef:
5871 * @ctxt: an XML parser context
5872 * @str: a pointer to an index in the string
5873 *
5874 * parse ENTITY references declarations, but this version parses it from
5875 * a string value.
5876 *
5877 * [68] EntityRef ::= '&' Name ';'
5878 *
5879 * [ WFC: Entity Declared ]
5880 * In a document without any DTD, a document with only an internal DTD
5881 * subset which contains no parameter entity references, or a document
5882 * with "standalone='yes'", the Name given in the entity reference
5883 * must match that in an entity declaration, except that well-formed
5884 * documents need not declare any of the following entities: amp, lt,
5885 * gt, apos, quot. The declaration of a parameter entity must precede
5886 * any reference to it. Similarly, the declaration of a general entity
5887 * must precede any reference to it which appears in a default value in an
5888 * attribute-list declaration. Note that if entities are declared in the
5889 * external subset or in external parameter entities, a non-validating
5890 * processor is not obligated to read and process their declarations;
5891 * for such documents, the rule that an entity must be declared is a
5892 * well-formedness constraint only if standalone='yes'.
5893 *
5894 * [ WFC: Parsed Entity ]
5895 * An entity reference must not contain the name of an unparsed entity
5896 *
5897 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5898 * is updated to the current location in the string.
5899 */
5900xmlEntityPtr
5901xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5902 xmlChar *name;
5903 const xmlChar *ptr;
5904 xmlChar cur;
5905 xmlEntityPtr ent = NULL;
5906
5907 if ((str == NULL) || (*str == NULL))
5908 return(NULL);
5909 ptr = *str;
5910 cur = *ptr;
5911 if (cur == '&') {
5912 ptr++;
5913 cur = *ptr;
5914 name = xmlParseStringName(ctxt, &ptr);
5915 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005916 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5917 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005918 } else {
5919 if (*ptr == ';') {
5920 ptr++;
5921 /*
5922 * Ask first SAX for entity resolution, otherwise try the
5923 * predefined set.
5924 */
5925 if (ctxt->sax != NULL) {
5926 if (ctxt->sax->getEntity != NULL)
5927 ent = ctxt->sax->getEntity(ctxt->userData, name);
5928 if (ent == NULL)
5929 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005930 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005931 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005932 }
Owen Taylor3473f882001-02-23 17:55:21 +00005933 }
5934 /*
5935 * [ WFC: Entity Declared ]
5936 * In a document without any DTD, a document with only an
5937 * internal DTD subset which contains no parameter entity
5938 * references, or a document with "standalone='yes'", the
5939 * Name given in the entity reference must match that in an
5940 * entity declaration, except that well-formed documents
5941 * need not declare any of the following entities: amp, lt,
5942 * gt, apos, quot.
5943 * The declaration of a parameter entity must precede any
5944 * reference to it.
5945 * Similarly, the declaration of a general entity must
5946 * precede any reference to it which appears in a default
5947 * value in an attribute-list declaration. Note that if
5948 * entities are declared in the external subset or in
5949 * external parameter entities, a non-validating processor
5950 * is not obligated to read and process their declarations;
5951 * for such documents, the rule that an entity must be
5952 * declared is a well-formedness constraint only if
5953 * standalone='yes'.
5954 */
5955 if (ent == NULL) {
5956 if ((ctxt->standalone == 1) ||
5957 ((ctxt->hasExternalSubset == 0) &&
5958 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005959 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005960 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005961 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005962 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005963 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005964 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005965 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005966 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005967 }
5968
5969 /*
5970 * [ WFC: Parsed Entity ]
5971 * An entity reference must not contain the name of an
5972 * unparsed entity
5973 */
5974 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005975 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005976 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005977 }
5978
5979 /*
5980 * [ WFC: No External Entity References ]
5981 * Attribute values cannot contain direct or indirect
5982 * entity references to external entities.
5983 */
5984 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5985 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005986 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005987 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005988 }
5989 /*
5990 * [ WFC: No < in Attribute Values ]
5991 * The replacement text of any entity referred to directly or
5992 * indirectly in an attribute value (other than "&lt;") must
5993 * not contain a <.
5994 */
5995 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5996 (ent != NULL) &&
5997 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5998 (ent->content != NULL) &&
5999 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006000 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6001 "'<' in entity '%s' is not allowed in attributes values\n",
6002 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006003 }
6004
6005 /*
6006 * Internal check, no parameter entities here ...
6007 */
6008 else {
6009 switch (ent->etype) {
6010 case XML_INTERNAL_PARAMETER_ENTITY:
6011 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006012 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6013 "Attempt to reference the parameter entity '%s'\n",
6014 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006015 break;
6016 default:
6017 break;
6018 }
6019 }
6020
6021 /*
6022 * [ WFC: No Recursion ]
6023 * A parsed entity must not contain a recursive reference
6024 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006025 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006026 */
6027
6028 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006030 }
6031 xmlFree(name);
6032 }
6033 }
6034 *str = ptr;
6035 return(ent);
6036}
6037
6038/**
6039 * xmlParsePEReference:
6040 * @ctxt: an XML parser context
6041 *
6042 * parse PEReference declarations
6043 * The entity content is handled directly by pushing it's content as
6044 * a new input stream.
6045 *
6046 * [69] PEReference ::= '%' Name ';'
6047 *
6048 * [ WFC: No Recursion ]
6049 * A parsed entity must not contain a recursive
6050 * reference to itself, either directly or indirectly.
6051 *
6052 * [ WFC: Entity Declared ]
6053 * In a document without any DTD, a document with only an internal DTD
6054 * subset which contains no parameter entity references, or a document
6055 * with "standalone='yes'", ... ... The declaration of a parameter
6056 * entity must precede any reference to it...
6057 *
6058 * [ VC: Entity Declared ]
6059 * In a document with an external subset or external parameter entities
6060 * with "standalone='no'", ... ... The declaration of a parameter entity
6061 * must precede any reference to it...
6062 *
6063 * [ WFC: In DTD ]
6064 * Parameter-entity references may only appear in the DTD.
6065 * NOTE: misleading but this is handled.
6066 */
6067void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006068xmlParsePEReference(xmlParserCtxtPtr ctxt)
6069{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006070 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006071 xmlEntityPtr entity = NULL;
6072 xmlParserInputPtr input;
6073
6074 if (RAW == '%') {
6075 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006076 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006077 if (name == NULL) {
6078 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079 "xmlParsePEReference: no name\n");
6080 } else {
6081 if (RAW == ';') {
6082 NEXT;
6083 if ((ctxt->sax != NULL) &&
6084 (ctxt->sax->getParameterEntity != NULL))
6085 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6086 name);
6087 if (entity == NULL) {
6088 /*
6089 * [ WFC: Entity Declared ]
6090 * In a document without any DTD, a document with only an
6091 * internal DTD subset which contains no parameter entity
6092 * references, or a document with "standalone='yes'", ...
6093 * ... The declaration of a parameter entity must precede
6094 * any reference to it...
6095 */
6096 if ((ctxt->standalone == 1) ||
6097 ((ctxt->hasExternalSubset == 0) &&
6098 (ctxt->hasPErefs == 0))) {
6099 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6100 "PEReference: %%%s; not found\n",
6101 name);
6102 } else {
6103 /*
6104 * [ VC: Entity Declared ]
6105 * In a document with an external subset or external
6106 * parameter entities with "standalone='no'", ...
6107 * ... The declaration of a parameter entity must
6108 * precede any reference to it...
6109 */
6110 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6111 "PEReference: %%%s; not found\n",
6112 name, NULL);
6113 ctxt->valid = 0;
6114 }
6115 } else {
6116 /*
6117 * Internal checking in case the entity quest barfed
6118 */
6119 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6120 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6121 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6122 "Internal: %%%s; is not a parameter entity\n",
6123 name, NULL);
6124 } else if (ctxt->input->free != deallocblankswrapper) {
6125 input =
6126 xmlNewBlanksWrapperInputStream(ctxt, entity);
6127 xmlPushInput(ctxt, input);
6128 } else {
6129 /*
6130 * TODO !!!
6131 * handle the extra spaces added before and after
6132 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6133 */
6134 input = xmlNewEntityInputStream(ctxt, entity);
6135 xmlPushInput(ctxt, input);
6136 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006137 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006138 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006139 xmlParseTextDecl(ctxt);
6140 if (ctxt->errNo ==
6141 XML_ERR_UNSUPPORTED_ENCODING) {
6142 /*
6143 * The XML REC instructs us to stop parsing
6144 * right here
6145 */
6146 ctxt->instate = XML_PARSER_EOF;
6147 return;
6148 }
6149 }
6150 }
6151 }
6152 ctxt->hasPErefs = 1;
6153 } else {
6154 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6155 }
6156 }
Owen Taylor3473f882001-02-23 17:55:21 +00006157 }
6158}
6159
6160/**
6161 * xmlParseStringPEReference:
6162 * @ctxt: an XML parser context
6163 * @str: a pointer to an index in the string
6164 *
6165 * parse PEReference declarations
6166 *
6167 * [69] PEReference ::= '%' Name ';'
6168 *
6169 * [ WFC: No Recursion ]
6170 * A parsed entity must not contain a recursive
6171 * reference to itself, either directly or indirectly.
6172 *
6173 * [ WFC: Entity Declared ]
6174 * In a document without any DTD, a document with only an internal DTD
6175 * subset which contains no parameter entity references, or a document
6176 * with "standalone='yes'", ... ... The declaration of a parameter
6177 * entity must precede any reference to it...
6178 *
6179 * [ VC: Entity Declared ]
6180 * In a document with an external subset or external parameter entities
6181 * with "standalone='no'", ... ... The declaration of a parameter entity
6182 * must precede any reference to it...
6183 *
6184 * [ WFC: In DTD ]
6185 * Parameter-entity references may only appear in the DTD.
6186 * NOTE: misleading but this is handled.
6187 *
6188 * Returns the string of the entity content.
6189 * str is updated to the current value of the index
6190 */
6191xmlEntityPtr
6192xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6193 const xmlChar *ptr;
6194 xmlChar cur;
6195 xmlChar *name;
6196 xmlEntityPtr entity = NULL;
6197
6198 if ((str == NULL) || (*str == NULL)) return(NULL);
6199 ptr = *str;
6200 cur = *ptr;
6201 if (cur == '%') {
6202 ptr++;
6203 cur = *ptr;
6204 name = xmlParseStringName(ctxt, &ptr);
6205 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006206 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6207 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006208 } else {
6209 cur = *ptr;
6210 if (cur == ';') {
6211 ptr++;
6212 cur = *ptr;
6213 if ((ctxt->sax != NULL) &&
6214 (ctxt->sax->getParameterEntity != NULL))
6215 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6216 name);
6217 if (entity == NULL) {
6218 /*
6219 * [ WFC: Entity Declared ]
6220 * In a document without any DTD, a document with only an
6221 * internal DTD subset which contains no parameter entity
6222 * references, or a document with "standalone='yes'", ...
6223 * ... The declaration of a parameter entity must precede
6224 * any reference to it...
6225 */
6226 if ((ctxt->standalone == 1) ||
6227 ((ctxt->hasExternalSubset == 0) &&
6228 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006229 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006230 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006231 } else {
6232 /*
6233 * [ VC: Entity Declared ]
6234 * In a document with an external subset or external
6235 * parameter entities with "standalone='no'", ...
6236 * ... The declaration of a parameter entity must
6237 * precede any reference to it...
6238 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006239 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6240 "PEReference: %%%s; not found\n",
6241 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006242 ctxt->valid = 0;
6243 }
6244 } else {
6245 /*
6246 * Internal checking in case the entity quest barfed
6247 */
6248 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6249 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006250 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6251 "%%%s; is not a parameter entity\n",
6252 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006253 }
6254 }
6255 ctxt->hasPErefs = 1;
6256 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006257 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006258 }
6259 xmlFree(name);
6260 }
6261 }
6262 *str = ptr;
6263 return(entity);
6264}
6265
6266/**
6267 * xmlParseDocTypeDecl:
6268 * @ctxt: an XML parser context
6269 *
6270 * parse a DOCTYPE declaration
6271 *
6272 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6273 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6274 *
6275 * [ VC: Root Element Type ]
6276 * The Name in the document type declaration must match the element
6277 * type of the root element.
6278 */
6279
6280void
6281xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006282 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006283 xmlChar *ExternalID = NULL;
6284 xmlChar *URI = NULL;
6285
6286 /*
6287 * We know that '<!DOCTYPE' has been detected.
6288 */
6289 SKIP(9);
6290
6291 SKIP_BLANKS;
6292
6293 /*
6294 * Parse the DOCTYPE name.
6295 */
6296 name = xmlParseName(ctxt);
6297 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006298 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6299 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006300 }
6301 ctxt->intSubName = name;
6302
6303 SKIP_BLANKS;
6304
6305 /*
6306 * Check for SystemID and ExternalID
6307 */
6308 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6309
6310 if ((URI != NULL) || (ExternalID != NULL)) {
6311 ctxt->hasExternalSubset = 1;
6312 }
6313 ctxt->extSubURI = URI;
6314 ctxt->extSubSystem = ExternalID;
6315
6316 SKIP_BLANKS;
6317
6318 /*
6319 * Create and update the internal subset.
6320 */
6321 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6322 (!ctxt->disableSAX))
6323 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6324
6325 /*
6326 * Is there any internal subset declarations ?
6327 * they are handled separately in xmlParseInternalSubset()
6328 */
6329 if (RAW == '[')
6330 return;
6331
6332 /*
6333 * We should be at the end of the DOCTYPE declaration.
6334 */
6335 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006336 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 }
6338 NEXT;
6339}
6340
6341/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006342 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006343 * @ctxt: an XML parser context
6344 *
6345 * parse the internal subset declaration
6346 *
6347 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6348 */
6349
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006350static void
Owen Taylor3473f882001-02-23 17:55:21 +00006351xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6352 /*
6353 * Is there any DTD definition ?
6354 */
6355 if (RAW == '[') {
6356 ctxt->instate = XML_PARSER_DTD;
6357 NEXT;
6358 /*
6359 * Parse the succession of Markup declarations and
6360 * PEReferences.
6361 * Subsequence (markupdecl | PEReference | S)*
6362 */
6363 while (RAW != ']') {
6364 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006365 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006366
6367 SKIP_BLANKS;
6368 xmlParseMarkupDecl(ctxt);
6369 xmlParsePEReference(ctxt);
6370
6371 /*
6372 * Pop-up of finished entities.
6373 */
6374 while ((RAW == 0) && (ctxt->inputNr > 1))
6375 xmlPopInput(ctxt);
6376
6377 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006378 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006379 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006380 break;
6381 }
6382 }
6383 if (RAW == ']') {
6384 NEXT;
6385 SKIP_BLANKS;
6386 }
6387 }
6388
6389 /*
6390 * We should be at the end of the DOCTYPE declaration.
6391 */
6392 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006393 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006394 }
6395 NEXT;
6396}
6397
Daniel Veillard81273902003-09-30 00:43:48 +00006398#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006399/**
6400 * xmlParseAttribute:
6401 * @ctxt: an XML parser context
6402 * @value: a xmlChar ** used to store the value of the attribute
6403 *
6404 * parse an attribute
6405 *
6406 * [41] Attribute ::= Name Eq AttValue
6407 *
6408 * [ WFC: No External Entity References ]
6409 * Attribute values cannot contain direct or indirect entity references
6410 * to external entities.
6411 *
6412 * [ WFC: No < in Attribute Values ]
6413 * The replacement text of any entity referred to directly or indirectly in
6414 * an attribute value (other than "&lt;") must not contain a <.
6415 *
6416 * [ VC: Attribute Value Type ]
6417 * The attribute must have been declared; the value must be of the type
6418 * declared for it.
6419 *
6420 * [25] Eq ::= S? '=' S?
6421 *
6422 * With namespace:
6423 *
6424 * [NS 11] Attribute ::= QName Eq AttValue
6425 *
6426 * Also the case QName == xmlns:??? is handled independently as a namespace
6427 * definition.
6428 *
6429 * Returns the attribute name, and the value in *value.
6430 */
6431
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006432const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006433xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006434 const xmlChar *name;
6435 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006436
6437 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006438 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006439 name = xmlParseName(ctxt);
6440 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006441 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006442 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006443 return(NULL);
6444 }
6445
6446 /*
6447 * read the value
6448 */
6449 SKIP_BLANKS;
6450 if (RAW == '=') {
6451 NEXT;
6452 SKIP_BLANKS;
6453 val = xmlParseAttValue(ctxt);
6454 ctxt->instate = XML_PARSER_CONTENT;
6455 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006456 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006457 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006458 return(NULL);
6459 }
6460
6461 /*
6462 * Check that xml:lang conforms to the specification
6463 * No more registered as an error, just generate a warning now
6464 * since this was deprecated in XML second edition
6465 */
6466 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6467 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006468 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6469 "Malformed value for xml:lang : %s\n",
6470 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006471 }
6472 }
6473
6474 /*
6475 * Check that xml:space conforms to the specification
6476 */
6477 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6478 if (xmlStrEqual(val, BAD_CAST "default"))
6479 *(ctxt->space) = 0;
6480 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6481 *(ctxt->space) = 1;
6482 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006483 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006484"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006485 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006486 }
6487 }
6488
6489 *value = val;
6490 return(name);
6491}
6492
6493/**
6494 * xmlParseStartTag:
6495 * @ctxt: an XML parser context
6496 *
6497 * parse a start of tag either for rule element or
6498 * EmptyElement. In both case we don't parse the tag closing chars.
6499 *
6500 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6501 *
6502 * [ WFC: Unique Att Spec ]
6503 * No attribute name may appear more than once in the same start-tag or
6504 * empty-element tag.
6505 *
6506 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6507 *
6508 * [ WFC: Unique Att Spec ]
6509 * No attribute name may appear more than once in the same start-tag or
6510 * empty-element tag.
6511 *
6512 * With namespace:
6513 *
6514 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6515 *
6516 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6517 *
6518 * Returns the element name parsed
6519 */
6520
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006521const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006522xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006523 const xmlChar *name;
6524 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006525 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006526 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006527 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006528 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006529 int i;
6530
6531 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006532 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006533
6534 name = xmlParseName(ctxt);
6535 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006536 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006537 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006538 return(NULL);
6539 }
6540
6541 /*
6542 * Now parse the attributes, it ends up with the ending
6543 *
6544 * (S Attribute)* S?
6545 */
6546 SKIP_BLANKS;
6547 GROW;
6548
Daniel Veillard21a0f912001-02-25 19:54:14 +00006549 while ((RAW != '>') &&
6550 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006551 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006552 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006553 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006554
6555 attname = xmlParseAttribute(ctxt, &attvalue);
6556 if ((attname != NULL) && (attvalue != NULL)) {
6557 /*
6558 * [ WFC: Unique Att Spec ]
6559 * No attribute name may appear more than once in the same
6560 * start-tag or empty-element tag.
6561 */
6562 for (i = 0; i < nbatts;i += 2) {
6563 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006564 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 xmlFree(attvalue);
6566 goto failed;
6567 }
6568 }
Owen Taylor3473f882001-02-23 17:55:21 +00006569 /*
6570 * Add the pair to atts
6571 */
6572 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006573 maxatts = 22; /* allow for 10 attrs by default */
6574 atts = (const xmlChar **)
6575 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006576 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006577 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006578 if (attvalue != NULL)
6579 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006580 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006581 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006582 ctxt->atts = atts;
6583 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006584 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006585 const xmlChar **n;
6586
Owen Taylor3473f882001-02-23 17:55:21 +00006587 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006588 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006589 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006590 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006591 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006592 if (attvalue != NULL)
6593 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006594 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006595 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006596 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006597 ctxt->atts = atts;
6598 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006599 }
6600 atts[nbatts++] = attname;
6601 atts[nbatts++] = attvalue;
6602 atts[nbatts] = NULL;
6603 atts[nbatts + 1] = NULL;
6604 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006605 if (attvalue != NULL)
6606 xmlFree(attvalue);
6607 }
6608
6609failed:
6610
Daniel Veillard3772de32002-12-17 10:31:45 +00006611 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006612 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6613 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006614 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006615 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6616 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006617 }
6618 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006619 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6620 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006621 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6622 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006623 break;
6624 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006625 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006626 GROW;
6627 }
6628
6629 /*
6630 * SAX: Start of Element !
6631 */
6632 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006633 (!ctxt->disableSAX)) {
6634 if (nbatts > 0)
6635 ctxt->sax->startElement(ctxt->userData, name, atts);
6636 else
6637 ctxt->sax->startElement(ctxt->userData, name, NULL);
6638 }
Owen Taylor3473f882001-02-23 17:55:21 +00006639
6640 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006641 /* Free only the content strings */
6642 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006643 if (atts[i] != NULL)
6644 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006645 }
6646 return(name);
6647}
6648
6649/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006650 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006651 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006652 * @line: line of the start tag
6653 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006654 *
6655 * parse an end of tag
6656 *
6657 * [42] ETag ::= '</' Name S? '>'
6658 *
6659 * With namespace
6660 *
6661 * [NS 9] ETag ::= '</' QName S? '>'
6662 */
6663
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006664static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006665xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006666 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006667
6668 GROW;
6669 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006670 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006671 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006672 return;
6673 }
6674 SKIP(2);
6675
Daniel Veillard46de64e2002-05-29 08:21:33 +00006676 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006677
6678 /*
6679 * We should definitely be at the ending "S? '>'" part
6680 */
6681 GROW;
6682 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006683 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006684 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006685 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006686 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006687
6688 /*
6689 * [ WFC: Element Type Match ]
6690 * The Name in an element's end-tag must match the element type in the
6691 * start-tag.
6692 *
6693 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006694 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006695 if (name == NULL) name = BAD_CAST "unparseable";
6696 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006697 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006698 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006699 }
6700
6701 /*
6702 * SAX: End of Tag
6703 */
6704 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6705 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006706 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006707
Daniel Veillarde57ec792003-09-10 10:50:59 +00006708 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006709 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006710 return;
6711}
6712
6713/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006714 * xmlParseEndTag:
6715 * @ctxt: an XML parser context
6716 *
6717 * parse an end of tag
6718 *
6719 * [42] ETag ::= '</' Name S? '>'
6720 *
6721 * With namespace
6722 *
6723 * [NS 9] ETag ::= '</' QName S? '>'
6724 */
6725
6726void
6727xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006728 xmlParseEndTag1(ctxt, 0);
6729}
Daniel Veillard81273902003-09-30 00:43:48 +00006730#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006731
6732/************************************************************************
6733 * *
6734 * SAX 2 specific operations *
6735 * *
6736 ************************************************************************/
6737
6738static const xmlChar *
6739xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6740 int len = 0, l;
6741 int c;
6742 int count = 0;
6743
6744 /*
6745 * Handler for more complex cases
6746 */
6747 GROW;
6748 c = CUR_CHAR(l);
6749 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006750 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006751 return(NULL);
6752 }
6753
6754 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006755 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006756 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006757 (IS_COMBINING(c)) ||
6758 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006759 if (count++ > 100) {
6760 count = 0;
6761 GROW;
6762 }
6763 len += l;
6764 NEXTL(l);
6765 c = CUR_CHAR(l);
6766 }
6767 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6768}
6769
6770/*
6771 * xmlGetNamespace:
6772 * @ctxt: an XML parser context
6773 * @prefix: the prefix to lookup
6774 *
6775 * Lookup the namespace name for the @prefix (which ca be NULL)
6776 * The prefix must come from the @ctxt->dict dictionnary
6777 *
6778 * Returns the namespace name or NULL if not bound
6779 */
6780static const xmlChar *
6781xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6782 int i;
6783
Daniel Veillarde57ec792003-09-10 10:50:59 +00006784 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006785 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006786 if (ctxt->nsTab[i] == prefix) {
6787 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6788 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006789 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006790 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006791 return(NULL);
6792}
6793
6794/**
6795 * xmlParseNCName:
6796 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006797 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006798 *
6799 * parse an XML name.
6800 *
6801 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6802 * CombiningChar | Extender
6803 *
6804 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6805 *
6806 * Returns the Name parsed or NULL
6807 */
6808
6809static const xmlChar *
6810xmlParseNCName(xmlParserCtxtPtr ctxt) {
6811 const xmlChar *in;
6812 const xmlChar *ret;
6813 int count = 0;
6814
6815 /*
6816 * Accelerator for simple ASCII names
6817 */
6818 in = ctxt->input->cur;
6819 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6820 ((*in >= 0x41) && (*in <= 0x5A)) ||
6821 (*in == '_')) {
6822 in++;
6823 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6824 ((*in >= 0x41) && (*in <= 0x5A)) ||
6825 ((*in >= 0x30) && (*in <= 0x39)) ||
6826 (*in == '_') || (*in == '-') ||
6827 (*in == '.'))
6828 in++;
6829 if ((*in > 0) && (*in < 0x80)) {
6830 count = in - ctxt->input->cur;
6831 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6832 ctxt->input->cur = in;
6833 ctxt->nbChars += count;
6834 ctxt->input->col += count;
6835 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006836 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006837 }
6838 return(ret);
6839 }
6840 }
6841 return(xmlParseNCNameComplex(ctxt));
6842}
6843
6844/**
6845 * xmlParseQName:
6846 * @ctxt: an XML parser context
6847 * @prefix: pointer to store the prefix part
6848 *
6849 * parse an XML Namespace QName
6850 *
6851 * [6] QName ::= (Prefix ':')? LocalPart
6852 * [7] Prefix ::= NCName
6853 * [8] LocalPart ::= NCName
6854 *
6855 * Returns the Name parsed or NULL
6856 */
6857
6858static const xmlChar *
6859xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6860 const xmlChar *l, *p;
6861
6862 GROW;
6863
6864 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006865 if (l == NULL) {
6866 if (CUR == ':') {
6867 l = xmlParseName(ctxt);
6868 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006869 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6870 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006871 *prefix = NULL;
6872 return(l);
6873 }
6874 }
6875 return(NULL);
6876 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006877 if (CUR == ':') {
6878 NEXT;
6879 p = l;
6880 l = xmlParseNCName(ctxt);
6881 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006882 xmlChar *tmp;
6883
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006884 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6885 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006886 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6887 p = xmlDictLookup(ctxt->dict, tmp, -1);
6888 if (tmp != NULL) xmlFree(tmp);
6889 *prefix = NULL;
6890 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006891 }
6892 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006893 xmlChar *tmp;
6894
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006895 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6896 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006897 NEXT;
6898 tmp = (xmlChar *) xmlParseName(ctxt);
6899 if (tmp != NULL) {
6900 tmp = xmlBuildQName(tmp, l, NULL, 0);
6901 l = xmlDictLookup(ctxt->dict, tmp, -1);
6902 if (tmp != NULL) xmlFree(tmp);
6903 *prefix = p;
6904 return(l);
6905 }
6906 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6907 l = xmlDictLookup(ctxt->dict, tmp, -1);
6908 if (tmp != NULL) xmlFree(tmp);
6909 *prefix = p;
6910 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006911 }
6912 *prefix = p;
6913 } else
6914 *prefix = NULL;
6915 return(l);
6916}
6917
6918/**
6919 * xmlParseQNameAndCompare:
6920 * @ctxt: an XML parser context
6921 * @name: the localname
6922 * @prefix: the prefix, if any.
6923 *
6924 * parse an XML name and compares for match
6925 * (specialized for endtag parsing)
6926 *
6927 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6928 * and the name for mismatch
6929 */
6930
6931static const xmlChar *
6932xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6933 xmlChar const *prefix) {
6934 const xmlChar *cmp = name;
6935 const xmlChar *in;
6936 const xmlChar *ret;
6937 const xmlChar *prefix2;
6938
6939 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6940
6941 GROW;
6942 in = ctxt->input->cur;
6943
6944 cmp = prefix;
6945 while (*in != 0 && *in == *cmp) {
6946 ++in;
6947 ++cmp;
6948 }
6949 if ((*cmp == 0) && (*in == ':')) {
6950 in++;
6951 cmp = name;
6952 while (*in != 0 && *in == *cmp) {
6953 ++in;
6954 ++cmp;
6955 }
William M. Brack76e95df2003-10-18 16:20:14 +00006956 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006957 /* success */
6958 ctxt->input->cur = in;
6959 return((const xmlChar*) 1);
6960 }
6961 }
6962 /*
6963 * all strings coms from the dictionary, equality can be done directly
6964 */
6965 ret = xmlParseQName (ctxt, &prefix2);
6966 if ((ret == name) && (prefix == prefix2))
6967 return((const xmlChar*) 1);
6968 return ret;
6969}
6970
6971/**
6972 * xmlParseAttValueInternal:
6973 * @ctxt: an XML parser context
6974 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006975 * @alloc: whether the attribute was reallocated as a new string
6976 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006977 *
6978 * parse a value for an attribute.
6979 * NOTE: if no normalization is needed, the routine will return pointers
6980 * directly from the data buffer.
6981 *
6982 * 3.3.3 Attribute-Value Normalization:
6983 * Before the value of an attribute is passed to the application or
6984 * checked for validity, the XML processor must normalize it as follows:
6985 * - a character reference is processed by appending the referenced
6986 * character to the attribute value
6987 * - an entity reference is processed by recursively processing the
6988 * replacement text of the entity
6989 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6990 * appending #x20 to the normalized value, except that only a single
6991 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6992 * parsed entity or the literal entity value of an internal parsed entity
6993 * - other characters are processed by appending them to the normalized value
6994 * If the declared value is not CDATA, then the XML processor must further
6995 * process the normalized attribute value by discarding any leading and
6996 * trailing space (#x20) characters, and by replacing sequences of space
6997 * (#x20) characters by a single space (#x20) character.
6998 * All attributes for which no declaration has been read should be treated
6999 * by a non-validating parser as if declared CDATA.
7000 *
7001 * Returns the AttValue parsed or NULL. The value has to be freed by the
7002 * caller if it was copied, this can be detected by val[*len] == 0.
7003 */
7004
7005static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007006xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7007 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007008{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007009 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007010 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007011 xmlChar *ret = NULL;
7012
7013 GROW;
7014 in = (xmlChar *) CUR_PTR;
7015 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007016 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007017 return (NULL);
7018 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007019 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007020
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007021 /*
7022 * try to handle in this routine the most common case where no
7023 * allocation of a new string is required and where content is
7024 * pure ASCII.
7025 */
7026 limit = *in++;
7027 end = ctxt->input->end;
7028 start = in;
7029 if (in >= end) {
7030 const xmlChar *oldbase = ctxt->input->base;
7031 GROW;
7032 if (oldbase != ctxt->input->base) {
7033 long delta = ctxt->input->base - oldbase;
7034 start = start + delta;
7035 in = in + delta;
7036 }
7037 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007038 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007039 if (normalize) {
7040 /*
7041 * Skip any leading spaces
7042 */
7043 while ((in < end) && (*in != limit) &&
7044 ((*in == 0x20) || (*in == 0x9) ||
7045 (*in == 0xA) || (*in == 0xD))) {
7046 in++;
7047 start = in;
7048 if (in >= end) {
7049 const xmlChar *oldbase = ctxt->input->base;
7050 GROW;
7051 if (oldbase != ctxt->input->base) {
7052 long delta = ctxt->input->base - oldbase;
7053 start = start + delta;
7054 in = in + delta;
7055 }
7056 end = ctxt->input->end;
7057 }
7058 }
7059 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7060 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7061 if ((*in++ == 0x20) && (*in == 0x20)) break;
7062 if (in >= end) {
7063 const xmlChar *oldbase = ctxt->input->base;
7064 GROW;
7065 if (oldbase != ctxt->input->base) {
7066 long delta = ctxt->input->base - oldbase;
7067 start = start + delta;
7068 in = in + delta;
7069 }
7070 end = ctxt->input->end;
7071 }
7072 }
7073 last = in;
7074 /*
7075 * skip the trailing blanks
7076 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007077 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007078 while ((in < end) && (*in != limit) &&
7079 ((*in == 0x20) || (*in == 0x9) ||
7080 (*in == 0xA) || (*in == 0xD))) {
7081 in++;
7082 if (in >= end) {
7083 const xmlChar *oldbase = ctxt->input->base;
7084 GROW;
7085 if (oldbase != ctxt->input->base) {
7086 long delta = ctxt->input->base - oldbase;
7087 start = start + delta;
7088 in = in + delta;
7089 last = last + delta;
7090 }
7091 end = ctxt->input->end;
7092 }
7093 }
7094 if (*in != limit) goto need_complex;
7095 } else {
7096 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7097 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7098 in++;
7099 if (in >= end) {
7100 const xmlChar *oldbase = ctxt->input->base;
7101 GROW;
7102 if (oldbase != ctxt->input->base) {
7103 long delta = ctxt->input->base - oldbase;
7104 start = start + delta;
7105 in = in + delta;
7106 }
7107 end = ctxt->input->end;
7108 }
7109 }
7110 last = in;
7111 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007112 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007113 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007114 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007115 *len = last - start;
7116 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007117 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007118 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007119 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007120 }
7121 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007122 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007123 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007124need_complex:
7125 if (alloc) *alloc = 1;
7126 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007127}
7128
7129/**
7130 * xmlParseAttribute2:
7131 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007132 * @pref: the element prefix
7133 * @elem: the element name
7134 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007135 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007136 * @len: an int * to save the length of the attribute
7137 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007138 *
7139 * parse an attribute in the new SAX2 framework.
7140 *
7141 * Returns the attribute name, and the value in *value, .
7142 */
7143
7144static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007145xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7146 const xmlChar *pref, const xmlChar *elem,
7147 const xmlChar **prefix, xmlChar **value,
7148 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007149 const xmlChar *name;
7150 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007151 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007152
7153 *value = NULL;
7154 GROW;
7155 name = xmlParseQName(ctxt, prefix);
7156 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007157 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7158 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007159 return(NULL);
7160 }
7161
7162 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007163 * get the type if needed
7164 */
7165 if (ctxt->attsSpecial != NULL) {
7166 int type;
7167
7168 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7169 pref, elem, *prefix, name);
7170 if (type != 0) normalize = 1;
7171 }
7172
7173 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007174 * read the value
7175 */
7176 SKIP_BLANKS;
7177 if (RAW == '=') {
7178 NEXT;
7179 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007180 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007181 ctxt->instate = XML_PARSER_CONTENT;
7182 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007183 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007184 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007185 return(NULL);
7186 }
7187
7188 /*
7189 * Check that xml:lang conforms to the specification
7190 * No more registered as an error, just generate a warning now
7191 * since this was deprecated in XML second edition
7192 */
7193 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7194 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007195 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7196 "Malformed value for xml:lang : %s\n",
7197 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007198 }
7199 }
7200
7201 /*
7202 * Check that xml:space conforms to the specification
7203 */
7204 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7205 if (xmlStrEqual(val, BAD_CAST "default"))
7206 *(ctxt->space) = 0;
7207 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7208 *(ctxt->space) = 1;
7209 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007210 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007211"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7212 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007213 }
7214 }
7215
7216 *value = val;
7217 return(name);
7218}
7219
7220/**
7221 * xmlParseStartTag2:
7222 * @ctxt: an XML parser context
7223 *
7224 * parse a start of tag either for rule element or
7225 * EmptyElement. In both case we don't parse the tag closing chars.
7226 * This routine is called when running SAX2 parsing
7227 *
7228 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7229 *
7230 * [ WFC: Unique Att Spec ]
7231 * No attribute name may appear more than once in the same start-tag or
7232 * empty-element tag.
7233 *
7234 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7235 *
7236 * [ WFC: Unique Att Spec ]
7237 * No attribute name may appear more than once in the same start-tag or
7238 * empty-element tag.
7239 *
7240 * With namespace:
7241 *
7242 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7243 *
7244 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7245 *
7246 * Returns the element name parsed
7247 */
7248
7249static const xmlChar *
7250xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007251 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252 const xmlChar *localname;
7253 const xmlChar *prefix;
7254 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007255 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007256 const xmlChar *nsname;
7257 xmlChar *attvalue;
7258 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007259 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007260 int nratts, nbatts, nbdef;
7261 int i, j, nbNs, attval;
7262 const xmlChar *base;
7263 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007264
7265 if (RAW != '<') return(NULL);
7266 NEXT1;
7267
7268 /*
7269 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7270 * point since the attribute values may be stored as pointers to
7271 * the buffer and calling SHRINK would destroy them !
7272 * The Shrinking is only possible once the full set of attribute
7273 * callbacks have been done.
7274 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007275reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007276 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007277 base = ctxt->input->base;
7278 cur = ctxt->input->cur - ctxt->input->base;
7279 nbatts = 0;
7280 nratts = 0;
7281 nbdef = 0;
7282 nbNs = 0;
7283 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007284
7285 localname = xmlParseQName(ctxt, &prefix);
7286 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007287 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7288 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007289 return(NULL);
7290 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007291 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007292
7293 /*
7294 * Now parse the attributes, it ends up with the ending
7295 *
7296 * (S Attribute)* S?
7297 */
7298 SKIP_BLANKS;
7299 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007300 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007301
7302 while ((RAW != '>') &&
7303 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007304 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007305 const xmlChar *q = CUR_PTR;
7306 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007307 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007308
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007309 attname = xmlParseAttribute2(ctxt, prefix, localname,
7310 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007311 if ((attname != NULL) && (attvalue != NULL)) {
7312 if (len < 0) len = xmlStrlen(attvalue);
7313 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007314 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7315 xmlURIPtr uri;
7316
7317 if (*URL != 0) {
7318 uri = xmlParseURI((const char *) URL);
7319 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007320 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7321 "xmlns: %s not a valid URI\n",
7322 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007323 } else {
7324 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007325 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7326 "xmlns: URI %s is not absolute\n",
7327 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007328 }
7329 xmlFreeURI(uri);
7330 }
7331 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007332 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007333 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007335 for (j = 1;j <= nbNs;j++)
7336 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7337 break;
7338 if (j <= nbNs)
7339 xmlErrAttributeDup(ctxt, NULL, attname);
7340 else
7341 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007342 if (alloc != 0) xmlFree(attvalue);
7343 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007344 continue;
7345 }
7346 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007347 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7348 xmlURIPtr uri;
7349
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007350 if (attname == ctxt->str_xml) {
7351 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007352 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7353 "xml namespace prefix mapped to wrong URI\n",
7354 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007355 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007356 /*
7357 * Do not keep a namespace definition node
7358 */
7359 if (alloc != 0) xmlFree(attvalue);
7360 SKIP_BLANKS;
7361 continue;
7362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007363 uri = xmlParseURI((const char *) URL);
7364 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007365 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7366 "xmlns:%s: '%s' is not a valid URI\n",
7367 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007368 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007369 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007370 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7371 "xmlns:%s: URI %s is not absolute\n",
7372 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007373 }
7374 xmlFreeURI(uri);
7375 }
7376
Daniel Veillard0fb18932003-09-07 09:14:37 +00007377 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007378 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007379 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007380 for (j = 1;j <= nbNs;j++)
7381 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7382 break;
7383 if (j <= nbNs)
7384 xmlErrAttributeDup(ctxt, aprefix, attname);
7385 else
7386 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007387 if (alloc != 0) xmlFree(attvalue);
7388 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007389 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007390 continue;
7391 }
7392
7393 /*
7394 * Add the pair to atts
7395 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007396 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7397 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 if (attvalue[len] == 0)
7399 xmlFree(attvalue);
7400 goto failed;
7401 }
7402 maxatts = ctxt->maxatts;
7403 atts = ctxt->atts;
7404 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007405 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007406 atts[nbatts++] = attname;
7407 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007408 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007409 atts[nbatts++] = attvalue;
7410 attvalue += len;
7411 atts[nbatts++] = attvalue;
7412 /*
7413 * tag if some deallocation is needed
7414 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007415 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007416 } else {
7417 if ((attvalue != NULL) && (attvalue[len] == 0))
7418 xmlFree(attvalue);
7419 }
7420
7421failed:
7422
7423 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007424 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007425 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7426 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007427 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007428 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7429 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007430 }
7431 SKIP_BLANKS;
7432 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7433 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007434 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007435 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007436 break;
7437 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007438 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007439 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007440 }
7441
Daniel Veillard0fb18932003-09-07 09:14:37 +00007442 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007443 * The attributes defaulting
7444 */
7445 if (ctxt->attsDefault != NULL) {
7446 xmlDefAttrsPtr defaults;
7447
7448 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7449 if (defaults != NULL) {
7450 for (i = 0;i < defaults->nbAttrs;i++) {
7451 attname = defaults->values[4 * i];
7452 aprefix = defaults->values[4 * i + 1];
7453
7454 /*
7455 * special work for namespaces defaulted defs
7456 */
7457 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7458 /*
7459 * check that it's not a defined namespace
7460 */
7461 for (j = 1;j <= nbNs;j++)
7462 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7463 break;
7464 if (j <= nbNs) continue;
7465
7466 nsname = xmlGetNamespace(ctxt, NULL);
7467 if (nsname != defaults->values[4 * i + 2]) {
7468 if (nsPush(ctxt, NULL,
7469 defaults->values[4 * i + 2]) > 0)
7470 nbNs++;
7471 }
7472 } else if (aprefix == ctxt->str_xmlns) {
7473 /*
7474 * check that it's not a defined namespace
7475 */
7476 for (j = 1;j <= nbNs;j++)
7477 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7478 break;
7479 if (j <= nbNs) continue;
7480
7481 nsname = xmlGetNamespace(ctxt, attname);
7482 if (nsname != defaults->values[2]) {
7483 if (nsPush(ctxt, attname,
7484 defaults->values[4 * i + 2]) > 0)
7485 nbNs++;
7486 }
7487 } else {
7488 /*
7489 * check that it's not a defined attribute
7490 */
7491 for (j = 0;j < nbatts;j+=5) {
7492 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7493 break;
7494 }
7495 if (j < nbatts) continue;
7496
7497 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7498 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007499 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007500 }
7501 maxatts = ctxt->maxatts;
7502 atts = ctxt->atts;
7503 }
7504 atts[nbatts++] = attname;
7505 atts[nbatts++] = aprefix;
7506 if (aprefix == NULL)
7507 atts[nbatts++] = NULL;
7508 else
7509 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7510 atts[nbatts++] = defaults->values[4 * i + 2];
7511 atts[nbatts++] = defaults->values[4 * i + 3];
7512 nbdef++;
7513 }
7514 }
7515 }
7516 }
7517
Daniel Veillarde70c8772003-11-25 07:21:18 +00007518 /*
7519 * The attributes checkings
7520 */
7521 for (i = 0; i < nbatts;i += 5) {
7522 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7523 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7524 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7525 "Namespace prefix %s for %s on %s is not defined\n",
7526 atts[i + 1], atts[i], localname);
7527 }
7528 atts[i + 2] = nsname;
7529 /*
7530 * [ WFC: Unique Att Spec ]
7531 * No attribute name may appear more than once in the same
7532 * start-tag or empty-element tag.
7533 * As extended by the Namespace in XML REC.
7534 */
7535 for (j = 0; j < i;j += 5) {
7536 if (atts[i] == atts[j]) {
7537 if (atts[i+1] == atts[j+1]) {
7538 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7539 break;
7540 }
7541 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7542 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7543 "Namespaced Attribute %s in '%s' redefined\n",
7544 atts[i], nsname, NULL);
7545 break;
7546 }
7547 }
7548 }
7549 }
7550
Daniel Veillarde57ec792003-09-10 10:50:59 +00007551 nsname = xmlGetNamespace(ctxt, prefix);
7552 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007553 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7554 "Namespace prefix %s on %s is not defined\n",
7555 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007556 }
7557 *pref = prefix;
7558 *URI = nsname;
7559
7560 /*
7561 * SAX: Start of Element !
7562 */
7563 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7564 (!ctxt->disableSAX)) {
7565 if (nbNs > 0)
7566 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7567 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7568 nbatts / 5, nbdef, atts);
7569 else
7570 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7571 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7572 }
7573
7574 /*
7575 * Free up attribute allocated strings if needed
7576 */
7577 if (attval != 0) {
7578 for (i = 3,j = 0; j < nratts;i += 5,j++)
7579 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7580 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007581 }
7582
7583 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007584
7585base_changed:
7586 /*
7587 * the attribute strings are valid iif the base didn't changed
7588 */
7589 if (attval != 0) {
7590 for (i = 3,j = 0; j < nratts;i += 5,j++)
7591 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7592 xmlFree((xmlChar *) atts[i]);
7593 }
7594 ctxt->input->cur = ctxt->input->base + cur;
7595 if (ctxt->wellFormed == 1) {
7596 goto reparse;
7597 }
7598 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007599}
7600
7601/**
7602 * xmlParseEndTag2:
7603 * @ctxt: an XML parser context
7604 * @line: line of the start tag
7605 * @nsNr: number of namespaces on the start tag
7606 *
7607 * parse an end of tag
7608 *
7609 * [42] ETag ::= '</' Name S? '>'
7610 *
7611 * With namespace
7612 *
7613 * [NS 9] ETag ::= '</' QName S? '>'
7614 */
7615
7616static void
7617xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007618 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007619 const xmlChar *name;
7620
7621 GROW;
7622 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007623 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007624 return;
7625 }
7626 SKIP(2);
7627
Daniel Veillard453e71b2004-04-20 17:44:46 +00007628 if ((tlen > 0) && (strncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007629 if (ctxt->input->cur[tlen] == '>') {
7630 ctxt->input->cur += tlen + 1;
7631 goto done;
7632 }
7633 ctxt->input->cur += tlen;
7634 name = (xmlChar*)1;
7635 } else {
7636 if (prefix == NULL)
7637 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7638 else
7639 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7640 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007641
7642 /*
7643 * We should definitely be at the ending "S? '>'" part
7644 */
7645 GROW;
7646 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007647 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007648 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007649 } else
7650 NEXT1;
7651
7652 /*
7653 * [ WFC: Element Type Match ]
7654 * The Name in an element's end-tag must match the element type in the
7655 * start-tag.
7656 *
7657 */
7658 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007659 if (name == NULL) name = BAD_CAST "unparseable";
7660 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007661 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007662 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663 }
7664
7665 /*
7666 * SAX: End of Tag
7667 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007668done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007669 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7670 (!ctxt->disableSAX))
7671 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7672
Daniel Veillard0fb18932003-09-07 09:14:37 +00007673 spacePop(ctxt);
7674 if (nsNr != 0)
7675 nsPop(ctxt, nsNr);
7676 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007677}
7678
7679/**
Owen Taylor3473f882001-02-23 17:55:21 +00007680 * xmlParseCDSect:
7681 * @ctxt: an XML parser context
7682 *
7683 * Parse escaped pure raw content.
7684 *
7685 * [18] CDSect ::= CDStart CData CDEnd
7686 *
7687 * [19] CDStart ::= '<![CDATA['
7688 *
7689 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7690 *
7691 * [21] CDEnd ::= ']]>'
7692 */
7693void
7694xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7695 xmlChar *buf = NULL;
7696 int len = 0;
7697 int size = XML_PARSER_BUFFER_SIZE;
7698 int r, rl;
7699 int s, sl;
7700 int cur, l;
7701 int count = 0;
7702
Daniel Veillard8f597c32003-10-06 08:19:27 +00007703 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007704 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007705 SKIP(9);
7706 } else
7707 return;
7708
7709 ctxt->instate = XML_PARSER_CDATA_SECTION;
7710 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007711 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007712 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007713 ctxt->instate = XML_PARSER_CONTENT;
7714 return;
7715 }
7716 NEXTL(rl);
7717 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007718 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007719 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007720 ctxt->instate = XML_PARSER_CONTENT;
7721 return;
7722 }
7723 NEXTL(sl);
7724 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007725 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007726 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007727 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007728 return;
7729 }
William M. Brack871611b2003-10-18 04:53:14 +00007730 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007731 ((r != ']') || (s != ']') || (cur != '>'))) {
7732 if (len + 5 >= size) {
7733 size *= 2;
7734 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7735 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007737 return;
7738 }
7739 }
7740 COPY_BUF(rl,buf,len,r);
7741 r = s;
7742 rl = sl;
7743 s = cur;
7744 sl = l;
7745 count++;
7746 if (count > 50) {
7747 GROW;
7748 count = 0;
7749 }
7750 NEXTL(l);
7751 cur = CUR_CHAR(l);
7752 }
7753 buf[len] = 0;
7754 ctxt->instate = XML_PARSER_CONTENT;
7755 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007756 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007757 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007758 xmlFree(buf);
7759 return;
7760 }
7761 NEXTL(l);
7762
7763 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007764 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007765 */
7766 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7767 if (ctxt->sax->cdataBlock != NULL)
7768 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007769 else if (ctxt->sax->characters != NULL)
7770 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007771 }
7772 xmlFree(buf);
7773}
7774
7775/**
7776 * xmlParseContent:
7777 * @ctxt: an XML parser context
7778 *
7779 * Parse a content:
7780 *
7781 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7782 */
7783
7784void
7785xmlParseContent(xmlParserCtxtPtr ctxt) {
7786 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007787 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007788 ((RAW != '<') || (NXT(1) != '/'))) {
7789 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007790 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007791 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007792
7793 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007794 * First case : a Processing Instruction.
7795 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007796 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007797 xmlParsePI(ctxt);
7798 }
7799
7800 /*
7801 * Second case : a CDSection
7802 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007803 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007804 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007805 xmlParseCDSect(ctxt);
7806 }
7807
7808 /*
7809 * Third case : a comment
7810 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007811 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007812 (NXT(2) == '-') && (NXT(3) == '-')) {
7813 xmlParseComment(ctxt);
7814 ctxt->instate = XML_PARSER_CONTENT;
7815 }
7816
7817 /*
7818 * Fourth case : a sub-element.
7819 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007820 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007821 xmlParseElement(ctxt);
7822 }
7823
7824 /*
7825 * Fifth case : a reference. If if has not been resolved,
7826 * parsing returns it's Name, create the node
7827 */
7828
Daniel Veillard21a0f912001-02-25 19:54:14 +00007829 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007830 xmlParseReference(ctxt);
7831 }
7832
7833 /*
7834 * Last case, text. Note that References are handled directly.
7835 */
7836 else {
7837 xmlParseCharData(ctxt, 0);
7838 }
7839
7840 GROW;
7841 /*
7842 * Pop-up of finished entities.
7843 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007844 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007845 xmlPopInput(ctxt);
7846 SHRINK;
7847
Daniel Veillardfdc91562002-07-01 21:52:03 +00007848 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007849 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7850 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007851 ctxt->instate = XML_PARSER_EOF;
7852 break;
7853 }
7854 }
7855}
7856
7857/**
7858 * xmlParseElement:
7859 * @ctxt: an XML parser context
7860 *
7861 * parse an XML element, this is highly recursive
7862 *
7863 * [39] element ::= EmptyElemTag | STag content ETag
7864 *
7865 * [ WFC: Element Type Match ]
7866 * The Name in an element's end-tag must match the element type in the
7867 * start-tag.
7868 *
Owen Taylor3473f882001-02-23 17:55:21 +00007869 */
7870
7871void
7872xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007873 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007874 const xmlChar *prefix;
7875 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007876 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007877 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007878 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007879 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007880
7881 /* Capture start position */
7882 if (ctxt->record_info) {
7883 node_info.begin_pos = ctxt->input->consumed +
7884 (CUR_PTR - ctxt->input->base);
7885 node_info.begin_line = ctxt->input->line;
7886 }
7887
7888 if (ctxt->spaceNr == 0)
7889 spacePush(ctxt, -1);
7890 else
7891 spacePush(ctxt, *ctxt->space);
7892
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007893 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007894#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007895 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007896#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007897 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007898#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007899 else
7900 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007901#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007902 if (name == NULL) {
7903 spacePop(ctxt);
7904 return;
7905 }
7906 namePush(ctxt, name);
7907 ret = ctxt->node;
7908
Daniel Veillard4432df22003-09-28 18:58:27 +00007909#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007910 /*
7911 * [ VC: Root Element Type ]
7912 * The Name in the document type declaration must match the element
7913 * type of the root element.
7914 */
7915 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7916 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7917 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007918#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007919
7920 /*
7921 * Check for an Empty Element.
7922 */
7923 if ((RAW == '/') && (NXT(1) == '>')) {
7924 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007925 if (ctxt->sax2) {
7926 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7927 (!ctxt->disableSAX))
7928 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007929#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007930 } else {
7931 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7932 (!ctxt->disableSAX))
7933 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007934#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007935 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 namePop(ctxt);
7937 spacePop(ctxt);
7938 if (nsNr != ctxt->nsNr)
7939 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007940 if ( ret != NULL && ctxt->record_info ) {
7941 node_info.end_pos = ctxt->input->consumed +
7942 (CUR_PTR - ctxt->input->base);
7943 node_info.end_line = ctxt->input->line;
7944 node_info.node = ret;
7945 xmlParserAddNodeInfo(ctxt, &node_info);
7946 }
7947 return;
7948 }
7949 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007950 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007951 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007952 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7953 "Couldn't find end of Start Tag %s line %d\n",
7954 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007955
7956 /*
7957 * end of parsing of this node.
7958 */
7959 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007960 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007961 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 if (nsNr != ctxt->nsNr)
7963 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007964
7965 /*
7966 * Capture end position and add node
7967 */
7968 if ( ret != NULL && ctxt->record_info ) {
7969 node_info.end_pos = ctxt->input->consumed +
7970 (CUR_PTR - ctxt->input->base);
7971 node_info.end_line = ctxt->input->line;
7972 node_info.node = ret;
7973 xmlParserAddNodeInfo(ctxt, &node_info);
7974 }
7975 return;
7976 }
7977
7978 /*
7979 * Parse the content of the element:
7980 */
7981 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007982 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007983 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007984 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007985 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007986
7987 /*
7988 * end of parsing of this node.
7989 */
7990 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007991 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007992 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007993 if (nsNr != ctxt->nsNr)
7994 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007995 return;
7996 }
7997
7998 /*
7999 * parse the end of tag: '</' should be here.
8000 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008001 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008002 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008003 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008004 }
8005#ifdef LIBXML_SAX1_ENABLED
8006 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008007 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008008#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008009
8010 /*
8011 * Capture end position and add node
8012 */
8013 if ( ret != NULL && ctxt->record_info ) {
8014 node_info.end_pos = ctxt->input->consumed +
8015 (CUR_PTR - ctxt->input->base);
8016 node_info.end_line = ctxt->input->line;
8017 node_info.node = ret;
8018 xmlParserAddNodeInfo(ctxt, &node_info);
8019 }
8020}
8021
8022/**
8023 * xmlParseVersionNum:
8024 * @ctxt: an XML parser context
8025 *
8026 * parse the XML version value.
8027 *
8028 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8029 *
8030 * Returns the string giving the XML version number, or NULL
8031 */
8032xmlChar *
8033xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8034 xmlChar *buf = NULL;
8035 int len = 0;
8036 int size = 10;
8037 xmlChar cur;
8038
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008039 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008040 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008041 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008042 return(NULL);
8043 }
8044 cur = CUR;
8045 while (((cur >= 'a') && (cur <= 'z')) ||
8046 ((cur >= 'A') && (cur <= 'Z')) ||
8047 ((cur >= '0') && (cur <= '9')) ||
8048 (cur == '_') || (cur == '.') ||
8049 (cur == ':') || (cur == '-')) {
8050 if (len + 1 >= size) {
8051 size *= 2;
8052 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8053 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008054 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008055 return(NULL);
8056 }
8057 }
8058 buf[len++] = cur;
8059 NEXT;
8060 cur=CUR;
8061 }
8062 buf[len] = 0;
8063 return(buf);
8064}
8065
8066/**
8067 * xmlParseVersionInfo:
8068 * @ctxt: an XML parser context
8069 *
8070 * parse the XML version.
8071 *
8072 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8073 *
8074 * [25] Eq ::= S? '=' S?
8075 *
8076 * Returns the version string, e.g. "1.0"
8077 */
8078
8079xmlChar *
8080xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8081 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008082
Daniel Veillarda07050d2003-10-19 14:46:32 +00008083 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008084 SKIP(7);
8085 SKIP_BLANKS;
8086 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008087 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008088 return(NULL);
8089 }
8090 NEXT;
8091 SKIP_BLANKS;
8092 if (RAW == '"') {
8093 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008094 version = xmlParseVersionNum(ctxt);
8095 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008096 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008097 } else
8098 NEXT;
8099 } else if (RAW == '\''){
8100 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008101 version = xmlParseVersionNum(ctxt);
8102 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008103 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008104 } else
8105 NEXT;
8106 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008107 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008108 }
8109 }
8110 return(version);
8111}
8112
8113/**
8114 * xmlParseEncName:
8115 * @ctxt: an XML parser context
8116 *
8117 * parse the XML encoding name
8118 *
8119 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8120 *
8121 * Returns the encoding name value or NULL
8122 */
8123xmlChar *
8124xmlParseEncName(xmlParserCtxtPtr ctxt) {
8125 xmlChar *buf = NULL;
8126 int len = 0;
8127 int size = 10;
8128 xmlChar cur;
8129
8130 cur = CUR;
8131 if (((cur >= 'a') && (cur <= 'z')) ||
8132 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008133 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008134 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008135 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008136 return(NULL);
8137 }
8138
8139 buf[len++] = cur;
8140 NEXT;
8141 cur = CUR;
8142 while (((cur >= 'a') && (cur <= 'z')) ||
8143 ((cur >= 'A') && (cur <= 'Z')) ||
8144 ((cur >= '0') && (cur <= '9')) ||
8145 (cur == '.') || (cur == '_') ||
8146 (cur == '-')) {
8147 if (len + 1 >= size) {
8148 size *= 2;
8149 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8150 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008151 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008152 return(NULL);
8153 }
8154 }
8155 buf[len++] = cur;
8156 NEXT;
8157 cur = CUR;
8158 if (cur == 0) {
8159 SHRINK;
8160 GROW;
8161 cur = CUR;
8162 }
8163 }
8164 buf[len] = 0;
8165 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008166 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008167 }
8168 return(buf);
8169}
8170
8171/**
8172 * xmlParseEncodingDecl:
8173 * @ctxt: an XML parser context
8174 *
8175 * parse the XML encoding declaration
8176 *
8177 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8178 *
8179 * this setups the conversion filters.
8180 *
8181 * Returns the encoding value or NULL
8182 */
8183
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008184const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008185xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8186 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008187
8188 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008189 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008190 SKIP(8);
8191 SKIP_BLANKS;
8192 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008193 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008194 return(NULL);
8195 }
8196 NEXT;
8197 SKIP_BLANKS;
8198 if (RAW == '"') {
8199 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008200 encoding = xmlParseEncName(ctxt);
8201 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008202 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008203 } else
8204 NEXT;
8205 } else if (RAW == '\''){
8206 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008207 encoding = xmlParseEncName(ctxt);
8208 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008209 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008210 } else
8211 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008212 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008213 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008215 /*
8216 * UTF-16 encoding stwich has already taken place at this stage,
8217 * more over the little-endian/big-endian selection is already done
8218 */
8219 if ((encoding != NULL) &&
8220 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8221 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008222 if (ctxt->encoding != NULL)
8223 xmlFree((xmlChar *) ctxt->encoding);
8224 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008225 }
8226 /*
8227 * UTF-8 encoding is handled natively
8228 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008229 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008230 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8231 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008232 if (ctxt->encoding != NULL)
8233 xmlFree((xmlChar *) ctxt->encoding);
8234 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008235 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008236 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008237 xmlCharEncodingHandlerPtr handler;
8238
8239 if (ctxt->input->encoding != NULL)
8240 xmlFree((xmlChar *) ctxt->input->encoding);
8241 ctxt->input->encoding = encoding;
8242
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008243 handler = xmlFindCharEncodingHandler((const char *) encoding);
8244 if (handler != NULL) {
8245 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008246 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008247 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008248 "Unsupported encoding %s\n", encoding);
8249 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008250 }
8251 }
8252 }
8253 return(encoding);
8254}
8255
8256/**
8257 * xmlParseSDDecl:
8258 * @ctxt: an XML parser context
8259 *
8260 * parse the XML standalone declaration
8261 *
8262 * [32] SDDecl ::= S 'standalone' Eq
8263 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8264 *
8265 * [ VC: Standalone Document Declaration ]
8266 * TODO The standalone document declaration must have the value "no"
8267 * if any external markup declarations contain declarations of:
8268 * - attributes with default values, if elements to which these
8269 * attributes apply appear in the document without specifications
8270 * of values for these attributes, or
8271 * - entities (other than amp, lt, gt, apos, quot), if references
8272 * to those entities appear in the document, or
8273 * - attributes with values subject to normalization, where the
8274 * attribute appears in the document with a value which will change
8275 * as a result of normalization, or
8276 * - element types with element content, if white space occurs directly
8277 * within any instance of those types.
8278 *
8279 * Returns 1 if standalone, 0 otherwise
8280 */
8281
8282int
8283xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8284 int standalone = -1;
8285
8286 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008287 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008288 SKIP(10);
8289 SKIP_BLANKS;
8290 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008291 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008292 return(standalone);
8293 }
8294 NEXT;
8295 SKIP_BLANKS;
8296 if (RAW == '\''){
8297 NEXT;
8298 if ((RAW == 'n') && (NXT(1) == 'o')) {
8299 standalone = 0;
8300 SKIP(2);
8301 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8302 (NXT(2) == 's')) {
8303 standalone = 1;
8304 SKIP(3);
8305 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008306 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008307 }
8308 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008309 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008310 } else
8311 NEXT;
8312 } else if (RAW == '"'){
8313 NEXT;
8314 if ((RAW == 'n') && (NXT(1) == 'o')) {
8315 standalone = 0;
8316 SKIP(2);
8317 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8318 (NXT(2) == 's')) {
8319 standalone = 1;
8320 SKIP(3);
8321 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008322 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008323 }
8324 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008325 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008326 } else
8327 NEXT;
8328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008329 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008330 }
8331 }
8332 return(standalone);
8333}
8334
8335/**
8336 * xmlParseXMLDecl:
8337 * @ctxt: an XML parser context
8338 *
8339 * parse an XML declaration header
8340 *
8341 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8342 */
8343
8344void
8345xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8346 xmlChar *version;
8347
8348 /*
8349 * We know that '<?xml' is here.
8350 */
8351 SKIP(5);
8352
William M. Brack76e95df2003-10-18 16:20:14 +00008353 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008354 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8355 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008356 }
8357 SKIP_BLANKS;
8358
8359 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008360 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008361 */
8362 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008363 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008364 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008365 } else {
8366 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8367 /*
8368 * TODO: Blueberry should be detected here
8369 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008370 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8371 "Unsupported version '%s'\n",
8372 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008373 }
8374 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008375 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008376 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008377 }
Owen Taylor3473f882001-02-23 17:55:21 +00008378
8379 /*
8380 * We may have the encoding declaration
8381 */
William M. Brack76e95df2003-10-18 16:20:14 +00008382 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008383 if ((RAW == '?') && (NXT(1) == '>')) {
8384 SKIP(2);
8385 return;
8386 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008387 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008388 }
8389 xmlParseEncodingDecl(ctxt);
8390 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8391 /*
8392 * The XML REC instructs us to stop parsing right here
8393 */
8394 return;
8395 }
8396
8397 /*
8398 * We may have the standalone status.
8399 */
William M. Brack76e95df2003-10-18 16:20:14 +00008400 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008401 if ((RAW == '?') && (NXT(1) == '>')) {
8402 SKIP(2);
8403 return;
8404 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008405 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008406 }
8407 SKIP_BLANKS;
8408 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8409
8410 SKIP_BLANKS;
8411 if ((RAW == '?') && (NXT(1) == '>')) {
8412 SKIP(2);
8413 } else if (RAW == '>') {
8414 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008415 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008416 NEXT;
8417 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008418 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008419 MOVETO_ENDTAG(CUR_PTR);
8420 NEXT;
8421 }
8422}
8423
8424/**
8425 * xmlParseMisc:
8426 * @ctxt: an XML parser context
8427 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008428 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008429 *
8430 * [27] Misc ::= Comment | PI | S
8431 */
8432
8433void
8434xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008435 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008436 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008437 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008438 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008439 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008440 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008441 NEXT;
8442 } else
8443 xmlParseComment(ctxt);
8444 }
8445}
8446
8447/**
8448 * xmlParseDocument:
8449 * @ctxt: an XML parser context
8450 *
8451 * parse an XML document (and build a tree if using the standard SAX
8452 * interface).
8453 *
8454 * [1] document ::= prolog element Misc*
8455 *
8456 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8457 *
8458 * Returns 0, -1 in case of error. the parser context is augmented
8459 * as a result of the parsing.
8460 */
8461
8462int
8463xmlParseDocument(xmlParserCtxtPtr ctxt) {
8464 xmlChar start[4];
8465 xmlCharEncoding enc;
8466
8467 xmlInitParser();
8468
8469 GROW;
8470
8471 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008472 * SAX: detecting the level.
8473 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008474 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008475
8476 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008477 * SAX: beginning of the document processing.
8478 */
8479 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8480 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8481
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008482 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8483 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008484 /*
8485 * Get the 4 first bytes and decode the charset
8486 * if enc != XML_CHAR_ENCODING_NONE
8487 * plug some encoding conversion routines.
8488 */
8489 start[0] = RAW;
8490 start[1] = NXT(1);
8491 start[2] = NXT(2);
8492 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008493 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008494 if (enc != XML_CHAR_ENCODING_NONE) {
8495 xmlSwitchEncoding(ctxt, enc);
8496 }
Owen Taylor3473f882001-02-23 17:55:21 +00008497 }
8498
8499
8500 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008501 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008502 }
8503
8504 /*
8505 * Check for the XMLDecl in the Prolog.
8506 */
8507 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008508 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008509
8510 /*
8511 * Note that we will switch encoding on the fly.
8512 */
8513 xmlParseXMLDecl(ctxt);
8514 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8515 /*
8516 * The XML REC instructs us to stop parsing right here
8517 */
8518 return(-1);
8519 }
8520 ctxt->standalone = ctxt->input->standalone;
8521 SKIP_BLANKS;
8522 } else {
8523 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8524 }
8525 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8526 ctxt->sax->startDocument(ctxt->userData);
8527
8528 /*
8529 * The Misc part of the Prolog
8530 */
8531 GROW;
8532 xmlParseMisc(ctxt);
8533
8534 /*
8535 * Then possibly doc type declaration(s) and more Misc
8536 * (doctypedecl Misc*)?
8537 */
8538 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008539 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008540
8541 ctxt->inSubset = 1;
8542 xmlParseDocTypeDecl(ctxt);
8543 if (RAW == '[') {
8544 ctxt->instate = XML_PARSER_DTD;
8545 xmlParseInternalSubset(ctxt);
8546 }
8547
8548 /*
8549 * Create and update the external subset.
8550 */
8551 ctxt->inSubset = 2;
8552 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8553 (!ctxt->disableSAX))
8554 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8555 ctxt->extSubSystem, ctxt->extSubURI);
8556 ctxt->inSubset = 0;
8557
8558
8559 ctxt->instate = XML_PARSER_PROLOG;
8560 xmlParseMisc(ctxt);
8561 }
8562
8563 /*
8564 * Time to start parsing the tree itself
8565 */
8566 GROW;
8567 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008568 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8569 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008570 } else {
8571 ctxt->instate = XML_PARSER_CONTENT;
8572 xmlParseElement(ctxt);
8573 ctxt->instate = XML_PARSER_EPILOG;
8574
8575
8576 /*
8577 * The Misc part at the end
8578 */
8579 xmlParseMisc(ctxt);
8580
Daniel Veillard561b7f82002-03-20 21:55:57 +00008581 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008582 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008583 }
8584 ctxt->instate = XML_PARSER_EOF;
8585 }
8586
8587 /*
8588 * SAX: end of the document processing.
8589 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008590 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008591 ctxt->sax->endDocument(ctxt->userData);
8592
Daniel Veillard5997aca2002-03-18 18:36:20 +00008593 /*
8594 * Remove locally kept entity definitions if the tree was not built
8595 */
8596 if ((ctxt->myDoc != NULL) &&
8597 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8598 xmlFreeDoc(ctxt->myDoc);
8599 ctxt->myDoc = NULL;
8600 }
8601
Daniel Veillardc7612992002-02-17 22:47:37 +00008602 if (! ctxt->wellFormed) {
8603 ctxt->valid = 0;
8604 return(-1);
8605 }
Owen Taylor3473f882001-02-23 17:55:21 +00008606 return(0);
8607}
8608
8609/**
8610 * xmlParseExtParsedEnt:
8611 * @ctxt: an XML parser context
8612 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008613 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008614 * An external general parsed entity is well-formed if it matches the
8615 * production labeled extParsedEnt.
8616 *
8617 * [78] extParsedEnt ::= TextDecl? content
8618 *
8619 * Returns 0, -1 in case of error. the parser context is augmented
8620 * as a result of the parsing.
8621 */
8622
8623int
8624xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8625 xmlChar start[4];
8626 xmlCharEncoding enc;
8627
8628 xmlDefaultSAXHandlerInit();
8629
Daniel Veillard309f81d2003-09-23 09:02:53 +00008630 xmlDetectSAX2(ctxt);
8631
Owen Taylor3473f882001-02-23 17:55:21 +00008632 GROW;
8633
8634 /*
8635 * SAX: beginning of the document processing.
8636 */
8637 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8638 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8639
8640 /*
8641 * Get the 4 first bytes and decode the charset
8642 * if enc != XML_CHAR_ENCODING_NONE
8643 * plug some encoding conversion routines.
8644 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008645 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8646 start[0] = RAW;
8647 start[1] = NXT(1);
8648 start[2] = NXT(2);
8649 start[3] = NXT(3);
8650 enc = xmlDetectCharEncoding(start, 4);
8651 if (enc != XML_CHAR_ENCODING_NONE) {
8652 xmlSwitchEncoding(ctxt, enc);
8653 }
Owen Taylor3473f882001-02-23 17:55:21 +00008654 }
8655
8656
8657 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008658 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008659 }
8660
8661 /*
8662 * Check for the XMLDecl in the Prolog.
8663 */
8664 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008665 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008666
8667 /*
8668 * Note that we will switch encoding on the fly.
8669 */
8670 xmlParseXMLDecl(ctxt);
8671 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8672 /*
8673 * The XML REC instructs us to stop parsing right here
8674 */
8675 return(-1);
8676 }
8677 SKIP_BLANKS;
8678 } else {
8679 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8680 }
8681 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8682 ctxt->sax->startDocument(ctxt->userData);
8683
8684 /*
8685 * Doing validity checking on chunk doesn't make sense
8686 */
8687 ctxt->instate = XML_PARSER_CONTENT;
8688 ctxt->validate = 0;
8689 ctxt->loadsubset = 0;
8690 ctxt->depth = 0;
8691
8692 xmlParseContent(ctxt);
8693
8694 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008695 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008696 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008697 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008698 }
8699
8700 /*
8701 * SAX: end of the document processing.
8702 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008703 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008704 ctxt->sax->endDocument(ctxt->userData);
8705
8706 if (! ctxt->wellFormed) return(-1);
8707 return(0);
8708}
8709
Daniel Veillard73b013f2003-09-30 12:36:01 +00008710#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008711/************************************************************************
8712 * *
8713 * Progressive parsing interfaces *
8714 * *
8715 ************************************************************************/
8716
8717/**
8718 * xmlParseLookupSequence:
8719 * @ctxt: an XML parser context
8720 * @first: the first char to lookup
8721 * @next: the next char to lookup or zero
8722 * @third: the next char to lookup or zero
8723 *
8724 * Try to find if a sequence (first, next, third) or just (first next) or
8725 * (first) is available in the input stream.
8726 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8727 * to avoid rescanning sequences of bytes, it DOES change the state of the
8728 * parser, do not use liberally.
8729 *
8730 * Returns the index to the current parsing point if the full sequence
8731 * is available, -1 otherwise.
8732 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008733static int
Owen Taylor3473f882001-02-23 17:55:21 +00008734xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8735 xmlChar next, xmlChar third) {
8736 int base, len;
8737 xmlParserInputPtr in;
8738 const xmlChar *buf;
8739
8740 in = ctxt->input;
8741 if (in == NULL) return(-1);
8742 base = in->cur - in->base;
8743 if (base < 0) return(-1);
8744 if (ctxt->checkIndex > base)
8745 base = ctxt->checkIndex;
8746 if (in->buf == NULL) {
8747 buf = in->base;
8748 len = in->length;
8749 } else {
8750 buf = in->buf->buffer->content;
8751 len = in->buf->buffer->use;
8752 }
8753 /* take into account the sequence length */
8754 if (third) len -= 2;
8755 else if (next) len --;
8756 for (;base < len;base++) {
8757 if (buf[base] == first) {
8758 if (third != 0) {
8759 if ((buf[base + 1] != next) ||
8760 (buf[base + 2] != third)) continue;
8761 } else if (next != 0) {
8762 if (buf[base + 1] != next) continue;
8763 }
8764 ctxt->checkIndex = 0;
8765#ifdef DEBUG_PUSH
8766 if (next == 0)
8767 xmlGenericError(xmlGenericErrorContext,
8768 "PP: lookup '%c' found at %d\n",
8769 first, base);
8770 else if (third == 0)
8771 xmlGenericError(xmlGenericErrorContext,
8772 "PP: lookup '%c%c' found at %d\n",
8773 first, next, base);
8774 else
8775 xmlGenericError(xmlGenericErrorContext,
8776 "PP: lookup '%c%c%c' found at %d\n",
8777 first, next, third, base);
8778#endif
8779 return(base - (in->cur - in->base));
8780 }
8781 }
8782 ctxt->checkIndex = base;
8783#ifdef DEBUG_PUSH
8784 if (next == 0)
8785 xmlGenericError(xmlGenericErrorContext,
8786 "PP: lookup '%c' failed\n", first);
8787 else if (third == 0)
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: lookup '%c%c' failed\n", first, next);
8790 else
8791 xmlGenericError(xmlGenericErrorContext,
8792 "PP: lookup '%c%c%c' failed\n", first, next, third);
8793#endif
8794 return(-1);
8795}
8796
8797/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008798 * xmlParseGetLasts:
8799 * @ctxt: an XML parser context
8800 * @lastlt: pointer to store the last '<' from the input
8801 * @lastgt: pointer to store the last '>' from the input
8802 *
8803 * Lookup the last < and > in the current chunk
8804 */
8805static void
8806xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8807 const xmlChar **lastgt) {
8808 const xmlChar *tmp;
8809
8810 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8811 xmlGenericError(xmlGenericErrorContext,
8812 "Internal error: xmlParseGetLasts\n");
8813 return;
8814 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008815 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008816 tmp = ctxt->input->end;
8817 tmp--;
8818 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8819 (*tmp != '>')) tmp--;
8820 if (tmp < ctxt->input->base) {
8821 *lastlt = NULL;
8822 *lastgt = NULL;
8823 } else if (*tmp == '<') {
8824 *lastlt = tmp;
8825 tmp--;
8826 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8827 if (tmp < ctxt->input->base)
8828 *lastgt = NULL;
8829 else
8830 *lastgt = tmp;
8831 } else {
8832 *lastgt = tmp;
8833 tmp--;
8834 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8835 if (tmp < ctxt->input->base)
8836 *lastlt = NULL;
8837 else
8838 *lastlt = tmp;
8839 }
8840
8841 } else {
8842 *lastlt = NULL;
8843 *lastgt = NULL;
8844 }
8845}
8846/**
Owen Taylor3473f882001-02-23 17:55:21 +00008847 * xmlParseTryOrFinish:
8848 * @ctxt: an XML parser context
8849 * @terminate: last chunk indicator
8850 *
8851 * Try to progress on parsing
8852 *
8853 * Returns zero if no parsing was possible
8854 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008855static int
Owen Taylor3473f882001-02-23 17:55:21 +00008856xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8857 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008858 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008859 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008860 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008861
8862#ifdef DEBUG_PUSH
8863 switch (ctxt->instate) {
8864 case XML_PARSER_EOF:
8865 xmlGenericError(xmlGenericErrorContext,
8866 "PP: try EOF\n"); break;
8867 case XML_PARSER_START:
8868 xmlGenericError(xmlGenericErrorContext,
8869 "PP: try START\n"); break;
8870 case XML_PARSER_MISC:
8871 xmlGenericError(xmlGenericErrorContext,
8872 "PP: try MISC\n");break;
8873 case XML_PARSER_COMMENT:
8874 xmlGenericError(xmlGenericErrorContext,
8875 "PP: try COMMENT\n");break;
8876 case XML_PARSER_PROLOG:
8877 xmlGenericError(xmlGenericErrorContext,
8878 "PP: try PROLOG\n");break;
8879 case XML_PARSER_START_TAG:
8880 xmlGenericError(xmlGenericErrorContext,
8881 "PP: try START_TAG\n");break;
8882 case XML_PARSER_CONTENT:
8883 xmlGenericError(xmlGenericErrorContext,
8884 "PP: try CONTENT\n");break;
8885 case XML_PARSER_CDATA_SECTION:
8886 xmlGenericError(xmlGenericErrorContext,
8887 "PP: try CDATA_SECTION\n");break;
8888 case XML_PARSER_END_TAG:
8889 xmlGenericError(xmlGenericErrorContext,
8890 "PP: try END_TAG\n");break;
8891 case XML_PARSER_ENTITY_DECL:
8892 xmlGenericError(xmlGenericErrorContext,
8893 "PP: try ENTITY_DECL\n");break;
8894 case XML_PARSER_ENTITY_VALUE:
8895 xmlGenericError(xmlGenericErrorContext,
8896 "PP: try ENTITY_VALUE\n");break;
8897 case XML_PARSER_ATTRIBUTE_VALUE:
8898 xmlGenericError(xmlGenericErrorContext,
8899 "PP: try ATTRIBUTE_VALUE\n");break;
8900 case XML_PARSER_DTD:
8901 xmlGenericError(xmlGenericErrorContext,
8902 "PP: try DTD\n");break;
8903 case XML_PARSER_EPILOG:
8904 xmlGenericError(xmlGenericErrorContext,
8905 "PP: try EPILOG\n");break;
8906 case XML_PARSER_PI:
8907 xmlGenericError(xmlGenericErrorContext,
8908 "PP: try PI\n");break;
8909 case XML_PARSER_IGNORE:
8910 xmlGenericError(xmlGenericErrorContext,
8911 "PP: try IGNORE\n");break;
8912 }
8913#endif
8914
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008915 if ((ctxt->input != NULL) &&
8916 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008917 xmlSHRINK(ctxt);
8918 ctxt->checkIndex = 0;
8919 }
8920 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008921
Daniel Veillarda880b122003-04-21 21:36:41 +00008922 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008923 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8924 return(0);
8925
8926
Owen Taylor3473f882001-02-23 17:55:21 +00008927 /*
8928 * Pop-up of finished entities.
8929 */
8930 while ((RAW == 0) && (ctxt->inputNr > 1))
8931 xmlPopInput(ctxt);
8932
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008933 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008934 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008935 avail = ctxt->input->length -
8936 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008937 else {
8938 /*
8939 * If we are operating on converted input, try to flush
8940 * remainng chars to avoid them stalling in the non-converted
8941 * buffer.
8942 */
8943 if ((ctxt->input->buf->raw != NULL) &&
8944 (ctxt->input->buf->raw->use > 0)) {
8945 int base = ctxt->input->base -
8946 ctxt->input->buf->buffer->content;
8947 int current = ctxt->input->cur - ctxt->input->base;
8948
8949 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8950 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8951 ctxt->input->cur = ctxt->input->base + current;
8952 ctxt->input->end =
8953 &ctxt->input->buf->buffer->content[
8954 ctxt->input->buf->buffer->use];
8955 }
8956 avail = ctxt->input->buf->buffer->use -
8957 (ctxt->input->cur - ctxt->input->base);
8958 }
Owen Taylor3473f882001-02-23 17:55:21 +00008959 if (avail < 1)
8960 goto done;
8961 switch (ctxt->instate) {
8962 case XML_PARSER_EOF:
8963 /*
8964 * Document parsing is done !
8965 */
8966 goto done;
8967 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008968 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8969 xmlChar start[4];
8970 xmlCharEncoding enc;
8971
8972 /*
8973 * Very first chars read from the document flow.
8974 */
8975 if (avail < 4)
8976 goto done;
8977
8978 /*
8979 * Get the 4 first bytes and decode the charset
8980 * if enc != XML_CHAR_ENCODING_NONE
8981 * plug some encoding conversion routines.
8982 */
8983 start[0] = RAW;
8984 start[1] = NXT(1);
8985 start[2] = NXT(2);
8986 start[3] = NXT(3);
8987 enc = xmlDetectCharEncoding(start, 4);
8988 if (enc != XML_CHAR_ENCODING_NONE) {
8989 xmlSwitchEncoding(ctxt, enc);
8990 }
8991 break;
8992 }
Owen Taylor3473f882001-02-23 17:55:21 +00008993
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00008994 if (avail < 2)
8995 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00008996 cur = ctxt->input->cur[0];
8997 next = ctxt->input->cur[1];
8998 if (cur == 0) {
8999 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9000 ctxt->sax->setDocumentLocator(ctxt->userData,
9001 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009002 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009003 ctxt->instate = XML_PARSER_EOF;
9004#ifdef DEBUG_PUSH
9005 xmlGenericError(xmlGenericErrorContext,
9006 "PP: entering EOF\n");
9007#endif
9008 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9009 ctxt->sax->endDocument(ctxt->userData);
9010 goto done;
9011 }
9012 if ((cur == '<') && (next == '?')) {
9013 /* PI or XML decl */
9014 if (avail < 5) return(ret);
9015 if ((!terminate) &&
9016 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9017 return(ret);
9018 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9019 ctxt->sax->setDocumentLocator(ctxt->userData,
9020 &xmlDefaultSAXLocator);
9021 if ((ctxt->input->cur[2] == 'x') &&
9022 (ctxt->input->cur[3] == 'm') &&
9023 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009024 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009025 ret += 5;
9026#ifdef DEBUG_PUSH
9027 xmlGenericError(xmlGenericErrorContext,
9028 "PP: Parsing XML Decl\n");
9029#endif
9030 xmlParseXMLDecl(ctxt);
9031 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9032 /*
9033 * The XML REC instructs us to stop parsing right
9034 * here
9035 */
9036 ctxt->instate = XML_PARSER_EOF;
9037 return(0);
9038 }
9039 ctxt->standalone = ctxt->input->standalone;
9040 if ((ctxt->encoding == NULL) &&
9041 (ctxt->input->encoding != NULL))
9042 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9043 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9044 (!ctxt->disableSAX))
9045 ctxt->sax->startDocument(ctxt->userData);
9046 ctxt->instate = XML_PARSER_MISC;
9047#ifdef DEBUG_PUSH
9048 xmlGenericError(xmlGenericErrorContext,
9049 "PP: entering MISC\n");
9050#endif
9051 } else {
9052 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9053 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9054 (!ctxt->disableSAX))
9055 ctxt->sax->startDocument(ctxt->userData);
9056 ctxt->instate = XML_PARSER_MISC;
9057#ifdef DEBUG_PUSH
9058 xmlGenericError(xmlGenericErrorContext,
9059 "PP: entering MISC\n");
9060#endif
9061 }
9062 } else {
9063 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9064 ctxt->sax->setDocumentLocator(ctxt->userData,
9065 &xmlDefaultSAXLocator);
9066 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9067 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9068 (!ctxt->disableSAX))
9069 ctxt->sax->startDocument(ctxt->userData);
9070 ctxt->instate = XML_PARSER_MISC;
9071#ifdef DEBUG_PUSH
9072 xmlGenericError(xmlGenericErrorContext,
9073 "PP: entering MISC\n");
9074#endif
9075 }
9076 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009077 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009078 const xmlChar *name;
9079 const xmlChar *prefix;
9080 const xmlChar *URI;
9081 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009082
9083 if ((avail < 2) && (ctxt->inputNr == 1))
9084 goto done;
9085 cur = ctxt->input->cur[0];
9086 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009087 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009088 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009089 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9090 ctxt->sax->endDocument(ctxt->userData);
9091 goto done;
9092 }
9093 if (!terminate) {
9094 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009095 /* > can be found unescaped in attribute values */
9096 if ((lastlt == NULL) || (ctxt->input->cur >= lastlt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009097 goto done;
9098 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9099 goto done;
9100 }
9101 }
9102 if (ctxt->spaceNr == 0)
9103 spacePush(ctxt, -1);
9104 else
9105 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009106#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009107 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009108#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009109 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009110#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009111 else
9112 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009113#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009114 if (name == NULL) {
9115 spacePop(ctxt);
9116 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009117 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9118 ctxt->sax->endDocument(ctxt->userData);
9119 goto done;
9120 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009121#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009122 /*
9123 * [ VC: Root Element Type ]
9124 * The Name in the document type declaration must match
9125 * the element type of the root element.
9126 */
9127 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9128 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9129 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009130#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009131
9132 /*
9133 * Check for an Empty Element.
9134 */
9135 if ((RAW == '/') && (NXT(1) == '>')) {
9136 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009137
9138 if (ctxt->sax2) {
9139 if ((ctxt->sax != NULL) &&
9140 (ctxt->sax->endElementNs != NULL) &&
9141 (!ctxt->disableSAX))
9142 ctxt->sax->endElementNs(ctxt->userData, name,
9143 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009144#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009145 } else {
9146 if ((ctxt->sax != NULL) &&
9147 (ctxt->sax->endElement != NULL) &&
9148 (!ctxt->disableSAX))
9149 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009150#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009151 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009152 spacePop(ctxt);
9153 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009154 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009155 } else {
9156 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009157 }
9158 break;
9159 }
9160 if (RAW == '>') {
9161 NEXT;
9162 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009163 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009164 "Couldn't find end of Start Tag %s\n",
9165 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009166 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009167 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009168 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009169 if (ctxt->sax2)
9170 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009171#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009172 else
9173 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009174#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009175
Daniel Veillarda880b122003-04-21 21:36:41 +00009176 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009177 break;
9178 }
9179 case XML_PARSER_CONTENT: {
9180 const xmlChar *test;
9181 unsigned int cons;
9182 if ((avail < 2) && (ctxt->inputNr == 1))
9183 goto done;
9184 cur = ctxt->input->cur[0];
9185 next = ctxt->input->cur[1];
9186
9187 test = CUR_PTR;
9188 cons = ctxt->input->consumed;
9189 if ((cur == '<') && (next == '/')) {
9190 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009191 break;
9192 } else if ((cur == '<') && (next == '?')) {
9193 if ((!terminate) &&
9194 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9195 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009196 xmlParsePI(ctxt);
9197 } else if ((cur == '<') && (next != '!')) {
9198 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009199 break;
9200 } else if ((cur == '<') && (next == '!') &&
9201 (ctxt->input->cur[2] == '-') &&
9202 (ctxt->input->cur[3] == '-')) {
9203 if ((!terminate) &&
9204 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9205 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009206 xmlParseComment(ctxt);
9207 ctxt->instate = XML_PARSER_CONTENT;
9208 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9209 (ctxt->input->cur[2] == '[') &&
9210 (ctxt->input->cur[3] == 'C') &&
9211 (ctxt->input->cur[4] == 'D') &&
9212 (ctxt->input->cur[5] == 'A') &&
9213 (ctxt->input->cur[6] == 'T') &&
9214 (ctxt->input->cur[7] == 'A') &&
9215 (ctxt->input->cur[8] == '[')) {
9216 SKIP(9);
9217 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009218 break;
9219 } else if ((cur == '<') && (next == '!') &&
9220 (avail < 9)) {
9221 goto done;
9222 } else if (cur == '&') {
9223 if ((!terminate) &&
9224 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9225 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009226 xmlParseReference(ctxt);
9227 } else {
9228 /* TODO Avoid the extra copy, handle directly !!! */
9229 /*
9230 * Goal of the following test is:
9231 * - minimize calls to the SAX 'character' callback
9232 * when they are mergeable
9233 * - handle an problem for isBlank when we only parse
9234 * a sequence of blank chars and the next one is
9235 * not available to check against '<' presence.
9236 * - tries to homogenize the differences in SAX
9237 * callbacks between the push and pull versions
9238 * of the parser.
9239 */
9240 if ((ctxt->inputNr == 1) &&
9241 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9242 if (!terminate) {
9243 if (ctxt->progressive) {
9244 if ((lastlt == NULL) ||
9245 (ctxt->input->cur > lastlt))
9246 goto done;
9247 } else if (xmlParseLookupSequence(ctxt,
9248 '<', 0, 0) < 0) {
9249 goto done;
9250 }
9251 }
9252 }
9253 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009254 xmlParseCharData(ctxt, 0);
9255 }
9256 /*
9257 * Pop-up of finished entities.
9258 */
9259 while ((RAW == 0) && (ctxt->inputNr > 1))
9260 xmlPopInput(ctxt);
9261 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009262 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9263 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009264 ctxt->instate = XML_PARSER_EOF;
9265 break;
9266 }
9267 break;
9268 }
9269 case XML_PARSER_END_TAG:
9270 if (avail < 2)
9271 goto done;
9272 if (!terminate) {
9273 if (ctxt->progressive) {
9274 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9275 goto done;
9276 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9277 goto done;
9278 }
9279 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009280 if (ctxt->sax2) {
9281 xmlParseEndTag2(ctxt,
9282 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9283 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009284 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009285 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009286 }
9287#ifdef LIBXML_SAX1_ENABLED
9288 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009289 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009290#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009291 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009292 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009293 } else {
9294 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009295 }
9296 break;
9297 case XML_PARSER_CDATA_SECTION: {
9298 /*
9299 * The Push mode need to have the SAX callback for
9300 * cdataBlock merge back contiguous callbacks.
9301 */
9302 int base;
9303
9304 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9305 if (base < 0) {
9306 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9307 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9308 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009309 ctxt->sax->cdataBlock(ctxt->userData,
9310 ctxt->input->cur,
9311 XML_PARSER_BIG_BUFFER_SIZE);
9312 else if (ctxt->sax->characters != NULL)
9313 ctxt->sax->characters(ctxt->userData,
9314 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009315 XML_PARSER_BIG_BUFFER_SIZE);
9316 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009317 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009318 ctxt->checkIndex = 0;
9319 }
9320 goto done;
9321 } else {
9322 if ((ctxt->sax != NULL) && (base > 0) &&
9323 (!ctxt->disableSAX)) {
9324 if (ctxt->sax->cdataBlock != NULL)
9325 ctxt->sax->cdataBlock(ctxt->userData,
9326 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009327 else if (ctxt->sax->characters != NULL)
9328 ctxt->sax->characters(ctxt->userData,
9329 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009330 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009331 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009332 ctxt->checkIndex = 0;
9333 ctxt->instate = XML_PARSER_CONTENT;
9334#ifdef DEBUG_PUSH
9335 xmlGenericError(xmlGenericErrorContext,
9336 "PP: entering CONTENT\n");
9337#endif
9338 }
9339 break;
9340 }
Owen Taylor3473f882001-02-23 17:55:21 +00009341 case XML_PARSER_MISC:
9342 SKIP_BLANKS;
9343 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009344 avail = ctxt->input->length -
9345 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009346 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009347 avail = ctxt->input->buf->buffer->use -
9348 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009349 if (avail < 2)
9350 goto done;
9351 cur = ctxt->input->cur[0];
9352 next = ctxt->input->cur[1];
9353 if ((cur == '<') && (next == '?')) {
9354 if ((!terminate) &&
9355 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9356 goto done;
9357#ifdef DEBUG_PUSH
9358 xmlGenericError(xmlGenericErrorContext,
9359 "PP: Parsing PI\n");
9360#endif
9361 xmlParsePI(ctxt);
9362 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009363 (ctxt->input->cur[2] == '-') &&
9364 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009365 if ((!terminate) &&
9366 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9367 goto done;
9368#ifdef DEBUG_PUSH
9369 xmlGenericError(xmlGenericErrorContext,
9370 "PP: Parsing Comment\n");
9371#endif
9372 xmlParseComment(ctxt);
9373 ctxt->instate = XML_PARSER_MISC;
9374 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009375 (ctxt->input->cur[2] == 'D') &&
9376 (ctxt->input->cur[3] == 'O') &&
9377 (ctxt->input->cur[4] == 'C') &&
9378 (ctxt->input->cur[5] == 'T') &&
9379 (ctxt->input->cur[6] == 'Y') &&
9380 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009381 (ctxt->input->cur[8] == 'E')) {
9382 if ((!terminate) &&
9383 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9384 goto done;
9385#ifdef DEBUG_PUSH
9386 xmlGenericError(xmlGenericErrorContext,
9387 "PP: Parsing internal subset\n");
9388#endif
9389 ctxt->inSubset = 1;
9390 xmlParseDocTypeDecl(ctxt);
9391 if (RAW == '[') {
9392 ctxt->instate = XML_PARSER_DTD;
9393#ifdef DEBUG_PUSH
9394 xmlGenericError(xmlGenericErrorContext,
9395 "PP: entering DTD\n");
9396#endif
9397 } else {
9398 /*
9399 * Create and update the external subset.
9400 */
9401 ctxt->inSubset = 2;
9402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9403 (ctxt->sax->externalSubset != NULL))
9404 ctxt->sax->externalSubset(ctxt->userData,
9405 ctxt->intSubName, ctxt->extSubSystem,
9406 ctxt->extSubURI);
9407 ctxt->inSubset = 0;
9408 ctxt->instate = XML_PARSER_PROLOG;
9409#ifdef DEBUG_PUSH
9410 xmlGenericError(xmlGenericErrorContext,
9411 "PP: entering PROLOG\n");
9412#endif
9413 }
9414 } else if ((cur == '<') && (next == '!') &&
9415 (avail < 9)) {
9416 goto done;
9417 } else {
9418 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009419 ctxt->progressive = 1;
9420 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009421#ifdef DEBUG_PUSH
9422 xmlGenericError(xmlGenericErrorContext,
9423 "PP: entering START_TAG\n");
9424#endif
9425 }
9426 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009427 case XML_PARSER_PROLOG:
9428 SKIP_BLANKS;
9429 if (ctxt->input->buf == NULL)
9430 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9431 else
9432 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9433 if (avail < 2)
9434 goto done;
9435 cur = ctxt->input->cur[0];
9436 next = ctxt->input->cur[1];
9437 if ((cur == '<') && (next == '?')) {
9438 if ((!terminate) &&
9439 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9440 goto done;
9441#ifdef DEBUG_PUSH
9442 xmlGenericError(xmlGenericErrorContext,
9443 "PP: Parsing PI\n");
9444#endif
9445 xmlParsePI(ctxt);
9446 } else if ((cur == '<') && (next == '!') &&
9447 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9448 if ((!terminate) &&
9449 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9450 goto done;
9451#ifdef DEBUG_PUSH
9452 xmlGenericError(xmlGenericErrorContext,
9453 "PP: Parsing Comment\n");
9454#endif
9455 xmlParseComment(ctxt);
9456 ctxt->instate = XML_PARSER_PROLOG;
9457 } else if ((cur == '<') && (next == '!') &&
9458 (avail < 4)) {
9459 goto done;
9460 } else {
9461 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009462 if (ctxt->progressive == 0)
9463 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009464 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009465#ifdef DEBUG_PUSH
9466 xmlGenericError(xmlGenericErrorContext,
9467 "PP: entering START_TAG\n");
9468#endif
9469 }
9470 break;
9471 case XML_PARSER_EPILOG:
9472 SKIP_BLANKS;
9473 if (ctxt->input->buf == NULL)
9474 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9475 else
9476 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9477 if (avail < 2)
9478 goto done;
9479 cur = ctxt->input->cur[0];
9480 next = ctxt->input->cur[1];
9481 if ((cur == '<') && (next == '?')) {
9482 if ((!terminate) &&
9483 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9484 goto done;
9485#ifdef DEBUG_PUSH
9486 xmlGenericError(xmlGenericErrorContext,
9487 "PP: Parsing PI\n");
9488#endif
9489 xmlParsePI(ctxt);
9490 ctxt->instate = XML_PARSER_EPILOG;
9491 } else if ((cur == '<') && (next == '!') &&
9492 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9493 if ((!terminate) &&
9494 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9495 goto done;
9496#ifdef DEBUG_PUSH
9497 xmlGenericError(xmlGenericErrorContext,
9498 "PP: Parsing Comment\n");
9499#endif
9500 xmlParseComment(ctxt);
9501 ctxt->instate = XML_PARSER_EPILOG;
9502 } else if ((cur == '<') && (next == '!') &&
9503 (avail < 4)) {
9504 goto done;
9505 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009506 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009507 ctxt->instate = XML_PARSER_EOF;
9508#ifdef DEBUG_PUSH
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: entering EOF\n");
9511#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009512 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009513 ctxt->sax->endDocument(ctxt->userData);
9514 goto done;
9515 }
9516 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009517 case XML_PARSER_DTD: {
9518 /*
9519 * Sorry but progressive parsing of the internal subset
9520 * is not expected to be supported. We first check that
9521 * the full content of the internal subset is available and
9522 * the parsing is launched only at that point.
9523 * Internal subset ends up with "']' S? '>'" in an unescaped
9524 * section and not in a ']]>' sequence which are conditional
9525 * sections (whoever argued to keep that crap in XML deserve
9526 * a place in hell !).
9527 */
9528 int base, i;
9529 xmlChar *buf;
9530 xmlChar quote = 0;
9531
9532 base = ctxt->input->cur - ctxt->input->base;
9533 if (base < 0) return(0);
9534 if (ctxt->checkIndex > base)
9535 base = ctxt->checkIndex;
9536 buf = ctxt->input->buf->buffer->content;
9537 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9538 base++) {
9539 if (quote != 0) {
9540 if (buf[base] == quote)
9541 quote = 0;
9542 continue;
9543 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009544 if ((quote == 0) && (buf[base] == '<')) {
9545 int found = 0;
9546 /* special handling of comments */
9547 if (((unsigned int) base + 4 <
9548 ctxt->input->buf->buffer->use) &&
9549 (buf[base + 1] == '!') &&
9550 (buf[base + 2] == '-') &&
9551 (buf[base + 3] == '-')) {
9552 for (;(unsigned int) base + 3 <
9553 ctxt->input->buf->buffer->use; base++) {
9554 if ((buf[base] == '-') &&
9555 (buf[base + 1] == '-') &&
9556 (buf[base + 2] == '>')) {
9557 found = 1;
9558 base += 2;
9559 break;
9560 }
9561 }
9562 if (!found)
9563 break;
9564 continue;
9565 }
9566 }
Owen Taylor3473f882001-02-23 17:55:21 +00009567 if (buf[base] == '"') {
9568 quote = '"';
9569 continue;
9570 }
9571 if (buf[base] == '\'') {
9572 quote = '\'';
9573 continue;
9574 }
9575 if (buf[base] == ']') {
9576 if ((unsigned int) base +1 >=
9577 ctxt->input->buf->buffer->use)
9578 break;
9579 if (buf[base + 1] == ']') {
9580 /* conditional crap, skip both ']' ! */
9581 base++;
9582 continue;
9583 }
9584 for (i = 0;
9585 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9586 i++) {
9587 if (buf[base + i] == '>')
9588 goto found_end_int_subset;
9589 }
9590 break;
9591 }
9592 }
9593 /*
9594 * We didn't found the end of the Internal subset
9595 */
9596 if (quote == 0)
9597 ctxt->checkIndex = base;
9598#ifdef DEBUG_PUSH
9599 if (next == 0)
9600 xmlGenericError(xmlGenericErrorContext,
9601 "PP: lookup of int subset end filed\n");
9602#endif
9603 goto done;
9604
9605found_end_int_subset:
9606 xmlParseInternalSubset(ctxt);
9607 ctxt->inSubset = 2;
9608 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9609 (ctxt->sax->externalSubset != NULL))
9610 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9611 ctxt->extSubSystem, ctxt->extSubURI);
9612 ctxt->inSubset = 0;
9613 ctxt->instate = XML_PARSER_PROLOG;
9614 ctxt->checkIndex = 0;
9615#ifdef DEBUG_PUSH
9616 xmlGenericError(xmlGenericErrorContext,
9617 "PP: entering PROLOG\n");
9618#endif
9619 break;
9620 }
9621 case XML_PARSER_COMMENT:
9622 xmlGenericError(xmlGenericErrorContext,
9623 "PP: internal error, state == COMMENT\n");
9624 ctxt->instate = XML_PARSER_CONTENT;
9625#ifdef DEBUG_PUSH
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: entering CONTENT\n");
9628#endif
9629 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009630 case XML_PARSER_IGNORE:
9631 xmlGenericError(xmlGenericErrorContext,
9632 "PP: internal error, state == IGNORE");
9633 ctxt->instate = XML_PARSER_DTD;
9634#ifdef DEBUG_PUSH
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: entering DTD\n");
9637#endif
9638 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009639 case XML_PARSER_PI:
9640 xmlGenericError(xmlGenericErrorContext,
9641 "PP: internal error, state == PI\n");
9642 ctxt->instate = XML_PARSER_CONTENT;
9643#ifdef DEBUG_PUSH
9644 xmlGenericError(xmlGenericErrorContext,
9645 "PP: entering CONTENT\n");
9646#endif
9647 break;
9648 case XML_PARSER_ENTITY_DECL:
9649 xmlGenericError(xmlGenericErrorContext,
9650 "PP: internal error, state == ENTITY_DECL\n");
9651 ctxt->instate = XML_PARSER_DTD;
9652#ifdef DEBUG_PUSH
9653 xmlGenericError(xmlGenericErrorContext,
9654 "PP: entering DTD\n");
9655#endif
9656 break;
9657 case XML_PARSER_ENTITY_VALUE:
9658 xmlGenericError(xmlGenericErrorContext,
9659 "PP: internal error, state == ENTITY_VALUE\n");
9660 ctxt->instate = XML_PARSER_CONTENT;
9661#ifdef DEBUG_PUSH
9662 xmlGenericError(xmlGenericErrorContext,
9663 "PP: entering DTD\n");
9664#endif
9665 break;
9666 case XML_PARSER_ATTRIBUTE_VALUE:
9667 xmlGenericError(xmlGenericErrorContext,
9668 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9669 ctxt->instate = XML_PARSER_START_TAG;
9670#ifdef DEBUG_PUSH
9671 xmlGenericError(xmlGenericErrorContext,
9672 "PP: entering START_TAG\n");
9673#endif
9674 break;
9675 case XML_PARSER_SYSTEM_LITERAL:
9676 xmlGenericError(xmlGenericErrorContext,
9677 "PP: internal error, state == SYSTEM_LITERAL\n");
9678 ctxt->instate = XML_PARSER_START_TAG;
9679#ifdef DEBUG_PUSH
9680 xmlGenericError(xmlGenericErrorContext,
9681 "PP: entering START_TAG\n");
9682#endif
9683 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009684 case XML_PARSER_PUBLIC_LITERAL:
9685 xmlGenericError(xmlGenericErrorContext,
9686 "PP: internal error, state == PUBLIC_LITERAL\n");
9687 ctxt->instate = XML_PARSER_START_TAG;
9688#ifdef DEBUG_PUSH
9689 xmlGenericError(xmlGenericErrorContext,
9690 "PP: entering START_TAG\n");
9691#endif
9692 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009693 }
9694 }
9695done:
9696#ifdef DEBUG_PUSH
9697 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9698#endif
9699 return(ret);
9700}
9701
9702/**
Owen Taylor3473f882001-02-23 17:55:21 +00009703 * xmlParseChunk:
9704 * @ctxt: an XML parser context
9705 * @chunk: an char array
9706 * @size: the size in byte of the chunk
9707 * @terminate: last chunk indicator
9708 *
9709 * Parse a Chunk of memory
9710 *
9711 * Returns zero if no error, the xmlParserErrors otherwise.
9712 */
9713int
9714xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9715 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009716 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9717 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009718 if (ctxt->instate == XML_PARSER_START)
9719 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009720 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9721 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9722 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9723 int cur = ctxt->input->cur - ctxt->input->base;
9724
9725 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9726 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9727 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009728 ctxt->input->end =
9729 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009730#ifdef DEBUG_PUSH
9731 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9732#endif
9733
Owen Taylor3473f882001-02-23 17:55:21 +00009734 } else if (ctxt->instate != XML_PARSER_EOF) {
9735 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9736 xmlParserInputBufferPtr in = ctxt->input->buf;
9737 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9738 (in->raw != NULL)) {
9739 int nbchars;
9740
9741 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9742 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009743 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009744 xmlGenericError(xmlGenericErrorContext,
9745 "xmlParseChunk: encoder error\n");
9746 return(XML_ERR_INVALID_ENCODING);
9747 }
9748 }
9749 }
9750 }
9751 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009752 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9753 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009754 if (terminate) {
9755 /*
9756 * Check for termination
9757 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009758 int avail = 0;
9759 if (ctxt->input->buf == NULL)
9760 avail = ctxt->input->length -
9761 (ctxt->input->cur - ctxt->input->base);
9762 else
9763 avail = ctxt->input->buf->buffer->use -
9764 (ctxt->input->cur - ctxt->input->base);
9765
Owen Taylor3473f882001-02-23 17:55:21 +00009766 if ((ctxt->instate != XML_PARSER_EOF) &&
9767 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009768 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009769 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009770 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009771 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009772 }
Owen Taylor3473f882001-02-23 17:55:21 +00009773 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009774 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009775 ctxt->sax->endDocument(ctxt->userData);
9776 }
9777 ctxt->instate = XML_PARSER_EOF;
9778 }
9779 return((xmlParserErrors) ctxt->errNo);
9780}
9781
9782/************************************************************************
9783 * *
9784 * I/O front end functions to the parser *
9785 * *
9786 ************************************************************************/
9787
9788/**
9789 * xmlStopParser:
9790 * @ctxt: an XML parser context
9791 *
9792 * Blocks further parser processing
9793 */
9794void
9795xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009796 if (ctxt == NULL)
9797 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009798 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009799 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009800 if (ctxt->input != NULL)
9801 ctxt->input->cur = BAD_CAST"";
9802}
9803
9804/**
9805 * xmlCreatePushParserCtxt:
9806 * @sax: a SAX handler
9807 * @user_data: The user data returned on SAX callbacks
9808 * @chunk: a pointer to an array of chars
9809 * @size: number of chars in the array
9810 * @filename: an optional file name or URI
9811 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009812 * Create a parser context for using the XML parser in push mode.
9813 * If @buffer and @size are non-NULL, the data is used to detect
9814 * the encoding. The remaining characters will be parsed so they
9815 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009816 * To allow content encoding detection, @size should be >= 4
9817 * The value of @filename is used for fetching external entities
9818 * and error/warning reports.
9819 *
9820 * Returns the new parser context or NULL
9821 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009822
Owen Taylor3473f882001-02-23 17:55:21 +00009823xmlParserCtxtPtr
9824xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9825 const char *chunk, int size, const char *filename) {
9826 xmlParserCtxtPtr ctxt;
9827 xmlParserInputPtr inputStream;
9828 xmlParserInputBufferPtr buf;
9829 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9830
9831 /*
9832 * plug some encoding conversion routines
9833 */
9834 if ((chunk != NULL) && (size >= 4))
9835 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9836
9837 buf = xmlAllocParserInputBuffer(enc);
9838 if (buf == NULL) return(NULL);
9839
9840 ctxt = xmlNewParserCtxt();
9841 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009842 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009843 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009844 return(NULL);
9845 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009846 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9847 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009848 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009849 xmlFreeParserInputBuffer(buf);
9850 xmlFreeParserCtxt(ctxt);
9851 return(NULL);
9852 }
Owen Taylor3473f882001-02-23 17:55:21 +00009853 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009854#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009855 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009856#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009857 xmlFree(ctxt->sax);
9858 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9859 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009860 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009861 xmlFreeParserInputBuffer(buf);
9862 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009863 return(NULL);
9864 }
9865 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9866 if (user_data != NULL)
9867 ctxt->userData = user_data;
9868 }
9869 if (filename == NULL) {
9870 ctxt->directory = NULL;
9871 } else {
9872 ctxt->directory = xmlParserGetDirectory(filename);
9873 }
9874
9875 inputStream = xmlNewInputStream(ctxt);
9876 if (inputStream == NULL) {
9877 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009878 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009879 return(NULL);
9880 }
9881
9882 if (filename == NULL)
9883 inputStream->filename = NULL;
9884 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009885 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009886 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009887 inputStream->buf = buf;
9888 inputStream->base = inputStream->buf->buffer->content;
9889 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009890 inputStream->end =
9891 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009892
9893 inputPush(ctxt, inputStream);
9894
9895 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9896 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009897 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9898 int cur = ctxt->input->cur - ctxt->input->base;
9899
Owen Taylor3473f882001-02-23 17:55:21 +00009900 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009901
9902 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9903 ctxt->input->cur = ctxt->input->base + cur;
9904 ctxt->input->end =
9905 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009906#ifdef DEBUG_PUSH
9907 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9908#endif
9909 }
9910
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009911 if (enc != XML_CHAR_ENCODING_NONE) {
9912 xmlSwitchEncoding(ctxt, enc);
9913 }
9914
Owen Taylor3473f882001-02-23 17:55:21 +00009915 return(ctxt);
9916}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009917#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009918
9919/**
9920 * xmlCreateIOParserCtxt:
9921 * @sax: a SAX handler
9922 * @user_data: The user data returned on SAX callbacks
9923 * @ioread: an I/O read function
9924 * @ioclose: an I/O close function
9925 * @ioctx: an I/O handler
9926 * @enc: the charset encoding if known
9927 *
9928 * Create a parser context for using the XML parser with an existing
9929 * I/O stream
9930 *
9931 * Returns the new parser context or NULL
9932 */
9933xmlParserCtxtPtr
9934xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9935 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9936 void *ioctx, xmlCharEncoding enc) {
9937 xmlParserCtxtPtr ctxt;
9938 xmlParserInputPtr inputStream;
9939 xmlParserInputBufferPtr buf;
9940
9941 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9942 if (buf == NULL) return(NULL);
9943
9944 ctxt = xmlNewParserCtxt();
9945 if (ctxt == NULL) {
9946 xmlFree(buf);
9947 return(NULL);
9948 }
9949 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009950#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009951 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009952#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009953 xmlFree(ctxt->sax);
9954 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9955 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009956 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009957 xmlFree(ctxt);
9958 return(NULL);
9959 }
9960 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9961 if (user_data != NULL)
9962 ctxt->userData = user_data;
9963 }
9964
9965 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9966 if (inputStream == NULL) {
9967 xmlFreeParserCtxt(ctxt);
9968 return(NULL);
9969 }
9970 inputPush(ctxt, inputStream);
9971
9972 return(ctxt);
9973}
9974
Daniel Veillard4432df22003-09-28 18:58:27 +00009975#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009976/************************************************************************
9977 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009978 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009979 * *
9980 ************************************************************************/
9981
9982/**
9983 * xmlIOParseDTD:
9984 * @sax: the SAX handler block or NULL
9985 * @input: an Input Buffer
9986 * @enc: the charset encoding if known
9987 *
9988 * Load and parse a DTD
9989 *
9990 * Returns the resulting xmlDtdPtr or NULL in case of error.
9991 * @input will be freed at parsing end.
9992 */
9993
9994xmlDtdPtr
9995xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9996 xmlCharEncoding enc) {
9997 xmlDtdPtr ret = NULL;
9998 xmlParserCtxtPtr ctxt;
9999 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010000 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010001
10002 if (input == NULL)
10003 return(NULL);
10004
10005 ctxt = xmlNewParserCtxt();
10006 if (ctxt == NULL) {
10007 return(NULL);
10008 }
10009
10010 /*
10011 * Set-up the SAX context
10012 */
10013 if (sax != NULL) {
10014 if (ctxt->sax != NULL)
10015 xmlFree(ctxt->sax);
10016 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010017 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010018 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010019 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010020
10021 /*
10022 * generate a parser input from the I/O handler
10023 */
10024
Daniel Veillard43caefb2003-12-07 19:32:22 +000010025 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010026 if (pinput == NULL) {
10027 if (sax != NULL) ctxt->sax = NULL;
10028 xmlFreeParserCtxt(ctxt);
10029 return(NULL);
10030 }
10031
10032 /*
10033 * plug some encoding conversion routines here.
10034 */
10035 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010036 if (enc != XML_CHAR_ENCODING_NONE) {
10037 xmlSwitchEncoding(ctxt, enc);
10038 }
Owen Taylor3473f882001-02-23 17:55:21 +000010039
10040 pinput->filename = NULL;
10041 pinput->line = 1;
10042 pinput->col = 1;
10043 pinput->base = ctxt->input->cur;
10044 pinput->cur = ctxt->input->cur;
10045 pinput->free = NULL;
10046
10047 /*
10048 * let's parse that entity knowing it's an external subset.
10049 */
10050 ctxt->inSubset = 2;
10051 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10052 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10053 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010054
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010055 if ((enc == XML_CHAR_ENCODING_NONE) &&
10056 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010057 /*
10058 * Get the 4 first bytes and decode the charset
10059 * if enc != XML_CHAR_ENCODING_NONE
10060 * plug some encoding conversion routines.
10061 */
10062 start[0] = RAW;
10063 start[1] = NXT(1);
10064 start[2] = NXT(2);
10065 start[3] = NXT(3);
10066 enc = xmlDetectCharEncoding(start, 4);
10067 if (enc != XML_CHAR_ENCODING_NONE) {
10068 xmlSwitchEncoding(ctxt, enc);
10069 }
10070 }
10071
Owen Taylor3473f882001-02-23 17:55:21 +000010072 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10073
10074 if (ctxt->myDoc != NULL) {
10075 if (ctxt->wellFormed) {
10076 ret = ctxt->myDoc->extSubset;
10077 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010078 if (ret != NULL) {
10079 xmlNodePtr tmp;
10080
10081 ret->doc = NULL;
10082 tmp = ret->children;
10083 while (tmp != NULL) {
10084 tmp->doc = NULL;
10085 tmp = tmp->next;
10086 }
10087 }
Owen Taylor3473f882001-02-23 17:55:21 +000010088 } else {
10089 ret = NULL;
10090 }
10091 xmlFreeDoc(ctxt->myDoc);
10092 ctxt->myDoc = NULL;
10093 }
10094 if (sax != NULL) ctxt->sax = NULL;
10095 xmlFreeParserCtxt(ctxt);
10096
10097 return(ret);
10098}
10099
10100/**
10101 * xmlSAXParseDTD:
10102 * @sax: the SAX handler block
10103 * @ExternalID: a NAME* containing the External ID of the DTD
10104 * @SystemID: a NAME* containing the URL to the DTD
10105 *
10106 * Load and parse an external subset.
10107 *
10108 * Returns the resulting xmlDtdPtr or NULL in case of error.
10109 */
10110
10111xmlDtdPtr
10112xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10113 const xmlChar *SystemID) {
10114 xmlDtdPtr ret = NULL;
10115 xmlParserCtxtPtr ctxt;
10116 xmlParserInputPtr input = NULL;
10117 xmlCharEncoding enc;
10118
10119 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10120
10121 ctxt = xmlNewParserCtxt();
10122 if (ctxt == NULL) {
10123 return(NULL);
10124 }
10125
10126 /*
10127 * Set-up the SAX context
10128 */
10129 if (sax != NULL) {
10130 if (ctxt->sax != NULL)
10131 xmlFree(ctxt->sax);
10132 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010133 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010134 }
10135
10136 /*
10137 * Ask the Entity resolver to load the damn thing
10138 */
10139
10140 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010141 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010142 if (input == NULL) {
10143 if (sax != NULL) ctxt->sax = NULL;
10144 xmlFreeParserCtxt(ctxt);
10145 return(NULL);
10146 }
10147
10148 /*
10149 * plug some encoding conversion routines here.
10150 */
10151 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010152 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10153 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10154 xmlSwitchEncoding(ctxt, enc);
10155 }
Owen Taylor3473f882001-02-23 17:55:21 +000010156
10157 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010158 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010159 input->line = 1;
10160 input->col = 1;
10161 input->base = ctxt->input->cur;
10162 input->cur = ctxt->input->cur;
10163 input->free = NULL;
10164
10165 /*
10166 * let's parse that entity knowing it's an external subset.
10167 */
10168 ctxt->inSubset = 2;
10169 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10170 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10171 ExternalID, SystemID);
10172 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10173
10174 if (ctxt->myDoc != NULL) {
10175 if (ctxt->wellFormed) {
10176 ret = ctxt->myDoc->extSubset;
10177 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010178 if (ret != NULL) {
10179 xmlNodePtr tmp;
10180
10181 ret->doc = NULL;
10182 tmp = ret->children;
10183 while (tmp != NULL) {
10184 tmp->doc = NULL;
10185 tmp = tmp->next;
10186 }
10187 }
Owen Taylor3473f882001-02-23 17:55:21 +000010188 } else {
10189 ret = NULL;
10190 }
10191 xmlFreeDoc(ctxt->myDoc);
10192 ctxt->myDoc = NULL;
10193 }
10194 if (sax != NULL) ctxt->sax = NULL;
10195 xmlFreeParserCtxt(ctxt);
10196
10197 return(ret);
10198}
10199
Daniel Veillard4432df22003-09-28 18:58:27 +000010200
Owen Taylor3473f882001-02-23 17:55:21 +000010201/**
10202 * xmlParseDTD:
10203 * @ExternalID: a NAME* containing the External ID of the DTD
10204 * @SystemID: a NAME* containing the URL to the DTD
10205 *
10206 * Load and parse an external subset.
10207 *
10208 * Returns the resulting xmlDtdPtr or NULL in case of error.
10209 */
10210
10211xmlDtdPtr
10212xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10213 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10214}
Daniel Veillard4432df22003-09-28 18:58:27 +000010215#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010216
10217/************************************************************************
10218 * *
10219 * Front ends when parsing an Entity *
10220 * *
10221 ************************************************************************/
10222
10223/**
Owen Taylor3473f882001-02-23 17:55:21 +000010224 * xmlParseCtxtExternalEntity:
10225 * @ctx: the existing parsing context
10226 * @URL: the URL for the entity to load
10227 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010228 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010229 *
10230 * Parse an external general entity within an existing parsing context
10231 * An external general parsed entity is well-formed if it matches the
10232 * production labeled extParsedEnt.
10233 *
10234 * [78] extParsedEnt ::= TextDecl? content
10235 *
10236 * Returns 0 if the entity is well formed, -1 in case of args problem and
10237 * the parser error code otherwise
10238 */
10239
10240int
10241xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010242 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010243 xmlParserCtxtPtr ctxt;
10244 xmlDocPtr newDoc;
10245 xmlSAXHandlerPtr oldsax = NULL;
10246 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010247 xmlChar start[4];
10248 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010249
10250 if (ctx->depth > 40) {
10251 return(XML_ERR_ENTITY_LOOP);
10252 }
10253
Daniel Veillardcda96922001-08-21 10:56:31 +000010254 if (lst != NULL)
10255 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010256 if ((URL == NULL) && (ID == NULL))
10257 return(-1);
10258 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10259 return(-1);
10260
10261
10262 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10263 if (ctxt == NULL) return(-1);
10264 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010265 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010266 oldsax = ctxt->sax;
10267 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010268 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010269 newDoc = xmlNewDoc(BAD_CAST "1.0");
10270 if (newDoc == NULL) {
10271 xmlFreeParserCtxt(ctxt);
10272 return(-1);
10273 }
10274 if (ctx->myDoc != NULL) {
10275 newDoc->intSubset = ctx->myDoc->intSubset;
10276 newDoc->extSubset = ctx->myDoc->extSubset;
10277 }
10278 if (ctx->myDoc->URL != NULL) {
10279 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10280 }
10281 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10282 if (newDoc->children == NULL) {
10283 ctxt->sax = oldsax;
10284 xmlFreeParserCtxt(ctxt);
10285 newDoc->intSubset = NULL;
10286 newDoc->extSubset = NULL;
10287 xmlFreeDoc(newDoc);
10288 return(-1);
10289 }
10290 nodePush(ctxt, newDoc->children);
10291 if (ctx->myDoc == NULL) {
10292 ctxt->myDoc = newDoc;
10293 } else {
10294 ctxt->myDoc = ctx->myDoc;
10295 newDoc->children->doc = ctx->myDoc;
10296 }
10297
Daniel Veillard87a764e2001-06-20 17:41:10 +000010298 /*
10299 * Get the 4 first bytes and decode the charset
10300 * if enc != XML_CHAR_ENCODING_NONE
10301 * plug some encoding conversion routines.
10302 */
10303 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010304 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10305 start[0] = RAW;
10306 start[1] = NXT(1);
10307 start[2] = NXT(2);
10308 start[3] = NXT(3);
10309 enc = xmlDetectCharEncoding(start, 4);
10310 if (enc != XML_CHAR_ENCODING_NONE) {
10311 xmlSwitchEncoding(ctxt, enc);
10312 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010313 }
10314
Owen Taylor3473f882001-02-23 17:55:21 +000010315 /*
10316 * Parse a possible text declaration first
10317 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010318 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010319 xmlParseTextDecl(ctxt);
10320 }
10321
10322 /*
10323 * Doing validity checking on chunk doesn't make sense
10324 */
10325 ctxt->instate = XML_PARSER_CONTENT;
10326 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010327 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010328 ctxt->loadsubset = ctx->loadsubset;
10329 ctxt->depth = ctx->depth + 1;
10330 ctxt->replaceEntities = ctx->replaceEntities;
10331 if (ctxt->validate) {
10332 ctxt->vctxt.error = ctx->vctxt.error;
10333 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010334 } else {
10335 ctxt->vctxt.error = NULL;
10336 ctxt->vctxt.warning = NULL;
10337 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010338 ctxt->vctxt.nodeTab = NULL;
10339 ctxt->vctxt.nodeNr = 0;
10340 ctxt->vctxt.nodeMax = 0;
10341 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010342 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10343 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010344 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10345 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10346 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010347 ctxt->dictNames = ctx->dictNames;
10348 ctxt->attsDefault = ctx->attsDefault;
10349 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010350
10351 xmlParseContent(ctxt);
10352
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010353 ctx->validate = ctxt->validate;
10354 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010355 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010356 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010357 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010358 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010359 }
10360 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010361 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010362 }
10363
10364 if (!ctxt->wellFormed) {
10365 if (ctxt->errNo == 0)
10366 ret = 1;
10367 else
10368 ret = ctxt->errNo;
10369 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010370 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010371 xmlNodePtr cur;
10372
10373 /*
10374 * Return the newly created nodeset after unlinking it from
10375 * they pseudo parent.
10376 */
10377 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010378 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010379 while (cur != NULL) {
10380 cur->parent = NULL;
10381 cur = cur->next;
10382 }
10383 newDoc->children->children = NULL;
10384 }
10385 ret = 0;
10386 }
10387 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010388 ctxt->dict = NULL;
10389 ctxt->attsDefault = NULL;
10390 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010391 xmlFreeParserCtxt(ctxt);
10392 newDoc->intSubset = NULL;
10393 newDoc->extSubset = NULL;
10394 xmlFreeDoc(newDoc);
10395
10396 return(ret);
10397}
10398
10399/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010400 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010401 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010402 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010403 * @sax: the SAX handler bloc (possibly NULL)
10404 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10405 * @depth: Used for loop detection, use 0
10406 * @URL: the URL for the entity to load
10407 * @ID: the System ID for the entity to load
10408 * @list: the return value for the set of parsed nodes
10409 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010410 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010411 *
10412 * Returns 0 if the entity is well formed, -1 in case of args problem and
10413 * the parser error code otherwise
10414 */
10415
Daniel Veillard7d515752003-09-26 19:12:37 +000010416static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010417xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10418 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010419 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010420 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010421 xmlParserCtxtPtr ctxt;
10422 xmlDocPtr newDoc;
10423 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010424 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010425 xmlChar start[4];
10426 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010427
10428 if (depth > 40) {
10429 return(XML_ERR_ENTITY_LOOP);
10430 }
10431
10432
10433
10434 if (list != NULL)
10435 *list = NULL;
10436 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010437 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010438 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010439 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010440
10441
10442 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010443 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010444 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010445 if (oldctxt != NULL) {
10446 ctxt->_private = oldctxt->_private;
10447 ctxt->loadsubset = oldctxt->loadsubset;
10448 ctxt->validate = oldctxt->validate;
10449 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010450 ctxt->record_info = oldctxt->record_info;
10451 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10452 ctxt->node_seq.length = oldctxt->node_seq.length;
10453 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010454 } else {
10455 /*
10456 * Doing validity checking on chunk without context
10457 * doesn't make sense
10458 */
10459 ctxt->_private = NULL;
10460 ctxt->validate = 0;
10461 ctxt->external = 2;
10462 ctxt->loadsubset = 0;
10463 }
Owen Taylor3473f882001-02-23 17:55:21 +000010464 if (sax != NULL) {
10465 oldsax = ctxt->sax;
10466 ctxt->sax = sax;
10467 if (user_data != NULL)
10468 ctxt->userData = user_data;
10469 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010470 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010471 newDoc = xmlNewDoc(BAD_CAST "1.0");
10472 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010473 ctxt->node_seq.maximum = 0;
10474 ctxt->node_seq.length = 0;
10475 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010476 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010477 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010478 }
10479 if (doc != NULL) {
10480 newDoc->intSubset = doc->intSubset;
10481 newDoc->extSubset = doc->extSubset;
10482 }
10483 if (doc->URL != NULL) {
10484 newDoc->URL = xmlStrdup(doc->URL);
10485 }
10486 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10487 if (newDoc->children == NULL) {
10488 if (sax != NULL)
10489 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010490 ctxt->node_seq.maximum = 0;
10491 ctxt->node_seq.length = 0;
10492 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010493 xmlFreeParserCtxt(ctxt);
10494 newDoc->intSubset = NULL;
10495 newDoc->extSubset = NULL;
10496 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010497 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010498 }
10499 nodePush(ctxt, newDoc->children);
10500 if (doc == NULL) {
10501 ctxt->myDoc = newDoc;
10502 } else {
10503 ctxt->myDoc = doc;
10504 newDoc->children->doc = doc;
10505 }
10506
Daniel Veillard87a764e2001-06-20 17:41:10 +000010507 /*
10508 * Get the 4 first bytes and decode the charset
10509 * if enc != XML_CHAR_ENCODING_NONE
10510 * plug some encoding conversion routines.
10511 */
10512 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010513 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10514 start[0] = RAW;
10515 start[1] = NXT(1);
10516 start[2] = NXT(2);
10517 start[3] = NXT(3);
10518 enc = xmlDetectCharEncoding(start, 4);
10519 if (enc != XML_CHAR_ENCODING_NONE) {
10520 xmlSwitchEncoding(ctxt, enc);
10521 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010522 }
10523
Owen Taylor3473f882001-02-23 17:55:21 +000010524 /*
10525 * Parse a possible text declaration first
10526 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010527 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010528 xmlParseTextDecl(ctxt);
10529 }
10530
Owen Taylor3473f882001-02-23 17:55:21 +000010531 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010532 ctxt->depth = depth;
10533
10534 xmlParseContent(ctxt);
10535
Daniel Veillard561b7f82002-03-20 21:55:57 +000010536 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010537 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010538 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010539 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010540 }
10541 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010542 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010543 }
10544
10545 if (!ctxt->wellFormed) {
10546 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010547 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010548 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010549 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010550 } else {
10551 if (list != NULL) {
10552 xmlNodePtr cur;
10553
10554 /*
10555 * Return the newly created nodeset after unlinking it from
10556 * they pseudo parent.
10557 */
10558 cur = newDoc->children->children;
10559 *list = cur;
10560 while (cur != NULL) {
10561 cur->parent = NULL;
10562 cur = cur->next;
10563 }
10564 newDoc->children->children = NULL;
10565 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010566 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010567 }
10568 if (sax != NULL)
10569 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010570 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10571 oldctxt->node_seq.length = ctxt->node_seq.length;
10572 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010573 ctxt->node_seq.maximum = 0;
10574 ctxt->node_seq.length = 0;
10575 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010576 xmlFreeParserCtxt(ctxt);
10577 newDoc->intSubset = NULL;
10578 newDoc->extSubset = NULL;
10579 xmlFreeDoc(newDoc);
10580
10581 return(ret);
10582}
10583
Daniel Veillard81273902003-09-30 00:43:48 +000010584#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010585/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010586 * xmlParseExternalEntity:
10587 * @doc: the document the chunk pertains to
10588 * @sax: the SAX handler bloc (possibly NULL)
10589 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10590 * @depth: Used for loop detection, use 0
10591 * @URL: the URL for the entity to load
10592 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010593 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010594 *
10595 * Parse an external general entity
10596 * An external general parsed entity is well-formed if it matches the
10597 * production labeled extParsedEnt.
10598 *
10599 * [78] extParsedEnt ::= TextDecl? content
10600 *
10601 * Returns 0 if the entity is well formed, -1 in case of args problem and
10602 * the parser error code otherwise
10603 */
10604
10605int
10606xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010607 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010608 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010609 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010610}
10611
10612/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010613 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010614 * @doc: the document the chunk pertains to
10615 * @sax: the SAX handler bloc (possibly NULL)
10616 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10617 * @depth: Used for loop detection, use 0
10618 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010619 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010620 *
10621 * Parse a well-balanced chunk of an XML document
10622 * called by the parser
10623 * The allowed sequence for the Well Balanced Chunk is the one defined by
10624 * the content production in the XML grammar:
10625 *
10626 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10627 *
10628 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10629 * the parser error code otherwise
10630 */
10631
10632int
10633xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010634 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010635 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10636 depth, string, lst, 0 );
10637}
Daniel Veillard81273902003-09-30 00:43:48 +000010638#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010639
10640/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010641 * xmlParseBalancedChunkMemoryInternal:
10642 * @oldctxt: the existing parsing context
10643 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10644 * @user_data: the user data field for the parser context
10645 * @lst: the return value for the set of parsed nodes
10646 *
10647 *
10648 * Parse a well-balanced chunk of an XML document
10649 * called by the parser
10650 * The allowed sequence for the Well Balanced Chunk is the one defined by
10651 * the content production in the XML grammar:
10652 *
10653 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10654 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010655 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10656 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010657 *
10658 * In case recover is set to 1, the nodelist will not be empty even if
10659 * the parsed chunk is not well balanced.
10660 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010661static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010662xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10663 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10664 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010665 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010666 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010667 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010668 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010669 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010670
10671 if (oldctxt->depth > 40) {
10672 return(XML_ERR_ENTITY_LOOP);
10673 }
10674
10675
10676 if (lst != NULL)
10677 *lst = NULL;
10678 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010679 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010680
10681 size = xmlStrlen(string);
10682
10683 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010684 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010685 if (user_data != NULL)
10686 ctxt->userData = user_data;
10687 else
10688 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010689 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10690 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010691 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10692 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10693 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010694
10695 oldsax = ctxt->sax;
10696 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010697 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010698 ctxt->replaceEntities = oldctxt->replaceEntities;
10699 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010700
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010701 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010702 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010703 newDoc = xmlNewDoc(BAD_CAST "1.0");
10704 if (newDoc == NULL) {
10705 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010706 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010707 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010708 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010709 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010710 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010711 } else {
10712 ctxt->myDoc = oldctxt->myDoc;
10713 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010714 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010715 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010716 BAD_CAST "pseudoroot", NULL);
10717 if (ctxt->myDoc->children == NULL) {
10718 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010719 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010720 xmlFreeParserCtxt(ctxt);
10721 if (newDoc != NULL)
10722 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010723 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010724 }
10725 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010726 ctxt->instate = XML_PARSER_CONTENT;
10727 ctxt->depth = oldctxt->depth + 1;
10728
Daniel Veillard328f48c2002-11-15 15:24:34 +000010729 ctxt->validate = 0;
10730 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010731 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10732 /*
10733 * ID/IDREF registration will be done in xmlValidateElement below
10734 */
10735 ctxt->loadsubset |= XML_SKIP_IDS;
10736 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010737 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010738 ctxt->attsDefault = oldctxt->attsDefault;
10739 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010740
Daniel Veillard68e9e742002-11-16 15:35:11 +000010741 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010742 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010743 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010744 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010745 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010746 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010747 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010748 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010749 }
10750
10751 if (!ctxt->wellFormed) {
10752 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010753 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010754 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010755 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010756 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010757 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010758 }
10759
William M. Brack7b9154b2003-09-27 19:23:50 +000010760 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010761 xmlNodePtr cur;
10762
10763 /*
10764 * Return the newly created nodeset after unlinking it from
10765 * they pseudo parent.
10766 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010767 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010768 *lst = cur;
10769 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010770#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010771 if (oldctxt->validate && oldctxt->wellFormed &&
10772 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10773 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10774 oldctxt->myDoc, cur);
10775 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010776#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010777 cur->parent = NULL;
10778 cur = cur->next;
10779 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010780 ctxt->myDoc->children->children = NULL;
10781 }
10782 if (ctxt->myDoc != NULL) {
10783 xmlFreeNode(ctxt->myDoc->children);
10784 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010785 }
10786
10787 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010788 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010789 ctxt->attsDefault = NULL;
10790 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010791 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010792 if (newDoc != NULL)
10793 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010794
10795 return(ret);
10796}
10797
Daniel Veillard81273902003-09-30 00:43:48 +000010798#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010799/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010800 * xmlParseBalancedChunkMemoryRecover:
10801 * @doc: the document the chunk pertains to
10802 * @sax: the SAX handler bloc (possibly NULL)
10803 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10804 * @depth: Used for loop detection, use 0
10805 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10806 * @lst: the return value for the set of parsed nodes
10807 * @recover: return nodes even if the data is broken (use 0)
10808 *
10809 *
10810 * Parse a well-balanced chunk of an XML document
10811 * called by the parser
10812 * The allowed sequence for the Well Balanced Chunk is the one defined by
10813 * the content production in the XML grammar:
10814 *
10815 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10816 *
10817 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10818 * the parser error code otherwise
10819 *
10820 * In case recover is set to 1, the nodelist will not be empty even if
10821 * the parsed chunk is not well balanced.
10822 */
10823int
10824xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10825 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10826 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010827 xmlParserCtxtPtr ctxt;
10828 xmlDocPtr newDoc;
10829 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010830 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010831 int size;
10832 int ret = 0;
10833
10834 if (depth > 40) {
10835 return(XML_ERR_ENTITY_LOOP);
10836 }
10837
10838
Daniel Veillardcda96922001-08-21 10:56:31 +000010839 if (lst != NULL)
10840 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010841 if (string == NULL)
10842 return(-1);
10843
10844 size = xmlStrlen(string);
10845
10846 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10847 if (ctxt == NULL) return(-1);
10848 ctxt->userData = ctxt;
10849 if (sax != NULL) {
10850 oldsax = ctxt->sax;
10851 ctxt->sax = sax;
10852 if (user_data != NULL)
10853 ctxt->userData = user_data;
10854 }
10855 newDoc = xmlNewDoc(BAD_CAST "1.0");
10856 if (newDoc == NULL) {
10857 xmlFreeParserCtxt(ctxt);
10858 return(-1);
10859 }
10860 if (doc != NULL) {
10861 newDoc->intSubset = doc->intSubset;
10862 newDoc->extSubset = doc->extSubset;
10863 }
10864 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10865 if (newDoc->children == NULL) {
10866 if (sax != NULL)
10867 ctxt->sax = oldsax;
10868 xmlFreeParserCtxt(ctxt);
10869 newDoc->intSubset = NULL;
10870 newDoc->extSubset = NULL;
10871 xmlFreeDoc(newDoc);
10872 return(-1);
10873 }
10874 nodePush(ctxt, newDoc->children);
10875 if (doc == NULL) {
10876 ctxt->myDoc = newDoc;
10877 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010878 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010879 newDoc->children->doc = doc;
10880 }
10881 ctxt->instate = XML_PARSER_CONTENT;
10882 ctxt->depth = depth;
10883
10884 /*
10885 * Doing validity checking on chunk doesn't make sense
10886 */
10887 ctxt->validate = 0;
10888 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010889 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010890
Daniel Veillardb39bc392002-10-26 19:29:51 +000010891 if ( doc != NULL ){
10892 content = doc->children;
10893 doc->children = NULL;
10894 xmlParseContent(ctxt);
10895 doc->children = content;
10896 }
10897 else {
10898 xmlParseContent(ctxt);
10899 }
Owen Taylor3473f882001-02-23 17:55:21 +000010900 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010901 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010902 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010903 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010904 }
10905 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010906 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010907 }
10908
10909 if (!ctxt->wellFormed) {
10910 if (ctxt->errNo == 0)
10911 ret = 1;
10912 else
10913 ret = ctxt->errNo;
10914 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010915 ret = 0;
10916 }
10917
10918 if (lst != NULL && (ret == 0 || recover == 1)) {
10919 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010920
10921 /*
10922 * Return the newly created nodeset after unlinking it from
10923 * they pseudo parent.
10924 */
10925 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010926 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010927 while (cur != NULL) {
10928 cur->parent = NULL;
10929 cur = cur->next;
10930 }
10931 newDoc->children->children = NULL;
10932 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010933
Owen Taylor3473f882001-02-23 17:55:21 +000010934 if (sax != NULL)
10935 ctxt->sax = oldsax;
10936 xmlFreeParserCtxt(ctxt);
10937 newDoc->intSubset = NULL;
10938 newDoc->extSubset = NULL;
10939 xmlFreeDoc(newDoc);
10940
10941 return(ret);
10942}
10943
10944/**
10945 * xmlSAXParseEntity:
10946 * @sax: the SAX handler block
10947 * @filename: the filename
10948 *
10949 * parse an XML external entity out of context and build a tree.
10950 * It use the given SAX function block to handle the parsing callback.
10951 * If sax is NULL, fallback to the default DOM tree building routines.
10952 *
10953 * [78] extParsedEnt ::= TextDecl? content
10954 *
10955 * This correspond to a "Well Balanced" chunk
10956 *
10957 * Returns the resulting document tree
10958 */
10959
10960xmlDocPtr
10961xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10962 xmlDocPtr ret;
10963 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010964
10965 ctxt = xmlCreateFileParserCtxt(filename);
10966 if (ctxt == NULL) {
10967 return(NULL);
10968 }
10969 if (sax != NULL) {
10970 if (ctxt->sax != NULL)
10971 xmlFree(ctxt->sax);
10972 ctxt->sax = sax;
10973 ctxt->userData = NULL;
10974 }
10975
Owen Taylor3473f882001-02-23 17:55:21 +000010976 xmlParseExtParsedEnt(ctxt);
10977
10978 if (ctxt->wellFormed)
10979 ret = ctxt->myDoc;
10980 else {
10981 ret = NULL;
10982 xmlFreeDoc(ctxt->myDoc);
10983 ctxt->myDoc = NULL;
10984 }
10985 if (sax != NULL)
10986 ctxt->sax = NULL;
10987 xmlFreeParserCtxt(ctxt);
10988
10989 return(ret);
10990}
10991
10992/**
10993 * xmlParseEntity:
10994 * @filename: the filename
10995 *
10996 * parse an XML external entity out of context and build a tree.
10997 *
10998 * [78] extParsedEnt ::= TextDecl? content
10999 *
11000 * This correspond to a "Well Balanced" chunk
11001 *
11002 * Returns the resulting document tree
11003 */
11004
11005xmlDocPtr
11006xmlParseEntity(const char *filename) {
11007 return(xmlSAXParseEntity(NULL, filename));
11008}
Daniel Veillard81273902003-09-30 00:43:48 +000011009#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011010
11011/**
11012 * xmlCreateEntityParserCtxt:
11013 * @URL: the entity URL
11014 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011015 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011016 *
11017 * Create a parser context for an external entity
11018 * Automatic support for ZLIB/Compress compressed document is provided
11019 * by default if found at compile-time.
11020 *
11021 * Returns the new parser context or NULL
11022 */
11023xmlParserCtxtPtr
11024xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11025 const xmlChar *base) {
11026 xmlParserCtxtPtr ctxt;
11027 xmlParserInputPtr inputStream;
11028 char *directory = NULL;
11029 xmlChar *uri;
11030
11031 ctxt = xmlNewParserCtxt();
11032 if (ctxt == NULL) {
11033 return(NULL);
11034 }
11035
11036 uri = xmlBuildURI(URL, base);
11037
11038 if (uri == NULL) {
11039 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11040 if (inputStream == NULL) {
11041 xmlFreeParserCtxt(ctxt);
11042 return(NULL);
11043 }
11044
11045 inputPush(ctxt, inputStream);
11046
11047 if ((ctxt->directory == NULL) && (directory == NULL))
11048 directory = xmlParserGetDirectory((char *)URL);
11049 if ((ctxt->directory == NULL) && (directory != NULL))
11050 ctxt->directory = directory;
11051 } else {
11052 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11053 if (inputStream == NULL) {
11054 xmlFree(uri);
11055 xmlFreeParserCtxt(ctxt);
11056 return(NULL);
11057 }
11058
11059 inputPush(ctxt, inputStream);
11060
11061 if ((ctxt->directory == NULL) && (directory == NULL))
11062 directory = xmlParserGetDirectory((char *)uri);
11063 if ((ctxt->directory == NULL) && (directory != NULL))
11064 ctxt->directory = directory;
11065 xmlFree(uri);
11066 }
Owen Taylor3473f882001-02-23 17:55:21 +000011067 return(ctxt);
11068}
11069
11070/************************************************************************
11071 * *
11072 * Front ends when parsing from a file *
11073 * *
11074 ************************************************************************/
11075
11076/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011077 * xmlCreateURLParserCtxt:
11078 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011079 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011080 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011081 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011082 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011083 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011084 *
11085 * Returns the new parser context or NULL
11086 */
11087xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011088xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011089{
11090 xmlParserCtxtPtr ctxt;
11091 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011092 char *directory = NULL;
11093
Owen Taylor3473f882001-02-23 17:55:21 +000011094 ctxt = xmlNewParserCtxt();
11095 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011096 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011097 return(NULL);
11098 }
11099
Daniel Veillard61b93382003-11-03 14:28:31 +000011100 if (options != 0)
11101 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011102
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011103 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011104 if (inputStream == NULL) {
11105 xmlFreeParserCtxt(ctxt);
11106 return(NULL);
11107 }
11108
Owen Taylor3473f882001-02-23 17:55:21 +000011109 inputPush(ctxt, inputStream);
11110 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011111 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011112 if ((ctxt->directory == NULL) && (directory != NULL))
11113 ctxt->directory = directory;
11114
11115 return(ctxt);
11116}
11117
Daniel Veillard61b93382003-11-03 14:28:31 +000011118/**
11119 * xmlCreateFileParserCtxt:
11120 * @filename: the filename
11121 *
11122 * Create a parser context for a file content.
11123 * Automatic support for ZLIB/Compress compressed document is provided
11124 * by default if found at compile-time.
11125 *
11126 * Returns the new parser context or NULL
11127 */
11128xmlParserCtxtPtr
11129xmlCreateFileParserCtxt(const char *filename)
11130{
11131 return(xmlCreateURLParserCtxt(filename, 0));
11132}
11133
Daniel Veillard81273902003-09-30 00:43:48 +000011134#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011135/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011136 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011137 * @sax: the SAX handler block
11138 * @filename: the filename
11139 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11140 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011141 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011142 *
11143 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11144 * compressed document is provided by default if found at compile-time.
11145 * It use the given SAX function block to handle the parsing callback.
11146 * If sax is NULL, fallback to the default DOM tree building routines.
11147 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011148 * User data (void *) is stored within the parser context in the
11149 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011150 *
Owen Taylor3473f882001-02-23 17:55:21 +000011151 * Returns the resulting document tree
11152 */
11153
11154xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011155xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11156 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011157 xmlDocPtr ret;
11158 xmlParserCtxtPtr ctxt;
11159 char *directory = NULL;
11160
Daniel Veillard635ef722001-10-29 11:48:19 +000011161 xmlInitParser();
11162
Owen Taylor3473f882001-02-23 17:55:21 +000011163 ctxt = xmlCreateFileParserCtxt(filename);
11164 if (ctxt == NULL) {
11165 return(NULL);
11166 }
11167 if (sax != NULL) {
11168 if (ctxt->sax != NULL)
11169 xmlFree(ctxt->sax);
11170 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011172 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011173 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011174 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011175 }
Owen Taylor3473f882001-02-23 17:55:21 +000011176
11177 if ((ctxt->directory == NULL) && (directory == NULL))
11178 directory = xmlParserGetDirectory(filename);
11179 if ((ctxt->directory == NULL) && (directory != NULL))
11180 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11181
Daniel Veillarddad3f682002-11-17 16:47:27 +000011182 ctxt->recovery = recovery;
11183
Owen Taylor3473f882001-02-23 17:55:21 +000011184 xmlParseDocument(ctxt);
11185
William M. Brackc07329e2003-09-08 01:57:30 +000011186 if ((ctxt->wellFormed) || recovery) {
11187 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011188 if (ret != NULL) {
11189 if (ctxt->input->buf->compressed > 0)
11190 ret->compression = 9;
11191 else
11192 ret->compression = ctxt->input->buf->compressed;
11193 }
William M. Brackc07329e2003-09-08 01:57:30 +000011194 }
Owen Taylor3473f882001-02-23 17:55:21 +000011195 else {
11196 ret = NULL;
11197 xmlFreeDoc(ctxt->myDoc);
11198 ctxt->myDoc = NULL;
11199 }
11200 if (sax != NULL)
11201 ctxt->sax = NULL;
11202 xmlFreeParserCtxt(ctxt);
11203
11204 return(ret);
11205}
11206
11207/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011208 * xmlSAXParseFile:
11209 * @sax: the SAX handler block
11210 * @filename: the filename
11211 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11212 * documents
11213 *
11214 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11215 * compressed document is provided by default if found at compile-time.
11216 * It use the given SAX function block to handle the parsing callback.
11217 * If sax is NULL, fallback to the default DOM tree building routines.
11218 *
11219 * Returns the resulting document tree
11220 */
11221
11222xmlDocPtr
11223xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11224 int recovery) {
11225 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11226}
11227
11228/**
Owen Taylor3473f882001-02-23 17:55:21 +000011229 * xmlRecoverDoc:
11230 * @cur: a pointer to an array of xmlChar
11231 *
11232 * parse an XML in-memory document and build a tree.
11233 * In the case the document is not Well Formed, a tree is built anyway
11234 *
11235 * Returns the resulting document tree
11236 */
11237
11238xmlDocPtr
11239xmlRecoverDoc(xmlChar *cur) {
11240 return(xmlSAXParseDoc(NULL, cur, 1));
11241}
11242
11243/**
11244 * xmlParseFile:
11245 * @filename: the filename
11246 *
11247 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11248 * compressed document is provided by default if found at compile-time.
11249 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011250 * Returns the resulting document tree if the file was wellformed,
11251 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011252 */
11253
11254xmlDocPtr
11255xmlParseFile(const char *filename) {
11256 return(xmlSAXParseFile(NULL, filename, 0));
11257}
11258
11259/**
11260 * xmlRecoverFile:
11261 * @filename: the filename
11262 *
11263 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11264 * compressed document is provided by default if found at compile-time.
11265 * In the case the document is not Well Formed, a tree is built anyway
11266 *
11267 * Returns the resulting document tree
11268 */
11269
11270xmlDocPtr
11271xmlRecoverFile(const char *filename) {
11272 return(xmlSAXParseFile(NULL, filename, 1));
11273}
11274
11275
11276/**
11277 * xmlSetupParserForBuffer:
11278 * @ctxt: an XML parser context
11279 * @buffer: a xmlChar * buffer
11280 * @filename: a file name
11281 *
11282 * Setup the parser context to parse a new buffer; Clears any prior
11283 * contents from the parser context. The buffer parameter must not be
11284 * NULL, but the filename parameter can be
11285 */
11286void
11287xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11288 const char* filename)
11289{
11290 xmlParserInputPtr input;
11291
11292 input = xmlNewInputStream(ctxt);
11293 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011294 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011295 xmlFree(ctxt);
11296 return;
11297 }
11298
11299 xmlClearParserCtxt(ctxt);
11300 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011301 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011302 input->base = buffer;
11303 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011304 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011305 inputPush(ctxt, input);
11306}
11307
11308/**
11309 * xmlSAXUserParseFile:
11310 * @sax: a SAX handler
11311 * @user_data: The user data returned on SAX callbacks
11312 * @filename: a file name
11313 *
11314 * parse an XML file and call the given SAX handler routines.
11315 * Automatic support for ZLIB/Compress compressed document is provided
11316 *
11317 * Returns 0 in case of success or a error number otherwise
11318 */
11319int
11320xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11321 const char *filename) {
11322 int ret = 0;
11323 xmlParserCtxtPtr ctxt;
11324
11325 ctxt = xmlCreateFileParserCtxt(filename);
11326 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011327#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011328 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011329#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011330 xmlFree(ctxt->sax);
11331 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011332 xmlDetectSAX2(ctxt);
11333
Owen Taylor3473f882001-02-23 17:55:21 +000011334 if (user_data != NULL)
11335 ctxt->userData = user_data;
11336
11337 xmlParseDocument(ctxt);
11338
11339 if (ctxt->wellFormed)
11340 ret = 0;
11341 else {
11342 if (ctxt->errNo != 0)
11343 ret = ctxt->errNo;
11344 else
11345 ret = -1;
11346 }
11347 if (sax != NULL)
11348 ctxt->sax = NULL;
11349 xmlFreeParserCtxt(ctxt);
11350
11351 return ret;
11352}
Daniel Veillard81273902003-09-30 00:43:48 +000011353#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011354
11355/************************************************************************
11356 * *
11357 * Front ends when parsing from memory *
11358 * *
11359 ************************************************************************/
11360
11361/**
11362 * xmlCreateMemoryParserCtxt:
11363 * @buffer: a pointer to a char array
11364 * @size: the size of the array
11365 *
11366 * Create a parser context for an XML in-memory document.
11367 *
11368 * Returns the new parser context or NULL
11369 */
11370xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011371xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011372 xmlParserCtxtPtr ctxt;
11373 xmlParserInputPtr input;
11374 xmlParserInputBufferPtr buf;
11375
11376 if (buffer == NULL)
11377 return(NULL);
11378 if (size <= 0)
11379 return(NULL);
11380
11381 ctxt = xmlNewParserCtxt();
11382 if (ctxt == NULL)
11383 return(NULL);
11384
Daniel Veillard53350552003-09-18 13:35:51 +000011385 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011386 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011387 if (buf == NULL) {
11388 xmlFreeParserCtxt(ctxt);
11389 return(NULL);
11390 }
Owen Taylor3473f882001-02-23 17:55:21 +000011391
11392 input = xmlNewInputStream(ctxt);
11393 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011394 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011395 xmlFreeParserCtxt(ctxt);
11396 return(NULL);
11397 }
11398
11399 input->filename = NULL;
11400 input->buf = buf;
11401 input->base = input->buf->buffer->content;
11402 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011403 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011404
11405 inputPush(ctxt, input);
11406 return(ctxt);
11407}
11408
Daniel Veillard81273902003-09-30 00:43:48 +000011409#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011410/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011411 * xmlSAXParseMemoryWithData:
11412 * @sax: the SAX handler block
11413 * @buffer: an pointer to a char array
11414 * @size: the size of the array
11415 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11416 * documents
11417 * @data: the userdata
11418 *
11419 * parse an XML in-memory block and use the given SAX function block
11420 * to handle the parsing callback. If sax is NULL, fallback to the default
11421 * DOM tree building routines.
11422 *
11423 * User data (void *) is stored within the parser context in the
11424 * context's _private member, so it is available nearly everywhere in libxml
11425 *
11426 * Returns the resulting document tree
11427 */
11428
11429xmlDocPtr
11430xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11431 int size, int recovery, void *data) {
11432 xmlDocPtr ret;
11433 xmlParserCtxtPtr ctxt;
11434
11435 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11436 if (ctxt == NULL) return(NULL);
11437 if (sax != NULL) {
11438 if (ctxt->sax != NULL)
11439 xmlFree(ctxt->sax);
11440 ctxt->sax = sax;
11441 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011442 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011443 if (data!=NULL) {
11444 ctxt->_private=data;
11445 }
11446
Daniel Veillardadba5f12003-04-04 16:09:01 +000011447 ctxt->recovery = recovery;
11448
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011449 xmlParseDocument(ctxt);
11450
11451 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11452 else {
11453 ret = NULL;
11454 xmlFreeDoc(ctxt->myDoc);
11455 ctxt->myDoc = NULL;
11456 }
11457 if (sax != NULL)
11458 ctxt->sax = NULL;
11459 xmlFreeParserCtxt(ctxt);
11460
11461 return(ret);
11462}
11463
11464/**
Owen Taylor3473f882001-02-23 17:55:21 +000011465 * xmlSAXParseMemory:
11466 * @sax: the SAX handler block
11467 * @buffer: an pointer to a char array
11468 * @size: the size of the array
11469 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11470 * documents
11471 *
11472 * parse an XML in-memory block and use the given SAX function block
11473 * to handle the parsing callback. If sax is NULL, fallback to the default
11474 * DOM tree building routines.
11475 *
11476 * Returns the resulting document tree
11477 */
11478xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011479xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11480 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011481 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011482}
11483
11484/**
11485 * xmlParseMemory:
11486 * @buffer: an pointer to a char array
11487 * @size: the size of the array
11488 *
11489 * parse an XML in-memory block and build a tree.
11490 *
11491 * Returns the resulting document tree
11492 */
11493
Daniel Veillard50822cb2001-07-26 20:05:51 +000011494xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011495 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11496}
11497
11498/**
11499 * xmlRecoverMemory:
11500 * @buffer: an pointer to a char array
11501 * @size: the size of the array
11502 *
11503 * parse an XML in-memory block and build a tree.
11504 * In the case the document is not Well Formed, a tree is built anyway
11505 *
11506 * Returns the resulting document tree
11507 */
11508
Daniel Veillard50822cb2001-07-26 20:05:51 +000011509xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011510 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11511}
11512
11513/**
11514 * xmlSAXUserParseMemory:
11515 * @sax: a SAX handler
11516 * @user_data: The user data returned on SAX callbacks
11517 * @buffer: an in-memory XML document input
11518 * @size: the length of the XML document in bytes
11519 *
11520 * A better SAX parsing routine.
11521 * parse an XML in-memory buffer and call the given SAX handler routines.
11522 *
11523 * Returns 0 in case of success or a error number otherwise
11524 */
11525int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011526 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011527 int ret = 0;
11528 xmlParserCtxtPtr ctxt;
11529 xmlSAXHandlerPtr oldsax = NULL;
11530
Daniel Veillard9e923512002-08-14 08:48:52 +000011531 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011532 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11533 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011534 oldsax = ctxt->sax;
11535 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011536 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011537 if (user_data != NULL)
11538 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011539
11540 xmlParseDocument(ctxt);
11541
11542 if (ctxt->wellFormed)
11543 ret = 0;
11544 else {
11545 if (ctxt->errNo != 0)
11546 ret = ctxt->errNo;
11547 else
11548 ret = -1;
11549 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011550 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011551 xmlFreeParserCtxt(ctxt);
11552
11553 return ret;
11554}
Daniel Veillard81273902003-09-30 00:43:48 +000011555#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011556
11557/**
11558 * xmlCreateDocParserCtxt:
11559 * @cur: a pointer to an array of xmlChar
11560 *
11561 * Creates a parser context for an XML in-memory document.
11562 *
11563 * Returns the new parser context or NULL
11564 */
11565xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011566xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011567 int len;
11568
11569 if (cur == NULL)
11570 return(NULL);
11571 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011572 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011573}
11574
Daniel Veillard81273902003-09-30 00:43:48 +000011575#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011576/**
11577 * xmlSAXParseDoc:
11578 * @sax: the SAX handler block
11579 * @cur: a pointer to an array of xmlChar
11580 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11581 * documents
11582 *
11583 * parse an XML in-memory document and build a tree.
11584 * It use the given SAX function block to handle the parsing callback.
11585 * If sax is NULL, fallback to the default DOM tree building routines.
11586 *
11587 * Returns the resulting document tree
11588 */
11589
11590xmlDocPtr
11591xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11592 xmlDocPtr ret;
11593 xmlParserCtxtPtr ctxt;
11594
11595 if (cur == NULL) return(NULL);
11596
11597
11598 ctxt = xmlCreateDocParserCtxt(cur);
11599 if (ctxt == NULL) return(NULL);
11600 if (sax != NULL) {
11601 ctxt->sax = sax;
11602 ctxt->userData = NULL;
11603 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011604 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011605
11606 xmlParseDocument(ctxt);
11607 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11608 else {
11609 ret = NULL;
11610 xmlFreeDoc(ctxt->myDoc);
11611 ctxt->myDoc = NULL;
11612 }
11613 if (sax != NULL)
11614 ctxt->sax = NULL;
11615 xmlFreeParserCtxt(ctxt);
11616
11617 return(ret);
11618}
11619
11620/**
11621 * xmlParseDoc:
11622 * @cur: a pointer to an array of xmlChar
11623 *
11624 * parse an XML in-memory document and build a tree.
11625 *
11626 * Returns the resulting document tree
11627 */
11628
11629xmlDocPtr
11630xmlParseDoc(xmlChar *cur) {
11631 return(xmlSAXParseDoc(NULL, cur, 0));
11632}
Daniel Veillard81273902003-09-30 00:43:48 +000011633#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011634
Daniel Veillard81273902003-09-30 00:43:48 +000011635#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011636/************************************************************************
11637 * *
11638 * Specific function to keep track of entities references *
11639 * and used by the XSLT debugger *
11640 * *
11641 ************************************************************************/
11642
/*
 * Callback invoked by xmlAddEntityReference(); installed through
 * xmlSetEntityReferenceFunc(). NULL means no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11644
11645/**
11646 * xmlAddEntityReference:
11647 * @ent : A valid entity
11648 * @firstNode : A valid first node for children of entity
11649 * @lastNode : A valid last node of children entity
11650 *
11651 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11652 */
11653static void
11654xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11655 xmlNodePtr lastNode)
11656{
11657 if (xmlEntityRefFunc != NULL) {
11658 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11659 }
11660}
11661
11662
11663/**
11664 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011665 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011666 *
11667 * Set the function to call call back when a xml reference has been made
11668 */
11669void
11670xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11671{
11672 xmlEntityRefFunc = func;
11673}
Daniel Veillard81273902003-09-30 00:43:48 +000011674#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011675
11676/************************************************************************
11677 * *
11678 * Miscellaneous *
11679 * *
11680 ************************************************************************/
11681
11682#ifdef LIBXML_XPATH_ENABLED
11683#include <libxml/xpath.h>
11684#endif
11685
/* Compared against the current xmlGenericError value in xmlInitParser(). */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
11688
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The work is done at most once: the xmlParserInitialized flag is set
 * when the function completes and further calls return immediately
 * until xmlCleanupParser() clears the flag again.
 */

void
xmlInitParser(void) {
    /* already initialized, nothing to do */
    if (xmlParserInitialized != 0)
        return;

    /*
     * (Re)install the default generic error function only if none or
     * the default one is currently set, i.e. a handler set by the
     * application is left untouched.
     */
    if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
        (xmlGenericError == NULL))
        initGenericErrorDefaultFunc(NULL);
    xmlInitGlobals();
    xmlInitThreads();
    xmlInitMemory();
    xmlInitCharEncodingHandlers();
    xmlDefaultSAXHandlerInit();
    xmlRegisterDefaultInputCallbacks();
    /* the remaining steps depend on the modules compiled in */
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
    htmlInitAutoClose();
    htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
    xmlXPathInit();
#endif
    xmlParserInitialized = 1;
}
11723
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML library. It tries to reclaim all
 * parsing related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the library but one should
 * call xmlCleanupParser() only when the process has
 * finished using the library or XML document built with it.
 *
 * Does nothing if the library is not currently marked as initialized;
 * on completion the initialized flag is cleared, so a later
 * xmlInitParser() call runs the initialization again.
 */

void
xmlCleanupParser(void) {
    /* nothing to reclaim if xmlInitParser() has not run */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    /* NOTE(review): xmlCleanupMemory() still runs after the call above */
    xmlCleanupMemory();
    xmlParserInitialized = 0;
}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011754
11755/************************************************************************
11756 * *
11757 * New set (2.6.0) of simpler and more flexible APIs *
11758 * *
11759 ************************************************************************/
11760
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed. A NULL @str is
 * ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: "DICT_FREE(s);" can then be used safely as the
 * body of an if/else. The caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
11772
11773/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011774 * xmlCtxtReset:
11775 * @ctxt: an XML parser context
11776 *
11777 * Reset a parser context
11778 */
11779void
11780xmlCtxtReset(xmlParserCtxtPtr ctxt)
11781{
11782 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011783 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011784
11785 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11786 xmlFreeInputStream(input);
11787 }
11788 ctxt->inputNr = 0;
11789 ctxt->input = NULL;
11790
11791 ctxt->spaceNr = 0;
11792 ctxt->spaceTab[0] = -1;
11793 ctxt->space = &ctxt->spaceTab[0];
11794
11795
11796 ctxt->nodeNr = 0;
11797 ctxt->node = NULL;
11798
11799 ctxt->nameNr = 0;
11800 ctxt->name = NULL;
11801
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011802 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011803 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011804 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011805 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011806 DICT_FREE(ctxt->directory);
11807 ctxt->directory = NULL;
11808 DICT_FREE(ctxt->extSubURI);
11809 ctxt->extSubURI = NULL;
11810 DICT_FREE(ctxt->extSubSystem);
11811 ctxt->extSubSystem = NULL;
11812 if (ctxt->myDoc != NULL)
11813 xmlFreeDoc(ctxt->myDoc);
11814 ctxt->myDoc = NULL;
11815
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011816 ctxt->standalone = -1;
11817 ctxt->hasExternalSubset = 0;
11818 ctxt->hasPErefs = 0;
11819 ctxt->html = 0;
11820 ctxt->external = 0;
11821 ctxt->instate = XML_PARSER_START;
11822 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011823
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011824 ctxt->wellFormed = 1;
11825 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000011826 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011827 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011828#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011829 ctxt->vctxt.userData = ctxt;
11830 ctxt->vctxt.error = xmlParserValidityError;
11831 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011832#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011833 ctxt->record_info = 0;
11834 ctxt->nbChars = 0;
11835 ctxt->checkIndex = 0;
11836 ctxt->inSubset = 0;
11837 ctxt->errNo = XML_ERR_OK;
11838 ctxt->depth = 0;
11839 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11840 ctxt->catalogs = NULL;
11841 xmlInitNodeInfoSeq(&ctxt->node_seq);
11842
11843 if (ctxt->attsDefault != NULL) {
11844 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11845 ctxt->attsDefault = NULL;
11846 }
11847 if (ctxt->attsSpecial != NULL) {
11848 xmlHashFree(ctxt->attsSpecial, NULL);
11849 ctxt->attsSpecial = NULL;
11850 }
11851
Daniel Veillard4432df22003-09-28 18:58:27 +000011852#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011853 if (ctxt->catalogs != NULL)
11854 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011855#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011856 if (ctxt->lastError.code != XML_ERR_OK)
11857 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011858}
11859
11860/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011861 * xmlCtxtResetPush:
11862 * @ctxt: an XML parser context
11863 * @chunk: a pointer to an array of chars
11864 * @size: number of chars in the array
11865 * @filename: an optional file name or URI
11866 * @encoding: the document encoding, or NULL
11867 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011868 * Reset a push parser context
11869 *
11870 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011871 */
11872int
11873xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11874 int size, const char *filename, const char *encoding)
11875{
11876 xmlParserInputPtr inputStream;
11877 xmlParserInputBufferPtr buf;
11878 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11879
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011880 if (ctxt == NULL)
11881 return(1);
11882
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011883 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11884 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11885
11886 buf = xmlAllocParserInputBuffer(enc);
11887 if (buf == NULL)
11888 return(1);
11889
11890 if (ctxt == NULL) {
11891 xmlFreeParserInputBuffer(buf);
11892 return(1);
11893 }
11894
11895 xmlCtxtReset(ctxt);
11896
11897 if (ctxt->pushTab == NULL) {
11898 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11899 sizeof(xmlChar *));
11900 if (ctxt->pushTab == NULL) {
11901 xmlErrMemory(ctxt, NULL);
11902 xmlFreeParserInputBuffer(buf);
11903 return(1);
11904 }
11905 }
11906
11907 if (filename == NULL) {
11908 ctxt->directory = NULL;
11909 } else {
11910 ctxt->directory = xmlParserGetDirectory(filename);
11911 }
11912
11913 inputStream = xmlNewInputStream(ctxt);
11914 if (inputStream == NULL) {
11915 xmlFreeParserInputBuffer(buf);
11916 return(1);
11917 }
11918
11919 if (filename == NULL)
11920 inputStream->filename = NULL;
11921 else
11922 inputStream->filename = (char *)
11923 xmlCanonicPath((const xmlChar *) filename);
11924 inputStream->buf = buf;
11925 inputStream->base = inputStream->buf->buffer->content;
11926 inputStream->cur = inputStream->buf->buffer->content;
11927 inputStream->end =
11928 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11929
11930 inputPush(ctxt, inputStream);
11931
11932 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11933 (ctxt->input->buf != NULL)) {
11934 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11935 int cur = ctxt->input->cur - ctxt->input->base;
11936
11937 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11938
11939 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11940 ctxt->input->cur = ctxt->input->base + cur;
11941 ctxt->input->end =
11942 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11943 use];
11944#ifdef DEBUG_PUSH
11945 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11946#endif
11947 }
11948
11949 if (encoding != NULL) {
11950 xmlCharEncodingHandlerPtr hdlr;
11951
11952 hdlr = xmlFindCharEncodingHandler(encoding);
11953 if (hdlr != NULL) {
11954 xmlSwitchToEncoding(ctxt, hdlr);
11955 } else {
11956 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11957 "Unsupported encoding %s\n", BAD_CAST encoding);
11958 }
11959 } else if (enc != XML_CHAR_ENCODING_NONE) {
11960 xmlSwitchEncoding(ctxt, enc);
11961 }
11962
11963 return(0);
11964}
11965
11966/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011967 * xmlCtxtUseOptions:
11968 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011969 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011970 *
11971 * Applies the options to the parser context
11972 *
11973 * Returns 0 in case of success, the set of unknown or unimplemented options
11974 * in case of error.
11975 */
11976int
11977xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11978{
11979 if (options & XML_PARSE_RECOVER) {
11980 ctxt->recovery = 1;
11981 options -= XML_PARSE_RECOVER;
11982 } else
11983 ctxt->recovery = 0;
11984 if (options & XML_PARSE_DTDLOAD) {
11985 ctxt->loadsubset = XML_DETECT_IDS;
11986 options -= XML_PARSE_DTDLOAD;
11987 } else
11988 ctxt->loadsubset = 0;
11989 if (options & XML_PARSE_DTDATTR) {
11990 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
11991 options -= XML_PARSE_DTDATTR;
11992 }
11993 if (options & XML_PARSE_NOENT) {
11994 ctxt->replaceEntities = 1;
11995 /* ctxt->loadsubset |= XML_DETECT_IDS; */
11996 options -= XML_PARSE_NOENT;
11997 } else
11998 ctxt->replaceEntities = 0;
11999 if (options & XML_PARSE_NOWARNING) {
12000 ctxt->sax->warning = NULL;
12001 options -= XML_PARSE_NOWARNING;
12002 }
12003 if (options & XML_PARSE_NOERROR) {
12004 ctxt->sax->error = NULL;
12005 ctxt->sax->fatalError = NULL;
12006 options -= XML_PARSE_NOERROR;
12007 }
12008 if (options & XML_PARSE_PEDANTIC) {
12009 ctxt->pedantic = 1;
12010 options -= XML_PARSE_PEDANTIC;
12011 } else
12012 ctxt->pedantic = 0;
12013 if (options & XML_PARSE_NOBLANKS) {
12014 ctxt->keepBlanks = 0;
12015 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12016 options -= XML_PARSE_NOBLANKS;
12017 } else
12018 ctxt->keepBlanks = 1;
12019 if (options & XML_PARSE_DTDVALID) {
12020 ctxt->validate = 1;
12021 if (options & XML_PARSE_NOWARNING)
12022 ctxt->vctxt.warning = NULL;
12023 if (options & XML_PARSE_NOERROR)
12024 ctxt->vctxt.error = NULL;
12025 options -= XML_PARSE_DTDVALID;
12026 } else
12027 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012028#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012029 if (options & XML_PARSE_SAX1) {
12030 ctxt->sax->startElement = xmlSAX2StartElement;
12031 ctxt->sax->endElement = xmlSAX2EndElement;
12032 ctxt->sax->startElementNs = NULL;
12033 ctxt->sax->endElementNs = NULL;
12034 ctxt->sax->initialized = 1;
12035 options -= XML_PARSE_SAX1;
12036 }
Daniel Veillard81273902003-09-30 00:43:48 +000012037#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012038 if (options & XML_PARSE_NODICT) {
12039 ctxt->dictNames = 0;
12040 options -= XML_PARSE_NODICT;
12041 } else {
12042 ctxt->dictNames = 1;
12043 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012044 if (options & XML_PARSE_NOCDATA) {
12045 ctxt->sax->cdataBlock = NULL;
12046 options -= XML_PARSE_NOCDATA;
12047 }
12048 if (options & XML_PARSE_NSCLEAN) {
12049 ctxt->options |= XML_PARSE_NSCLEAN;
12050 options -= XML_PARSE_NSCLEAN;
12051 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012052 if (options & XML_PARSE_NONET) {
12053 ctxt->options |= XML_PARSE_NONET;
12054 options -= XML_PARSE_NONET;
12055 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012056 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012057 return (options);
12058}
12059
12060/**
12061 * xmlDoRead:
12062 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012063 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012064 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012065 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012066 * @reuse: keep the context for reuse
12067 *
12068 * Common front-end for the xmlRead functions
12069 *
12070 * Returns the resulting document tree or NULL
12071 */
12072static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012073xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12074 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012075{
12076 xmlDocPtr ret;
12077
12078 xmlCtxtUseOptions(ctxt, options);
12079 if (encoding != NULL) {
12080 xmlCharEncodingHandlerPtr hdlr;
12081
12082 hdlr = xmlFindCharEncodingHandler(encoding);
12083 if (hdlr != NULL)
12084 xmlSwitchToEncoding(ctxt, hdlr);
12085 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012086 if ((URL != NULL) && (ctxt->input != NULL) &&
12087 (ctxt->input->filename == NULL))
12088 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012089 xmlParseDocument(ctxt);
12090 if ((ctxt->wellFormed) || ctxt->recovery)
12091 ret = ctxt->myDoc;
12092 else {
12093 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012094 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012095 xmlFreeDoc(ctxt->myDoc);
12096 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012097 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012098 ctxt->myDoc = NULL;
12099 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012100 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012101 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012102
12103 return (ret);
12104}
12105
12106/**
12107 * xmlReadDoc:
12108 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012109 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012110 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012111 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012112 *
12113 * parse an XML in-memory document and build a tree.
12114 *
12115 * Returns the resulting document tree
12116 */
12117xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012118xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012119{
12120 xmlParserCtxtPtr ctxt;
12121
12122 if (cur == NULL)
12123 return (NULL);
12124
12125 ctxt = xmlCreateDocParserCtxt(cur);
12126 if (ctxt == NULL)
12127 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012128 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012129}
12130
12131/**
12132 * xmlReadFile:
12133 * @filename: a file or URL
12134 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012135 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012136 *
12137 * parse an XML file from the filesystem or the network.
12138 *
12139 * Returns the resulting document tree
12140 */
12141xmlDocPtr
12142xmlReadFile(const char *filename, const char *encoding, int options)
12143{
12144 xmlParserCtxtPtr ctxt;
12145
Daniel Veillard61b93382003-11-03 14:28:31 +000012146 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012147 if (ctxt == NULL)
12148 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012149 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012150}
12151
12152/**
12153 * xmlReadMemory:
12154 * @buffer: a pointer to a char array
12155 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012156 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012157 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012158 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012159 *
12160 * parse an XML in-memory document and build a tree.
12161 *
12162 * Returns the resulting document tree
12163 */
12164xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012165xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012166{
12167 xmlParserCtxtPtr ctxt;
12168
12169 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12170 if (ctxt == NULL)
12171 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012172 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012173}
12174
12175/**
12176 * xmlReadFd:
12177 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012178 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012179 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012180 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012181 *
12182 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012183 * NOTE that the file descriptor will not be closed when the
12184 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012185 *
12186 * Returns the resulting document tree
12187 */
12188xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012189xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012190{
12191 xmlParserCtxtPtr ctxt;
12192 xmlParserInputBufferPtr input;
12193 xmlParserInputPtr stream;
12194
12195 if (fd < 0)
12196 return (NULL);
12197
12198 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12199 if (input == NULL)
12200 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012201 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012202 ctxt = xmlNewParserCtxt();
12203 if (ctxt == NULL) {
12204 xmlFreeParserInputBuffer(input);
12205 return (NULL);
12206 }
12207 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12208 if (stream == NULL) {
12209 xmlFreeParserInputBuffer(input);
12210 xmlFreeParserCtxt(ctxt);
12211 return (NULL);
12212 }
12213 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012214 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012215}
12216
12217/**
12218 * xmlReadIO:
12219 * @ioread: an I/O read function
12220 * @ioclose: an I/O close function
12221 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012222 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012223 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012224 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012225 *
12226 * parse an XML document from I/O functions and source and build a tree.
12227 *
12228 * Returns the resulting document tree
12229 */
12230xmlDocPtr
12231xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012232 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012233{
12234 xmlParserCtxtPtr ctxt;
12235 xmlParserInputBufferPtr input;
12236 xmlParserInputPtr stream;
12237
12238 if (ioread == NULL)
12239 return (NULL);
12240
12241 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12242 XML_CHAR_ENCODING_NONE);
12243 if (input == NULL)
12244 return (NULL);
12245 ctxt = xmlNewParserCtxt();
12246 if (ctxt == NULL) {
12247 xmlFreeParserInputBuffer(input);
12248 return (NULL);
12249 }
12250 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12251 if (stream == NULL) {
12252 xmlFreeParserInputBuffer(input);
12253 xmlFreeParserCtxt(ctxt);
12254 return (NULL);
12255 }
12256 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012257 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012258}
12259
12260/**
12261 * xmlCtxtReadDoc:
12262 * @ctxt: an XML parser context
12263 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012264 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012265 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012266 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012267 *
12268 * parse an XML in-memory document and build a tree.
12269 * This reuses the existing @ctxt parser context
12270 *
12271 * Returns the resulting document tree
12272 */
12273xmlDocPtr
12274xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012275 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012276{
12277 xmlParserInputPtr stream;
12278
12279 if (cur == NULL)
12280 return (NULL);
12281 if (ctxt == NULL)
12282 return (NULL);
12283
12284 xmlCtxtReset(ctxt);
12285
12286 stream = xmlNewStringInputStream(ctxt, cur);
12287 if (stream == NULL) {
12288 return (NULL);
12289 }
12290 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012291 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012292}
12293
12294/**
12295 * xmlCtxtReadFile:
12296 * @ctxt: an XML parser context
12297 * @filename: a file or URL
12298 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012299 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012300 *
12301 * parse an XML file from the filesystem or the network.
12302 * This reuses the existing @ctxt parser context
12303 *
12304 * Returns the resulting document tree
12305 */
12306xmlDocPtr
12307xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12308 const char *encoding, int options)
12309{
12310 xmlParserInputPtr stream;
12311
12312 if (filename == NULL)
12313 return (NULL);
12314 if (ctxt == NULL)
12315 return (NULL);
12316
12317 xmlCtxtReset(ctxt);
12318
12319 stream = xmlNewInputFromFile(ctxt, filename);
12320 if (stream == NULL) {
12321 return (NULL);
12322 }
12323 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012324 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012325}
12326
12327/**
12328 * xmlCtxtReadMemory:
12329 * @ctxt: an XML parser context
12330 * @buffer: a pointer to a char array
12331 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012332 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012333 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012334 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012335 *
12336 * parse an XML in-memory document and build a tree.
12337 * This reuses the existing @ctxt parser context
12338 *
12339 * Returns the resulting document tree
12340 */
12341xmlDocPtr
12342xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012343 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012344{
12345 xmlParserInputBufferPtr input;
12346 xmlParserInputPtr stream;
12347
12348 if (ctxt == NULL)
12349 return (NULL);
12350 if (buffer == NULL)
12351 return (NULL);
12352
12353 xmlCtxtReset(ctxt);
12354
12355 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12356 if (input == NULL) {
12357 return(NULL);
12358 }
12359
12360 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12361 if (stream == NULL) {
12362 xmlFreeParserInputBuffer(input);
12363 return(NULL);
12364 }
12365
12366 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012367 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012368}
12369
12370/**
12371 * xmlCtxtReadFd:
12372 * @ctxt: an XML parser context
12373 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012374 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012375 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012376 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012377 *
12378 * parse an XML from a file descriptor and build a tree.
12379 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012380 * NOTE that the file descriptor will not be closed when the
12381 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012382 *
12383 * Returns the resulting document tree
12384 */
12385xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012386xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12387 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012388{
12389 xmlParserInputBufferPtr input;
12390 xmlParserInputPtr stream;
12391
12392 if (fd < 0)
12393 return (NULL);
12394 if (ctxt == NULL)
12395 return (NULL);
12396
12397 xmlCtxtReset(ctxt);
12398
12399
12400 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12401 if (input == NULL)
12402 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012403 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012404 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12405 if (stream == NULL) {
12406 xmlFreeParserInputBuffer(input);
12407 return (NULL);
12408 }
12409 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012410 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012411}
12412
12413/**
12414 * xmlCtxtReadIO:
12415 * @ctxt: an XML parser context
12416 * @ioread: an I/O read function
12417 * @ioclose: an I/O close function
12418 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012419 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012420 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012421 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012422 *
12423 * parse an XML document from I/O functions and source and build a tree.
12424 * This reuses the existing @ctxt parser context
12425 *
12426 * Returns the resulting document tree
12427 */
12428xmlDocPtr
12429xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12430 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012431 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012432 const char *encoding, int options)
12433{
12434 xmlParserInputBufferPtr input;
12435 xmlParserInputPtr stream;
12436
12437 if (ioread == NULL)
12438 return (NULL);
12439 if (ctxt == NULL)
12440 return (NULL);
12441
12442 xmlCtxtReset(ctxt);
12443
12444 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12445 XML_CHAR_ENCODING_NONE);
12446 if (input == NULL)
12447 return (NULL);
12448 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12449 if (stream == NULL) {
12450 xmlFreeParserInputBuffer(input);
12451 return (NULL);
12452 }
12453 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012454 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012455}