blob: b5ecd2efdb60ce11dbcfb01e23fc512e74027723 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
unsigned int xmlParserMaxDepth = 1024;

#define SAX2 1

/* Buffer size constants used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * array slots are const: nothing is expected to modify this table.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000608 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
609 (ctxt->str_xml_ns == NULL)) {
610 xmlErrMemory(ctxt, NULL);
611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000612}
613
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614typedef struct _xmlDefAttrs xmlDefAttrs;
615typedef xmlDefAttrs *xmlDefAttrsPtr;
616struct _xmlDefAttrs {
617 int nbAttrs; /* number of defaulted attributes on that element */
618 int maxAttrs; /* the size of the array */
619 const xmlChar *values[4]; /* array of localname/prefix/values */
620};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000621
622/**
623 * xmlAddDefAttrs:
624 * @ctxt: an XML parser context
625 * @fullname: the element fullname
626 * @fullattr: the attribute fullname
627 * @value: the attribute value
628 *
629 * Add a defaulted attribute for an element
630 */
631static void
632xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
633 const xmlChar *fullname,
634 const xmlChar *fullattr,
635 const xmlChar *value) {
636 xmlDefAttrsPtr defaults;
637 int len;
638 const xmlChar *name;
639 const xmlChar *prefix;
640
641 if (ctxt->attsDefault == NULL) {
642 ctxt->attsDefault = xmlHashCreate(10);
643 if (ctxt->attsDefault == NULL)
644 goto mem_error;
645 }
646
647 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000648 * split the element name into prefix:localname , the string found
649 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000650 */
651 name = xmlSplitQName3(fullname, &len);
652 if (name == NULL) {
653 name = xmlDictLookup(ctxt->dict, fullname, -1);
654 prefix = NULL;
655 } else {
656 name = xmlDictLookup(ctxt->dict, name, -1);
657 prefix = xmlDictLookup(ctxt->dict, fullname, len);
658 }
659
660 /*
661 * make sure there is some storage
662 */
663 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
664 if (defaults == NULL) {
665 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000666 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000667 if (defaults == NULL)
668 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000670 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
672 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000673 xmlDefAttrsPtr temp;
674
675 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000676 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000677 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 defaults->maxAttrs *= 2;
681 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
682 }
683
684 /*
685 * plit the element name into prefix:localname , the string found
686 * are within the DTD and hen not associated to namespace names.
687 */
688 name = xmlSplitQName3(fullattr, &len);
689 if (name == NULL) {
690 name = xmlDictLookup(ctxt->dict, fullattr, -1);
691 prefix = NULL;
692 } else {
693 name = xmlDictLookup(ctxt->dict, name, -1);
694 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
695 }
696
697 defaults->values[4 * defaults->nbAttrs] = name;
698 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
699 /* intern the string and precompute the end */
700 len = xmlStrlen(value);
701 value = xmlDictLookup(ctxt->dict, value, len);
702 defaults->values[4 * defaults->nbAttrs + 2] = value;
703 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
704 defaults->nbAttrs++;
705
706 return;
707
708mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000709 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000710 return;
711}
712
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000713/**
714 * xmlAddSpecialAttr:
715 * @ctxt: an XML parser context
716 * @fullname: the element fullname
717 * @fullattr: the attribute fullname
718 * @type: the attribute type
719 *
720 * Register that this attribute is not CDATA
721 */
722static void
723xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
724 const xmlChar *fullname,
725 const xmlChar *fullattr,
726 int type)
727{
728 if (ctxt->attsSpecial == NULL) {
729 ctxt->attsSpecial = xmlHashCreate(10);
730 if (ctxt->attsSpecial == NULL)
731 goto mem_error;
732 }
733
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000734 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
735 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000736 return;
737
738mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000739 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000740 return;
741}
742
Daniel Veillard4432df22003-09-28 18:58:27 +0000743/**
744 * xmlCheckLanguageID:
745 * @lang: pointer to the string value
746 *
747 * Checks that the value conforms to the LanguageID production:
748 *
749 * NOTE: this is somewhat deprecated, those productions were removed from
750 * the XML Second edition.
751 *
752 * [33] LanguageID ::= Langcode ('-' Subcode)*
753 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
754 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
755 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
756 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
757 * [38] Subcode ::= ([a-z] | [A-Z])+
758 *
759 * Returns 1 if correct 0 otherwise
760 **/
761int
762xmlCheckLanguageID(const xmlChar * lang)
763{
764 const xmlChar *cur = lang;
765
766 if (cur == NULL)
767 return (0);
768 if (((cur[0] == 'i') && (cur[1] == '-')) ||
769 ((cur[0] == 'I') && (cur[1] == '-'))) {
770 /*
771 * IANA code
772 */
773 cur += 2;
774 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
775 ((cur[0] >= 'a') && (cur[0] <= 'z')))
776 cur++;
777 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
778 ((cur[0] == 'X') && (cur[1] == '-'))) {
779 /*
780 * User code
781 */
782 cur += 2;
783 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
784 ((cur[0] >= 'a') && (cur[0] <= 'z')))
785 cur++;
786 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
787 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
788 /*
789 * ISO639
790 */
791 cur++;
792 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
793 ((cur[0] >= 'a') && (cur[0] <= 'z')))
794 cur++;
795 else
796 return (0);
797 } else
798 return (0);
799 while (cur[0] != 0) { /* non input consuming */
800 if (cur[0] != '-')
801 return (0);
802 cur++;
803 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
804 ((cur[0] >= 'a') && (cur[0] <= 'z')))
805 cur++;
806 else
807 return (0);
808 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
809 ((cur[0] >= 'a') && (cur[0] <= 'z')))
810 cur++;
811 }
812 return (1);
813}
814
Owen Taylor3473f882001-02-23 17:55:21 +0000815/************************************************************************
816 * *
817 * Parser stacks related functions and macros *
818 * *
819 ************************************************************************/
820
821xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
822 const xmlChar ** str);
823
Daniel Veillard0fb18932003-09-07 09:14:37 +0000824#ifdef SAX2
825/**
826 * nsPush:
827 * @ctxt: an XML parser context
828 * @prefix: the namespace prefix or NULL
829 * @URL: the namespace name
830 *
831 * Pushes a new parser namespace on top of the ns stack
832 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000833 * Returns -1 in case of error, -2 if the namespace should be discarded
834 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000835 */
836static int
837nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
838{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000839 if (ctxt->options & XML_PARSE_NSCLEAN) {
840 int i;
841 for (i = 0;i < ctxt->nsNr;i += 2) {
842 if (ctxt->nsTab[i] == prefix) {
843 /* in scope */
844 if (ctxt->nsTab[i + 1] == URL)
845 return(-2);
846 /* out of scope keep it */
847 break;
848 }
849 }
850 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
852 ctxt->nsMax = 10;
853 ctxt->nsNr = 0;
854 ctxt->nsTab = (const xmlChar **)
855 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
856 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000857 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax = 0;
859 return (-1);
860 }
861 } else if (ctxt->nsNr >= ctxt->nsMax) {
862 ctxt->nsMax *= 2;
863 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000864 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000865 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
866 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000867 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->nsMax /= 2;
869 return (-1);
870 }
871 }
872 ctxt->nsTab[ctxt->nsNr++] = prefix;
873 ctxt->nsTab[ctxt->nsNr++] = URL;
874 return (ctxt->nsNr);
875}
876/**
877 * nsPop:
878 * @ctxt: an XML parser context
879 * @nr: the number to pop
880 *
881 * Pops the top @nr parser prefix/namespace from the ns stack
882 *
883 * Returns the number of namespaces removed
884 */
885static int
886nsPop(xmlParserCtxtPtr ctxt, int nr)
887{
888 int i;
889
890 if (ctxt->nsTab == NULL) return(0);
891 if (ctxt->nsNr < nr) {
892 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
893 nr = ctxt->nsNr;
894 }
895 if (ctxt->nsNr <= 0)
896 return (0);
897
898 for (i = 0;i < nr;i++) {
899 ctxt->nsNr--;
900 ctxt->nsTab[ctxt->nsNr] = NULL;
901 }
902 return(nr);
903}
904#endif
905
906static int
907xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
908 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 int maxatts;
911
912 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000913 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 atts = (const xmlChar **)
915 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
919 if (attallocs == NULL) goto mem_error;
920 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000921 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000922 } else if (nr + 5 > ctxt->maxatts) {
923 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
925 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000927 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
929 (maxatts / 5) * sizeof(int));
930 if (attallocs == NULL) goto mem_error;
931 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000932 ctxt->maxatts = maxatts;
933 }
934 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000935mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000936 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000938}
939
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000940/**
941 * inputPush:
942 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000943 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000944 *
945 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000946 *
947 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000948 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000949int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000950inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
951{
952 if (ctxt->inputNr >= ctxt->inputMax) {
953 ctxt->inputMax *= 2;
954 ctxt->inputTab =
955 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
956 ctxt->inputMax *
957 sizeof(ctxt->inputTab[0]));
958 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000959 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000960 return (0);
961 }
962 }
963 ctxt->inputTab[ctxt->inputNr] = value;
964 ctxt->input = value;
965 return (ctxt->inputNr++);
966}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000969 * @ctxt: an XML parser context
970 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000971 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000972 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000974 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000975xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000976inputPop(xmlParserCtxtPtr ctxt)
977{
978 xmlParserInputPtr ret;
979
980 if (ctxt->inputNr <= 0)
981 return (0);
982 ctxt->inputNr--;
983 if (ctxt->inputNr > 0)
984 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
985 else
986 ctxt->input = NULL;
987 ret = ctxt->inputTab[ctxt->inputNr];
988 ctxt->inputTab[ctxt->inputNr] = 0;
989 return (ret);
990}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000991/**
992 * nodePush:
993 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000994 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000995 *
996 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000997 *
998 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000999 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001000int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001001nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1002{
1003 if (ctxt->nodeNr >= ctxt->nodeMax) {
1004 ctxt->nodeMax *= 2;
1005 ctxt->nodeTab =
1006 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1007 ctxt->nodeMax *
1008 sizeof(ctxt->nodeTab[0]));
1009 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001010 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001011 return (0);
1012 }
1013 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001014 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001015 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001016 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1017 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001018 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001019 return(0);
1020 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001021 ctxt->nodeTab[ctxt->nodeNr] = value;
1022 ctxt->node = value;
1023 return (ctxt->nodeNr++);
1024}
1025/**
1026 * nodePop:
1027 * @ctxt: an XML parser context
1028 *
1029 * Pops the top element node from the node stack
1030 *
1031 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001032 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001033xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001034nodePop(xmlParserCtxtPtr ctxt)
1035{
1036 xmlNodePtr ret;
1037
1038 if (ctxt->nodeNr <= 0)
1039 return (0);
1040 ctxt->nodeNr--;
1041 if (ctxt->nodeNr > 0)
1042 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1043 else
1044 ctxt->node = NULL;
1045 ret = ctxt->nodeTab[ctxt->nodeNr];
1046 ctxt->nodeTab[ctxt->nodeNr] = 0;
1047 return (ret);
1048}
Daniel Veillarda2351322004-06-27 12:08:10 +00001049
1050#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001051/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001052 * nameNsPush:
1053 * @ctxt: an XML parser context
1054 * @value: the element name
1055 * @prefix: the element prefix
1056 * @URI: the element namespace name
1057 *
1058 * Pushes a new element name/prefix/URL on top of the name stack
1059 *
1060 * Returns -1 in case of error, the index in the stack otherwise
1061 */
1062static int
1063nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1064 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1065{
1066 if (ctxt->nameNr >= ctxt->nameMax) {
1067 const xmlChar * *tmp;
1068 void **tmp2;
1069 ctxt->nameMax *= 2;
1070 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1071 ctxt->nameMax *
1072 sizeof(ctxt->nameTab[0]));
1073 if (tmp == NULL) {
1074 ctxt->nameMax /= 2;
1075 goto mem_error;
1076 }
1077 ctxt->nameTab = tmp;
1078 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1079 ctxt->nameMax * 3 *
1080 sizeof(ctxt->pushTab[0]));
1081 if (tmp2 == NULL) {
1082 ctxt->nameMax /= 2;
1083 goto mem_error;
1084 }
1085 ctxt->pushTab = tmp2;
1086 }
1087 ctxt->nameTab[ctxt->nameNr] = value;
1088 ctxt->name = value;
1089 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1090 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001091 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001092 return (ctxt->nameNr++);
1093mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001094 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001095 return (-1);
1096}
1097/**
1098 * nameNsPop:
1099 * @ctxt: an XML parser context
1100 *
1101 * Pops the top element/prefix/URI name from the name stack
1102 *
1103 * Returns the name just removed
1104 */
1105static const xmlChar *
1106nameNsPop(xmlParserCtxtPtr ctxt)
1107{
1108 const xmlChar *ret;
1109
1110 if (ctxt->nameNr <= 0)
1111 return (0);
1112 ctxt->nameNr--;
1113 if (ctxt->nameNr > 0)
1114 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1115 else
1116 ctxt->name = NULL;
1117 ret = ctxt->nameTab[ctxt->nameNr];
1118 ctxt->nameTab[ctxt->nameNr] = NULL;
1119 return (ret);
1120}
Daniel Veillarda2351322004-06-27 12:08:10 +00001121#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122
1123/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124 * namePush:
1125 * @ctxt: an XML parser context
1126 * @value: the element name
1127 *
1128 * Pushes a new element name on top of the name stack
1129 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001131 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001132int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001133namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134{
1135 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001137 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001138 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001139 ctxt->nameMax *
1140 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 if (tmp == NULL) {
1142 ctxt->nameMax /= 2;
1143 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001144 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001146 }
1147 ctxt->nameTab[ctxt->nameNr] = value;
1148 ctxt->name = value;
1149 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001151 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001152 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153}
1154/**
1155 * namePop:
1156 * @ctxt: an XML parser context
1157 *
1158 * Pops the top element name from the name stack
1159 *
1160 * Returns the name just removed
1161 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001162const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001163namePop(xmlParserCtxtPtr ctxt)
1164{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001165 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001166
1167 if (ctxt->nameNr <= 0)
1168 return (0);
1169 ctxt->nameNr--;
1170 if (ctxt->nameNr > 0)
1171 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1172 else
1173 ctxt->name = NULL;
1174 ret = ctxt->nameTab[ctxt->nameNr];
1175 ctxt->nameTab[ctxt->nameNr] = 0;
1176 return (ret);
1177}
Owen Taylor3473f882001-02-23 17:55:21 +00001178
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001179static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001180 if (ctxt->spaceNr >= ctxt->spaceMax) {
1181 ctxt->spaceMax *= 2;
1182 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1183 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1184 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001185 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001186 return(0);
1187 }
1188 }
1189 ctxt->spaceTab[ctxt->spaceNr] = val;
1190 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1191 return(ctxt->spaceNr++);
1192}
1193
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001194static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001195 int ret;
1196 if (ctxt->spaceNr <= 0) return(0);
1197 ctxt->spaceNr--;
1198 if (ctxt->spaceNr > 0)
1199 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1200 else
1201 ctxt->space = NULL;
1202 ret = ctxt->spaceTab[ctxt->spaceNr];
1203 ctxt->spaceTab[ctxt->spaceNr] = -1;
1204 return(ret);
1205}
1206
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1241
/*
 * Raw views of the current input position; they all expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * c1 ... cn. Every argument is parenthesized so that expressions can be
 * passed safely, and the bytes are read through a const pointer.
 * s is evaluated several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1264
/*
 * SKIP(val): advance the input by val xmlChars without looking at them,
 * so it must not be used across newlines. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference() and an exhausted
 * input that cannot be refilled is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but advances one xmlChar at a time and keeps
 * the line and column counters right when newlines are crossed.
 * val is parenthesized so that an expression can be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1287
Daniel Veillarda880b122003-04-21 21:36:41 +00001288#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001289 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1290 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001291 xmlSHRINK (ctxt);
1292
1293static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1294 xmlParserInputShrink(ctxt->input);
1295 if ((*ctxt->input->cur == 0) &&
1296 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1297 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001298 }
Owen Taylor3473f882001-02-23 17:55:21 +00001299
Daniel Veillarda880b122003-04-21 21:36:41 +00001300#define GROW if ((ctxt->progressive == 0) && \
1301 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001302 xmlGROW (ctxt);
1303
1304static void xmlGROW (xmlParserCtxtPtr ctxt) {
1305 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1306 if ((*ctxt->input->cur == 0) &&
1307 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1308 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001309}
Owen Taylor3473f882001-02-23 17:55:21 +00001310
/* Skip the blanks found at the current position */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, which must not be a newline since only
 * the column counter is updated, and refill the input when a 0 byte is
 * reached. The expansion is a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping line and column up to date; a '%' at the new position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context, its length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Current character of the string s, its length is stored in l */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v, which is l xmlChars long
 * in the input, to the buffer b at index i and advance i.
 * The expansion is a bare if/else statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001337
1338/**
1339 * xmlSkipBlankChars:
1340 * @ctxt: the XML parser context
1341 *
1342 * skip all blanks character found at that point in the input streams.
1343 * It pops up finished entities in the process if allowable at that point.
1344 *
1345 * Returns the number of space chars skipped
1346 */
1347
1348int
1349xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001350 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001351
1352 /*
1353 * It's Okay to use CUR/NEXT here since all the blanks are on
1354 * the ASCII range.
1355 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001356 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1357 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001358 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001359 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001360 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001361 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001362 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001363 if (*cur == '\n') {
1364 ctxt->input->line++; ctxt->input->col = 1;
1365 }
1366 cur++;
1367 res++;
1368 if (*cur == 0) {
1369 ctxt->input->cur = cur;
1370 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1371 cur = ctxt->input->cur;
1372 }
1373 }
1374 ctxt->input->cur = cur;
1375 } else {
1376 int cur;
1377 do {
1378 cur = CUR;
1379 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1380 NEXT;
1381 cur = CUR;
1382 res++;
1383 }
1384 while ((cur == 0) && (ctxt->inputNr > 1) &&
1385 (ctxt->instate != XML_PARSER_COMMENT)) {
1386 xmlPopInput(ctxt);
1387 cur = CUR;
1388 }
1389 /*
1390 * Need to handle support of entities branching here
1391 */
1392 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1393 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1394 }
Owen Taylor3473f882001-02-23 17:55:21 +00001395 return(res);
1396}
1397
1398/************************************************************************
1399 * *
1400 * Commodity functions to handle entities *
1401 * *
1402 ************************************************************************/
1403
1404/**
1405 * xmlPopInput:
1406 * @ctxt: an XML parser context
1407 *
1408 * xmlPopInput: the current input pointed by ctxt->input came to an end
1409 * pop it and return the next char.
1410 *
1411 * Returns the current xmlChar in the parser context
1412 */
1413xmlChar
1414xmlPopInput(xmlParserCtxtPtr ctxt) {
1415 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1416 if (xmlParserDebugEntities)
1417 xmlGenericError(xmlGenericErrorContext,
1418 "Popping input %d\n", ctxt->inputNr);
1419 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001420 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001421 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1422 return(xmlPopInput(ctxt));
1423 return(CUR);
1424}
1425
1426/**
1427 * xmlPushInput:
1428 * @ctxt: an XML parser context
1429 * @input: an XML parser input fragment (entity, XML fragment ...).
1430 *
1431 * xmlPushInput: switch to a new input stream which is stacked on top
1432 * of the previous one(s).
1433 */
1434void
1435xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1436 if (input == NULL) return;
1437
1438 if (xmlParserDebugEntities) {
1439 if ((ctxt->input != NULL) && (ctxt->input->filename))
1440 xmlGenericError(xmlGenericErrorContext,
1441 "%s(%d): ", ctxt->input->filename,
1442 ctxt->input->line);
1443 xmlGenericError(xmlGenericErrorContext,
1444 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1445 }
1446 inputPush(ctxt, input);
1447 GROW;
1448}
1449
1450/**
1451 * xmlParseCharRef:
1452 * @ctxt: an XML parser context
1453 *
1454 * parse Reference declarations
1455 *
1456 * [66] CharRef ::= '&#' [0-9]+ ';' |
1457 * '&#x' [0-9a-fA-F]+ ';'
1458 *
1459 * [ WFC: Legal Character ]
1460 * Characters referred to using character references must match the
1461 * production for Char.
1462 *
1463 * Returns the value parsed (as an int), 0 in case of error
1464 */
1465int
1466xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001467 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001468 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001469 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001470
Owen Taylor3473f882001-02-23 17:55:21 +00001471 /*
1472 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1473 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001474 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001475 (NXT(2) == 'x')) {
1476 SKIP(3);
1477 GROW;
1478 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001479 if (count++ > 20) {
1480 count = 0;
1481 GROW;
1482 }
1483 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001484 val = val * 16 + (CUR - '0');
1485 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1486 val = val * 16 + (CUR - 'a') + 10;
1487 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1488 val = val * 16 + (CUR - 'A') + 10;
1489 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001490 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001491 val = 0;
1492 break;
1493 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001494 if (val > 0x10FFFF)
1495 outofrange = val;
1496
Owen Taylor3473f882001-02-23 17:55:21 +00001497 NEXT;
1498 count++;
1499 }
1500 if (RAW == ';') {
1501 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001502 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001503 ctxt->nbChars ++;
1504 ctxt->input->cur++;
1505 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001506 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001507 SKIP(2);
1508 GROW;
1509 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001510 if (count++ > 20) {
1511 count = 0;
1512 GROW;
1513 }
1514 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001515 val = val * 10 + (CUR - '0');
1516 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001517 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 val = 0;
1519 break;
1520 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001521 if (val > 0x10FFFF)
1522 outofrange = val;
1523
Owen Taylor3473f882001-02-23 17:55:21 +00001524 NEXT;
1525 count++;
1526 }
1527 if (RAW == ';') {
1528 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001529 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001530 ctxt->nbChars ++;
1531 ctxt->input->cur++;
1532 }
1533 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001534 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001535 }
1536
1537 /*
1538 * [ WFC: Legal Character ]
1539 * Characters referred to using character references must match the
1540 * production for Char.
1541 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001542 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001543 return(val);
1544 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001545 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1546 "xmlParseCharRef: invalid xmlChar value %d\n",
1547 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001548 }
1549 return(0);
1550}
1551
1552/**
1553 * xmlParseStringCharRef:
1554 * @ctxt: an XML parser context
1555 * @str: a pointer to an index in the string
1556 *
1557 * parse Reference declarations, variant parsing from a string rather
1558 * than an an input flow.
1559 *
1560 * [66] CharRef ::= '&#' [0-9]+ ';' |
1561 * '&#x' [0-9a-fA-F]+ ';'
1562 *
1563 * [ WFC: Legal Character ]
1564 * Characters referred to using character references must match the
1565 * production for Char.
1566 *
1567 * Returns the value parsed (as an int), 0 in case of error, str will be
1568 * updated to the current value of the index
1569 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001570static int
Owen Taylor3473f882001-02-23 17:55:21 +00001571xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1572 const xmlChar *ptr;
1573 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001574 unsigned int val = 0;
1575 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001576
1577 if ((str == NULL) || (*str == NULL)) return(0);
1578 ptr = *str;
1579 cur = *ptr;
1580 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1581 ptr += 3;
1582 cur = *ptr;
1583 while (cur != ';') { /* Non input consuming loop */
1584 if ((cur >= '0') && (cur <= '9'))
1585 val = val * 16 + (cur - '0');
1586 else if ((cur >= 'a') && (cur <= 'f'))
1587 val = val * 16 + (cur - 'a') + 10;
1588 else if ((cur >= 'A') && (cur <= 'F'))
1589 val = val * 16 + (cur - 'A') + 10;
1590 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001591 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001592 val = 0;
1593 break;
1594 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001595 if (val > 0x10FFFF)
1596 outofrange = val;
1597
Owen Taylor3473f882001-02-23 17:55:21 +00001598 ptr++;
1599 cur = *ptr;
1600 }
1601 if (cur == ';')
1602 ptr++;
1603 } else if ((cur == '&') && (ptr[1] == '#')){
1604 ptr += 2;
1605 cur = *ptr;
1606 while (cur != ';') { /* Non input consuming loops */
1607 if ((cur >= '0') && (cur <= '9'))
1608 val = val * 10 + (cur - '0');
1609 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001610 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001611 val = 0;
1612 break;
1613 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001614 if (val > 0x10FFFF)
1615 outofrange = val;
1616
Owen Taylor3473f882001-02-23 17:55:21 +00001617 ptr++;
1618 cur = *ptr;
1619 }
1620 if (cur == ';')
1621 ptr++;
1622 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001624 return(0);
1625 }
1626 *str = ptr;
1627
1628 /*
1629 * [ WFC: Legal Character ]
1630 * Characters referred to using character references must match the
1631 * production for Char.
1632 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001633 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001634 return(val);
1635 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001636 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1637 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1638 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001639 }
1640 return(0);
1641}
1642
1643/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001644 * xmlNewBlanksWrapperInputStream:
1645 * @ctxt: an XML parser context
1646 * @entity: an Entity pointer
1647 *
1648 * Create a new input stream for wrapping
1649 * blanks around a PEReference
1650 *
1651 * Returns the new input stream or NULL
1652 */
1653
1654static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1655
Daniel Veillardf4862f02002-09-10 11:13:43 +00001656static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001657xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1658 xmlParserInputPtr input;
1659 xmlChar *buffer;
1660 size_t length;
1661 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001662 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1663 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001664 return(NULL);
1665 }
1666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "new blanks wrapper for entity: %s\n", entity->name);
1669 input = xmlNewInputStream(ctxt);
1670 if (input == NULL) {
1671 return(NULL);
1672 }
1673 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001674 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001675 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001676 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001677 return(NULL);
1678 }
1679 buffer [0] = ' ';
1680 buffer [1] = '%';
1681 buffer [length-3] = ';';
1682 buffer [length-2] = ' ';
1683 buffer [length-1] = 0;
1684 memcpy(buffer + 2, entity->name, length - 5);
1685 input->free = deallocblankswrapper;
1686 input->base = buffer;
1687 input->cur = buffer;
1688 input->length = length;
1689 input->end = &buffer[length];
1690 return(input);
1691}
1692
1693/**
Owen Taylor3473f882001-02-23 17:55:21 +00001694 * xmlParserHandlePEReference:
1695 * @ctxt: the parser context
1696 *
1697 * [69] PEReference ::= '%' Name ';'
1698 *
1699 * [ WFC: No Recursion ]
1700 * A parsed entity must not contain a recursive
1701 * reference to itself, either directly or indirectly.
1702 *
1703 * [ WFC: Entity Declared ]
1704 * In a document without any DTD, a document with only an internal DTD
1705 * subset which contains no parameter entity references, or a document
1706 * with "standalone='yes'", ... ... The declaration of a parameter
1707 * entity must precede any reference to it...
1708 *
1709 * [ VC: Entity Declared ]
1710 * In a document with an external subset or external parameter entities
1711 * with "standalone='no'", ... ... The declaration of a parameter entity
1712 * must precede any reference to it...
1713 *
1714 * [ WFC: In DTD ]
1715 * Parameter-entity references may only appear in the DTD.
1716 * NOTE: misleading but this is handled.
1717 *
1718 * A PEReference may have been detected in the current input stream
1719 * the handling is done accordingly to
1720 * http://www.w3.org/TR/REC-xml#entproc
1721 * i.e.
1722 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001723 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001724 */
1725void
1726xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001727 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001728 xmlEntityPtr entity = NULL;
1729 xmlParserInputPtr input;
1730
Owen Taylor3473f882001-02-23 17:55:21 +00001731 if (RAW != '%') return;
1732 switch(ctxt->instate) {
1733 case XML_PARSER_CDATA_SECTION:
1734 return;
1735 case XML_PARSER_COMMENT:
1736 return;
1737 case XML_PARSER_START_TAG:
1738 return;
1739 case XML_PARSER_END_TAG:
1740 return;
1741 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001742 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001743 return;
1744 case XML_PARSER_PROLOG:
1745 case XML_PARSER_START:
1746 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001747 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001748 return;
1749 case XML_PARSER_ENTITY_DECL:
1750 case XML_PARSER_CONTENT:
1751 case XML_PARSER_ATTRIBUTE_VALUE:
1752 case XML_PARSER_PI:
1753 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001754 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001755 /* we just ignore it there */
1756 return;
1757 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001758 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001759 return;
1760 case XML_PARSER_ENTITY_VALUE:
1761 /*
1762 * NOTE: in the case of entity values, we don't do the
1763 * substitution here since we need the literal
1764 * entity value to be able to save the internal
1765 * subset of the document.
1766 * This will be handled by xmlStringDecodeEntities
1767 */
1768 return;
1769 case XML_PARSER_DTD:
1770 /*
1771 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1772 * In the internal DTD subset, parameter-entity references
1773 * can occur only where markup declarations can occur, not
1774 * within markup declarations.
1775 * In that case this is handled in xmlParseMarkupDecl
1776 */
1777 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1778 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001779 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001780 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001781 break;
1782 case XML_PARSER_IGNORE:
1783 return;
1784 }
1785
1786 NEXT;
1787 name = xmlParseName(ctxt);
1788 if (xmlParserDebugEntities)
1789 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001790 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001791 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001792 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001793 } else {
1794 if (RAW == ';') {
1795 NEXT;
1796 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1797 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1798 if (entity == NULL) {
1799
1800 /*
1801 * [ WFC: Entity Declared ]
1802 * In a document without any DTD, a document with only an
1803 * internal DTD subset which contains no parameter entity
1804 * references, or a document with "standalone='yes'", ...
1805 * ... The declaration of a parameter entity must precede
1806 * any reference to it...
1807 */
1808 if ((ctxt->standalone == 1) ||
1809 ((ctxt->hasExternalSubset == 0) &&
1810 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001811 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001812 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001813 } else {
1814 /*
1815 * [ VC: Entity Declared ]
1816 * In a document with an external subset or external
1817 * parameter entities with "standalone='no'", ...
1818 * ... The declaration of a parameter entity must precede
1819 * any reference to it...
1820 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001821 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1822 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1823 "PEReference: %%%s; not found\n",
1824 name);
1825 } else
1826 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1827 "PEReference: %%%s; not found\n",
1828 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 ctxt->valid = 0;
1830 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001831 } else if (ctxt->input->free != deallocblankswrapper) {
1832 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1833 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001834 } else {
1835 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1836 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001837 xmlChar start[4];
1838 xmlCharEncoding enc;
1839
Owen Taylor3473f882001-02-23 17:55:21 +00001840 /*
1841 * handle the extra spaces added before and after
1842 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001843 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001844 */
1845 input = xmlNewEntityInputStream(ctxt, entity);
1846 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001847
1848 /*
1849 * Get the 4 first bytes and decode the charset
1850 * if enc != XML_CHAR_ENCODING_NONE
1851 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001852 * Note that, since we may have some non-UTF8
1853 * encoding (like UTF16, bug 135229), the 'length'
1854 * is not known, but we can calculate based upon
1855 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001856 */
1857 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001858 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001859 start[0] = RAW;
1860 start[1] = NXT(1);
1861 start[2] = NXT(2);
1862 start[3] = NXT(3);
1863 enc = xmlDetectCharEncoding(start, 4);
1864 if (enc != XML_CHAR_ENCODING_NONE) {
1865 xmlSwitchEncoding(ctxt, enc);
1866 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001867 }
1868
Owen Taylor3473f882001-02-23 17:55:21 +00001869 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001870 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1871 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001872 xmlParseTextDecl(ctxt);
1873 }
Owen Taylor3473f882001-02-23 17:55:21 +00001874 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001875 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1876 "PEReference: %s is not a parameter entity\n",
1877 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001878 }
1879 }
1880 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001881 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001882 }
Owen Taylor3473f882001-02-23 17:55:21 +00001883 }
1884}
1885
/*
 * growBuffer:
 * Double the size of the xmlChar array @buffer.
 *
 * The expansion site must provide an integer variable named
 * <buffer>_size holding the current capacity and a label named
 * mem_error. When xmlRealloc fails the macro jumps to mem_error,
 * leaving @buffer untouched (<buffer>_size has already been doubled
 * at that point).
 */
#define growBuffer(buffer) {						\
    xmlChar *grown_buffer;						\
    buffer##_size *= 2;							\
    grown_buffer = (xmlChar *)						\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (grown_buffer == NULL) goto mem_error;				\
    buffer = grown_buffer;						\
}
1897
1898/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001899 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001900 * @ctxt: the parser context
1901 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001902 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001903 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1904 * @end: an end marker xmlChar, 0 if none
1905 * @end2: an end marker xmlChar, 0 if none
1906 * @end3: an end marker xmlChar, 0 if none
1907 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001908 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001909 *
1910 * [67] Reference ::= EntityRef | CharRef
1911 *
1912 * [69] PEReference ::= '%' Name ';'
1913 *
1914 * Returns A newly allocated string with the substitution done. The caller
1915 * must deallocate it !
1916 */
1917xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001918xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1919 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001920 xmlChar *buffer = NULL;
1921 int buffer_size = 0;
1922
1923 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001924 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 xmlEntityPtr ent;
1926 int c,l;
1927 int nbchars = 0;
1928
Daniel Veillarde57ec792003-09-10 10:50:59 +00001929 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001930 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001932
1933 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001934 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001935 return(NULL);
1936 }
1937
1938 /*
1939 * allocate a translation buffer.
1940 */
1941 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001942 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001943 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001944
1945 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001946 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001947 * we are operating on already parsed values.
1948 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001949 if (str < last)
1950 c = CUR_SCHAR(str, l);
1951 else
1952 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001953 while ((c != 0) && (c != end) && /* non input consuming loop */
1954 (c != end2) && (c != end3)) {
1955
1956 if (c == 0) break;
1957 if ((c == '&') && (str[1] == '#')) {
1958 int val = xmlParseStringCharRef(ctxt, &str);
1959 if (val != 0) {
1960 COPY_BUF(0,buffer,nbchars,val);
1961 }
1962 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1963 if (xmlParserDebugEntities)
1964 xmlGenericError(xmlGenericErrorContext,
1965 "String decoding Entity Reference: %.30s\n",
1966 str);
1967 ent = xmlParseStringEntityRef(ctxt, &str);
1968 if ((ent != NULL) &&
1969 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1970 if (ent->content != NULL) {
1971 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1972 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001973 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1974 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001975 }
1976 } else if ((ent != NULL) && (ent->content != NULL)) {
1977 xmlChar *rep;
1978
1979 ctxt->depth++;
1980 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1981 0, 0, 0);
1982 ctxt->depth--;
1983 if (rep != NULL) {
1984 current = rep;
1985 while (*current != 0) { /* non input consuming loop */
1986 buffer[nbchars++] = *current++;
1987 if (nbchars >
1988 buffer_size - XML_PARSER_BUFFER_SIZE) {
1989 growBuffer(buffer);
1990 }
1991 }
1992 xmlFree(rep);
1993 }
1994 } else if (ent != NULL) {
1995 int i = xmlStrlen(ent->name);
1996 const xmlChar *cur = ent->name;
1997
1998 buffer[nbchars++] = '&';
1999 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2000 growBuffer(buffer);
2001 }
2002 for (;i > 0;i--)
2003 buffer[nbchars++] = *cur++;
2004 buffer[nbchars++] = ';';
2005 }
2006 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2007 if (xmlParserDebugEntities)
2008 xmlGenericError(xmlGenericErrorContext,
2009 "String decoding PE Reference: %.30s\n", str);
2010 ent = xmlParseStringPEReference(ctxt, &str);
2011 if (ent != NULL) {
2012 xmlChar *rep;
2013
2014 ctxt->depth++;
2015 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2016 0, 0, 0);
2017 ctxt->depth--;
2018 if (rep != NULL) {
2019 current = rep;
2020 while (*current != 0) { /* non input consuming loop */
2021 buffer[nbchars++] = *current++;
2022 if (nbchars >
2023 buffer_size - XML_PARSER_BUFFER_SIZE) {
2024 growBuffer(buffer);
2025 }
2026 }
2027 xmlFree(rep);
2028 }
2029 }
2030 } else {
2031 COPY_BUF(l,buffer,nbchars,c);
2032 str += l;
2033 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2034 growBuffer(buffer);
2035 }
2036 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002037 if (str < last)
2038 c = CUR_SCHAR(str, l);
2039 else
2040 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002041 }
2042 buffer[nbchars++] = 0;
2043 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002044
2045mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002046 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002047 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002048}
2049
Daniel Veillarde57ec792003-09-10 10:50:59 +00002050/**
2051 * xmlStringDecodeEntities:
2052 * @ctxt: the parser context
2053 * @str: the input string
2054 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2055 * @end: an end marker xmlChar, 0 if none
2056 * @end2: an end marker xmlChar, 0 if none
2057 * @end3: an end marker xmlChar, 0 if none
2058 *
2059 * Takes a entity string content and process to do the adequate substitutions.
2060 *
2061 * [67] Reference ::= EntityRef | CharRef
2062 *
2063 * [69] PEReference ::= '%' Name ';'
2064 *
2065 * Returns A newly allocated string with the substitution done. The caller
2066 * must deallocate it !
2067 */
2068xmlChar *
2069xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2070 xmlChar end, xmlChar end2, xmlChar end3) {
2071 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2072 end, end2, end3));
2073}
Owen Taylor3473f882001-02-23 17:55:21 +00002074
2075/************************************************************************
2076 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002077 * Commodity functions, cleanup needed ? *
2078 * *
2079 ************************************************************************/
2080
2081/**
2082 * areBlanks:
2083 * @ctxt: an XML parser context
2084 * @str: a xmlChar *
2085 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002086 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002087 *
2088 * Is this a sequence of blank chars that one can ignore ?
2089 *
2090 * Returns 1 if ignorable 0 otherwise.
2091 */
2092
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002093static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2094 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002095 int i, ret;
2096 xmlNodePtr lastChild;
2097
Daniel Veillard05c13a22001-09-09 08:38:09 +00002098 /*
2099 * Don't spend time trying to differentiate them, the same callback is
2100 * used !
2101 */
2102 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002103 return(0);
2104
Owen Taylor3473f882001-02-23 17:55:21 +00002105 /*
2106 * Check for xml:space value.
2107 */
2108 if (*(ctxt->space) == 1)
2109 return(0);
2110
2111 /*
2112 * Check that the string is made of blanks
2113 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002114 if (blank_chars == 0) {
2115 for (i = 0;i < len;i++)
2116 if (!(IS_BLANK_CH(str[i]))) return(0);
2117 }
Owen Taylor3473f882001-02-23 17:55:21 +00002118
2119 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002120 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002121 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002122 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002123 if (ctxt->myDoc != NULL) {
2124 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2125 if (ret == 0) return(1);
2126 if (ret == 1) return(0);
2127 }
2128
2129 /*
2130 * Otherwise, heuristic :-\
2131 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002132 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002133 if ((ctxt->node->children == NULL) &&
2134 (RAW == '<') && (NXT(1) == '/')) return(0);
2135
2136 lastChild = xmlGetLastChild(ctxt->node);
2137 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002138 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2139 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002140 } else if (xmlNodeIsText(lastChild))
2141 return(0);
2142 else if ((ctxt->node->children != NULL) &&
2143 (xmlNodeIsText(ctxt->node->children)))
2144 return(0);
2145 return(1);
2146}
2147
Owen Taylor3473f882001-02-23 17:55:21 +00002148/************************************************************************
2149 * *
2150 * Extra stuff for namespace support *
2151 * Relates to http://www.w3.org/TR/WD-xml-names *
2152 * *
2153 ************************************************************************/
2154
2155/**
2156 * xmlSplitQName:
2157 * @ctxt: an XML parser context
2158 * @name: an XML parser context
2159 * @prefix: a xmlChar **
2160 *
2161 * parse an UTF8 encoded XML qualified name string
2162 *
2163 * [NS 5] QName ::= (Prefix ':')? LocalPart
2164 *
2165 * [NS 6] Prefix ::= NCName
2166 *
2167 * [NS 7] LocalPart ::= NCName
2168 *
2169 * Returns the local part, and prefix is updated
2170 * to get the Prefix if any.
2171 */
2172
2173xmlChar *
2174xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2175 xmlChar buf[XML_MAX_NAMELEN + 5];
2176 xmlChar *buffer = NULL;
2177 int len = 0;
2178 int max = XML_MAX_NAMELEN;
2179 xmlChar *ret = NULL;
2180 const xmlChar *cur = name;
2181 int c;
2182
2183 *prefix = NULL;
2184
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002185 if (cur == NULL) return(NULL);
2186
Owen Taylor3473f882001-02-23 17:55:21 +00002187#ifndef XML_XML_NAMESPACE
2188 /* xml: prefix is not really a namespace */
2189 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2190 (cur[2] == 'l') && (cur[3] == ':'))
2191 return(xmlStrdup(name));
2192#endif
2193
Daniel Veillard597bc482003-07-24 16:08:28 +00002194 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002195 if (cur[0] == ':')
2196 return(xmlStrdup(name));
2197
2198 c = *cur++;
2199 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2200 buf[len++] = c;
2201 c = *cur++;
2202 }
2203 if (len >= max) {
2204 /*
2205 * Okay someone managed to make a huge name, so he's ready to pay
2206 * for the processing speed.
2207 */
2208 max = len * 2;
2209
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002210 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002211 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002212 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002213 return(NULL);
2214 }
2215 memcpy(buffer, buf, len);
2216 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2217 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002218 xmlChar *tmp;
2219
Owen Taylor3473f882001-02-23 17:55:21 +00002220 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002221 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002222 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002223 if (tmp == NULL) {
2224 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002225 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002226 return(NULL);
2227 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002228 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002229 }
2230 buffer[len++] = c;
2231 c = *cur++;
2232 }
2233 buffer[len] = 0;
2234 }
2235
Daniel Veillard597bc482003-07-24 16:08:28 +00002236 /* nasty but well=formed
2237 if ((c == ':') && (*cur == 0)) {
2238 return(xmlStrdup(name));
2239 } */
2240
Owen Taylor3473f882001-02-23 17:55:21 +00002241 if (buffer == NULL)
2242 ret = xmlStrndup(buf, len);
2243 else {
2244 ret = buffer;
2245 buffer = NULL;
2246 max = XML_MAX_NAMELEN;
2247 }
2248
2249
2250 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002251 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002252 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002253 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002254 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002255 }
Owen Taylor3473f882001-02-23 17:55:21 +00002256 len = 0;
2257
Daniel Veillardbb284f42002-10-16 18:02:47 +00002258 /*
2259 * Check that the first character is proper to start
2260 * a new name
2261 */
2262 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2263 ((c >= 0x41) && (c <= 0x5A)) ||
2264 (c == '_') || (c == ':'))) {
2265 int l;
2266 int first = CUR_SCHAR(cur, l);
2267
2268 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002269 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002270 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002271 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002272 }
2273 }
2274 cur++;
2275
Owen Taylor3473f882001-02-23 17:55:21 +00002276 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2277 buf[len++] = c;
2278 c = *cur++;
2279 }
2280 if (len >= max) {
2281 /*
2282 * Okay someone managed to make a huge name, so he's ready to pay
2283 * for the processing speed.
2284 */
2285 max = len * 2;
2286
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002287 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002288 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002289 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002290 return(NULL);
2291 }
2292 memcpy(buffer, buf, len);
2293 while (c != 0) { /* tested bigname2.xml */
2294 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002295 xmlChar *tmp;
2296
Owen Taylor3473f882001-02-23 17:55:21 +00002297 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002298 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002299 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002300 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002301 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002302 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002303 return(NULL);
2304 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002305 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002306 }
2307 buffer[len++] = c;
2308 c = *cur++;
2309 }
2310 buffer[len] = 0;
2311 }
2312
2313 if (buffer == NULL)
2314 ret = xmlStrndup(buf, len);
2315 else {
2316 ret = buffer;
2317 }
2318 }
2319
2320 return(ret);
2321}
2322
2323/************************************************************************
2324 * *
2325 * The parser itself *
2326 * Relates to http://www.w3.org/TR/REC-xml *
2327 * *
2328 ************************************************************************/
2329
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002330static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002331static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002332 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002333
Owen Taylor3473f882001-02-23 17:55:21 +00002334/**
2335 * xmlParseName:
2336 * @ctxt: an XML parser context
2337 *
2338 * parse an XML name.
2339 *
2340 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2341 * CombiningChar | Extender
2342 *
2343 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2344 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002345 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002346 *
2347 * Returns the Name parsed or NULL
2348 */
2349
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002350const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002351xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002352 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002353 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002354 int count = 0;
2355
2356 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002357
2358 /*
2359 * Accelerator for simple ASCII names
2360 */
2361 in = ctxt->input->cur;
2362 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2363 ((*in >= 0x41) && (*in <= 0x5A)) ||
2364 (*in == '_') || (*in == ':')) {
2365 in++;
2366 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2367 ((*in >= 0x41) && (*in <= 0x5A)) ||
2368 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002369 (*in == '_') || (*in == '-') ||
2370 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002371 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002372 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002373 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002374 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002375 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002376 ctxt->nbChars += count;
2377 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002378 if (ret == NULL)
2379 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002380 return(ret);
2381 }
2382 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002383 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002384}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002385
Daniel Veillard46de64e2002-05-29 08:21:33 +00002386/**
2387 * xmlParseNameAndCompare:
2388 * @ctxt: an XML parser context
2389 *
2390 * parse an XML name and compares for match
2391 * (specialized for endtag parsing)
2392 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002393 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2394 * and the name for mismatch
2395 */
2396
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002397static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002398xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002399 register const xmlChar *cmp = other;
2400 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002401 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002402
2403 GROW;
2404
2405 in = ctxt->input->cur;
2406 while (*in != 0 && *in == *cmp) {
2407 ++in;
2408 ++cmp;
2409 }
William M. Brack76e95df2003-10-18 16:20:14 +00002410 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002411 /* success */
2412 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002413 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002414 }
2415 /* failure (or end of input buffer), check with full function */
2416 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002417 /* strings coming from the dictionnary direct compare possible */
2418 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002419 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002420 }
2421 return ret;
2422}
2423
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002424static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002425xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002426 int len = 0, l;
2427 int c;
2428 int count = 0;
2429
2430 /*
2431 * Handler for more complex cases
2432 */
2433 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002434 c = CUR_CHAR(l);
2435 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2436 (!IS_LETTER(c) && (c != '_') &&
2437 (c != ':'))) {
2438 return(NULL);
2439 }
2440
2441 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002442 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002443 (c == '.') || (c == '-') ||
2444 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002445 (IS_COMBINING(c)) ||
2446 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002447 if (count++ > 100) {
2448 count = 0;
2449 GROW;
2450 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002451 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002452 NEXTL(l);
2453 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002454 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002455 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002456}
2457
2458/**
2459 * xmlParseStringName:
2460 * @ctxt: an XML parser context
2461 * @str: a pointer to the string pointer (IN/OUT)
2462 *
2463 * parse an XML name.
2464 *
2465 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2466 * CombiningChar | Extender
2467 *
2468 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2469 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002470 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002471 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002472 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002473 * is updated to the current location in the string.
2474 */
2475
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002476static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002477xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2478 xmlChar buf[XML_MAX_NAMELEN + 5];
2479 const xmlChar *cur = *str;
2480 int len = 0, l;
2481 int c;
2482
2483 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002484 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002485 (c != ':')) {
2486 return(NULL);
2487 }
2488
William M. Brack871611b2003-10-18 04:53:14 +00002489 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002490 (c == '.') || (c == '-') ||
2491 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002492 (IS_COMBINING(c)) ||
2493 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002494 COPY_BUF(l,buf,len,c);
2495 cur += l;
2496 c = CUR_SCHAR(cur, l);
2497 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2498 /*
2499 * Okay someone managed to make a huge name, so he's ready to pay
2500 * for the processing speed.
2501 */
2502 xmlChar *buffer;
2503 int max = len * 2;
2504
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002505 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002506 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002507 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002508 return(NULL);
2509 }
2510 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002511 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002512 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002513 (c == '.') || (c == '-') ||
2514 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002515 (IS_COMBINING(c)) ||
2516 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002517 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002518 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002519 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002520 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002521 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002522 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002523 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002525 return(NULL);
2526 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002527 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002528 }
2529 COPY_BUF(l,buffer,len,c);
2530 cur += l;
2531 c = CUR_SCHAR(cur, l);
2532 }
2533 buffer[len] = 0;
2534 *str = cur;
2535 return(buffer);
2536 }
2537 }
2538 *str = cur;
2539 return(xmlStrndup(buf, len));
2540}
2541
2542/**
2543 * xmlParseNmtoken:
2544 * @ctxt: an XML parser context
2545 *
2546 * parse an XML Nmtoken.
2547 *
2548 * [7] Nmtoken ::= (NameChar)+
2549 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002550 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002551 *
2552 * Returns the Nmtoken parsed or NULL
2553 */
2554
2555xmlChar *
2556xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2557 xmlChar buf[XML_MAX_NAMELEN + 5];
2558 int len = 0, l;
2559 int c;
2560 int count = 0;
2561
2562 GROW;
2563 c = CUR_CHAR(l);
2564
William M. Brack871611b2003-10-18 04:53:14 +00002565 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002566 (c == '.') || (c == '-') ||
2567 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002568 (IS_COMBINING(c)) ||
2569 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002570 if (count++ > 100) {
2571 count = 0;
2572 GROW;
2573 }
2574 COPY_BUF(l,buf,len,c);
2575 NEXTL(l);
2576 c = CUR_CHAR(l);
2577 if (len >= XML_MAX_NAMELEN) {
2578 /*
2579 * Okay someone managed to make a huge token, so he's ready to pay
2580 * for the processing speed.
2581 */
2582 xmlChar *buffer;
2583 int max = len * 2;
2584
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002585 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002586 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002587 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002588 return(NULL);
2589 }
2590 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002591 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002592 (c == '.') || (c == '-') ||
2593 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002594 (IS_COMBINING(c)) ||
2595 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002596 if (count++ > 100) {
2597 count = 0;
2598 GROW;
2599 }
2600 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002601 xmlChar *tmp;
2602
Owen Taylor3473f882001-02-23 17:55:21 +00002603 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002604 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002605 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002606 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002607 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002608 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002609 return(NULL);
2610 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002611 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002612 }
2613 COPY_BUF(l,buffer,len,c);
2614 NEXTL(l);
2615 c = CUR_CHAR(l);
2616 }
2617 buffer[len] = 0;
2618 return(buffer);
2619 }
2620 }
2621 if (len == 0)
2622 return(NULL);
2623 return(xmlStrndup(buf, len));
2624}
2625
2626/**
2627 * xmlParseEntityValue:
2628 * @ctxt: an XML parser context
2629 * @orig: if non-NULL store a copy of the original entity value
2630 *
2631 * parse a value for ENTITY declarations
2632 *
2633 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2634 * "'" ([^%&'] | PEReference | Reference)* "'"
2635 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002636 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002637 */
2638
2639xmlChar *
2640xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2641 xmlChar *buf = NULL;
2642 int len = 0;
2643 int size = XML_PARSER_BUFFER_SIZE;
2644 int c, l;
2645 xmlChar stop;
2646 xmlChar *ret = NULL;
2647 const xmlChar *cur = NULL;
2648 xmlParserInputPtr input;
2649
2650 if (RAW == '"') stop = '"';
2651 else if (RAW == '\'') stop = '\'';
2652 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002653 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002654 return(NULL);
2655 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002656 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002657 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002658 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002659 return(NULL);
2660 }
2661
2662 /*
2663 * The content of the entity definition is copied in a buffer.
2664 */
2665
2666 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2667 input = ctxt->input;
2668 GROW;
2669 NEXT;
2670 c = CUR_CHAR(l);
2671 /*
2672 * NOTE: 4.4.5 Included in Literal
2673 * When a parameter entity reference appears in a literal entity
2674 * value, ... a single or double quote character in the replacement
2675 * text is always treated as a normal data character and will not
2676 * terminate the literal.
2677 * In practice it means we stop the loop only when back at parsing
2678 * the initial entity and the quote is found
2679 */
William M. Brack871611b2003-10-18 04:53:14 +00002680 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002681 (ctxt->input != input))) {
2682 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002683 xmlChar *tmp;
2684
Owen Taylor3473f882001-02-23 17:55:21 +00002685 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002686 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2687 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002688 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002689 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002690 return(NULL);
2691 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002692 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 }
2694 COPY_BUF(l,buf,len,c);
2695 NEXTL(l);
2696 /*
2697 * Pop-up of finished entities.
2698 */
2699 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2700 xmlPopInput(ctxt);
2701
2702 GROW;
2703 c = CUR_CHAR(l);
2704 if (c == 0) {
2705 GROW;
2706 c = CUR_CHAR(l);
2707 }
2708 }
2709 buf[len] = 0;
2710
2711 /*
2712 * Raise problem w.r.t. '&' and '%' being used in non-entities
2713 * reference constructs. Note Charref will be handled in
2714 * xmlStringDecodeEntities()
2715 */
2716 cur = buf;
2717 while (*cur != 0) { /* non input consuming */
2718 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2719 xmlChar *name;
2720 xmlChar tmp = *cur;
2721
2722 cur++;
2723 name = xmlParseStringName(ctxt, &cur);
2724 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002725 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002726 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002727 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002728 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002729 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2730 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002731 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002732 }
2733 if (name != NULL)
2734 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002735 if (*cur == 0)
2736 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002737 }
2738 cur++;
2739 }
2740
2741 /*
2742 * Then PEReference entities are substituted.
2743 */
2744 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002745 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002746 xmlFree(buf);
2747 } else {
2748 NEXT;
2749 /*
2750 * NOTE: 4.4.7 Bypassed
2751 * When a general entity reference appears in the EntityValue in
2752 * an entity declaration, it is bypassed and left as is.
2753 * so XML_SUBSTITUTE_REF is not set here.
2754 */
2755 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2756 0, 0, 0);
2757 if (orig != NULL)
2758 *orig = buf;
2759 else
2760 xmlFree(buf);
2761 }
2762
2763 return(ret);
2764}
2765
2766/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002767 * xmlParseAttValueComplex:
2768 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002769 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002770 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002771 *
2772 * parse a value for an attribute, this is the fallback function
2773 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002774 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002775 *
2776 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2777 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002778static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002779xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002780 xmlChar limit = 0;
2781 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002782 int len = 0;
2783 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002784 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002785 xmlChar *current = NULL;
2786 xmlEntityPtr ent;
2787
Owen Taylor3473f882001-02-23 17:55:21 +00002788 if (NXT(0) == '"') {
2789 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2790 limit = '"';
2791 NEXT;
2792 } else if (NXT(0) == '\'') {
2793 limit = '\'';
2794 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2795 NEXT;
2796 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002797 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002798 return(NULL);
2799 }
2800
2801 /*
2802 * allocate a translation buffer.
2803 */
2804 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002805 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002806 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002807
2808 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002809 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002810 */
2811 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002812 while ((NXT(0) != limit) && /* checked */
2813 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002814 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002815 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002816 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002817 if (NXT(1) == '#') {
2818 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002819
Owen Taylor3473f882001-02-23 17:55:21 +00002820 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002821 if (ctxt->replaceEntities) {
2822 if (len > buf_size - 10) {
2823 growBuffer(buf);
2824 }
2825 buf[len++] = '&';
2826 } else {
2827 /*
2828 * The reparsing will be done in xmlStringGetNodeList()
2829 * called by the attribute() function in SAX.c
2830 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002831 if (len > buf_size - 10) {
2832 growBuffer(buf);
2833 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002834 buf[len++] = '&';
2835 buf[len++] = '#';
2836 buf[len++] = '3';
2837 buf[len++] = '8';
2838 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002839 }
2840 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002841 if (len > buf_size - 10) {
2842 growBuffer(buf);
2843 }
Owen Taylor3473f882001-02-23 17:55:21 +00002844 len += xmlCopyChar(0, &buf[len], val);
2845 }
2846 } else {
2847 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002848 if ((ent != NULL) &&
2849 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2850 if (len > buf_size - 10) {
2851 growBuffer(buf);
2852 }
2853 if ((ctxt->replaceEntities == 0) &&
2854 (ent->content[0] == '&')) {
2855 buf[len++] = '&';
2856 buf[len++] = '#';
2857 buf[len++] = '3';
2858 buf[len++] = '8';
2859 buf[len++] = ';';
2860 } else {
2861 buf[len++] = ent->content[0];
2862 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002863 } else if ((ent != NULL) &&
2864 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002865 xmlChar *rep;
2866
2867 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2868 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002869 XML_SUBSTITUTE_REF,
2870 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002871 if (rep != NULL) {
2872 current = rep;
2873 while (*current != 0) { /* non input consuming */
2874 buf[len++] = *current++;
2875 if (len > buf_size - 10) {
2876 growBuffer(buf);
2877 }
2878 }
2879 xmlFree(rep);
2880 }
2881 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002882 if (len > buf_size - 10) {
2883 growBuffer(buf);
2884 }
Owen Taylor3473f882001-02-23 17:55:21 +00002885 if (ent->content != NULL)
2886 buf[len++] = ent->content[0];
2887 }
2888 } else if (ent != NULL) {
2889 int i = xmlStrlen(ent->name);
2890 const xmlChar *cur = ent->name;
2891
2892 /*
2893 * This may look absurd but is needed to detect
2894 * entities problems
2895 */
2896 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2897 (ent->content != NULL)) {
2898 xmlChar *rep;
2899 rep = xmlStringDecodeEntities(ctxt, ent->content,
2900 XML_SUBSTITUTE_REF, 0, 0, 0);
2901 if (rep != NULL)
2902 xmlFree(rep);
2903 }
2904
2905 /*
2906 * Just output the reference
2907 */
2908 buf[len++] = '&';
2909 if (len > buf_size - i - 10) {
2910 growBuffer(buf);
2911 }
2912 for (;i > 0;i--)
2913 buf[len++] = *cur++;
2914 buf[len++] = ';';
2915 }
2916 }
2917 } else {
2918 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002919 if ((len != 0) || (!normalize)) {
2920 if ((!normalize) || (!in_space)) {
2921 COPY_BUF(l,buf,len,0x20);
2922 if (len > buf_size - 10) {
2923 growBuffer(buf);
2924 }
2925 }
2926 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002927 }
2928 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002929 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002930 COPY_BUF(l,buf,len,c);
2931 if (len > buf_size - 10) {
2932 growBuffer(buf);
2933 }
2934 }
2935 NEXTL(l);
2936 }
2937 GROW;
2938 c = CUR_CHAR(l);
2939 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002940 if ((in_space) && (normalize)) {
2941 while (buf[len - 1] == 0x20) len--;
2942 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002943 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002944 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002945 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002946 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002947 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2948 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002949 } else
2950 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002951 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002952 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002953
2954mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002955 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002956 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002957}
2958
2959/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002960 * xmlParseAttValue:
2961 * @ctxt: an XML parser context
2962 *
2963 * parse a value for an attribute
2964 * Note: the parser won't do substitution of entities here, this
2965 * will be handled later in xmlStringGetNodeList
2966 *
2967 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2968 * "'" ([^<&'] | Reference)* "'"
2969 *
2970 * 3.3.3 Attribute-Value Normalization:
2971 * Before the value of an attribute is passed to the application or
2972 * checked for validity, the XML processor must normalize it as follows:
2973 * - a character reference is processed by appending the referenced
2974 * character to the attribute value
2975 * - an entity reference is processed by recursively processing the
2976 * replacement text of the entity
2977 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2978 * appending #x20 to the normalized value, except that only a single
2979 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2980 * parsed entity or the literal entity value of an internal parsed entity
2981 * - other characters are processed by appending them to the normalized value
2982 * If the declared value is not CDATA, then the XML processor must further
2983 * process the normalized attribute value by discarding any leading and
2984 * trailing space (#x20) characters, and by replacing sequences of space
2985 * (#x20) characters by a single space (#x20) character.
2986 * All attributes for which no declaration has been read should be treated
2987 * by a non-validating parser as if declared CDATA.
2988 *
2989 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2990 */
2991
2992
2993xmlChar *
2994xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002995 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002996}
2997
2998/**
Owen Taylor3473f882001-02-23 17:55:21 +00002999 * xmlParseSystemLiteral:
3000 * @ctxt: an XML parser context
3001 *
3002 * parse an XML Literal
3003 *
3004 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3005 *
3006 * Returns the SystemLiteral parsed or NULL
3007 */
3008
3009xmlChar *
3010xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3011 xmlChar *buf = NULL;
3012 int len = 0;
3013 int size = XML_PARSER_BUFFER_SIZE;
3014 int cur, l;
3015 xmlChar stop;
3016 int state = ctxt->instate;
3017 int count = 0;
3018
3019 SHRINK;
3020 if (RAW == '"') {
3021 NEXT;
3022 stop = '"';
3023 } else if (RAW == '\'') {
3024 NEXT;
3025 stop = '\'';
3026 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003027 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003028 return(NULL);
3029 }
3030
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003031 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003032 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003033 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003034 return(NULL);
3035 }
3036 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3037 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003038 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003039 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003040 xmlChar *tmp;
3041
Owen Taylor3473f882001-02-23 17:55:21 +00003042 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003043 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3044 if (tmp == NULL) {
3045 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003046 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003047 ctxt->instate = (xmlParserInputState) state;
3048 return(NULL);
3049 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003050 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003051 }
3052 count++;
3053 if (count > 50) {
3054 GROW;
3055 count = 0;
3056 }
3057 COPY_BUF(l,buf,len,cur);
3058 NEXTL(l);
3059 cur = CUR_CHAR(l);
3060 if (cur == 0) {
3061 GROW;
3062 SHRINK;
3063 cur = CUR_CHAR(l);
3064 }
3065 }
3066 buf[len] = 0;
3067 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003068 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003069 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003070 } else {
3071 NEXT;
3072 }
3073 return(buf);
3074}
3075
3076/**
3077 * xmlParsePubidLiteral:
3078 * @ctxt: an XML parser context
3079 *
3080 * parse an XML public literal
3081 *
3082 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3083 *
3084 * Returns the PubidLiteral parsed or NULL.
3085 */
3086
3087xmlChar *
3088xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3089 xmlChar *buf = NULL;
3090 int len = 0;
3091 int size = XML_PARSER_BUFFER_SIZE;
3092 xmlChar cur;
3093 xmlChar stop;
3094 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003095 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003096
3097 SHRINK;
3098 if (RAW == '"') {
3099 NEXT;
3100 stop = '"';
3101 } else if (RAW == '\'') {
3102 NEXT;
3103 stop = '\'';
3104 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003105 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003106 return(NULL);
3107 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003108 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003109 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003110 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003111 return(NULL);
3112 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003113 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003114 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003115 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003116 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003117 xmlChar *tmp;
3118
Owen Taylor3473f882001-02-23 17:55:21 +00003119 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003120 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3121 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003122 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003123 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003124 return(NULL);
3125 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003126 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003127 }
3128 buf[len++] = cur;
3129 count++;
3130 if (count > 50) {
3131 GROW;
3132 count = 0;
3133 }
3134 NEXT;
3135 cur = CUR;
3136 if (cur == 0) {
3137 GROW;
3138 SHRINK;
3139 cur = CUR;
3140 }
3141 }
3142 buf[len] = 0;
3143 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003144 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003145 } else {
3146 NEXT;
3147 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003148 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003149 return(buf);
3150}
3151
Daniel Veillard48b2f892001-02-25 16:11:03 +00003152void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003153/**
3154 * xmlParseCharData:
3155 * @ctxt: an XML parser context
3156 * @cdata: int indicating whether we are within a CDATA section
3157 *
3158 * parse a CharData section.
3159 * if we are within a CDATA section ']]>' marks an end of section.
3160 *
3161 * The right angle bracket (>) may be represented using the string "&gt;",
3162 * and must, for compatibility, be escaped using "&gt;" or a character
3163 * reference when it appears in the string "]]>" in content, when that
3164 * string is not marking the end of a CDATA section.
3165 *
3166 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3167 */
3168
3169void
3170xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003171 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003172 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003173 int line = ctxt->input->line;
3174 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003175
3176 SHRINK;
3177 GROW;
3178 /*
3179 * Accelerated common case where input don't need to be
3180 * modified before passing it to the handler.
3181 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003182 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003183 in = ctxt->input->cur;
3184 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003185get_more_space:
3186 while (*in == 0x20) in++;
3187 if (*in == 0xA) {
3188 ctxt->input->line++;
3189 in++;
3190 while (*in == 0xA) {
3191 ctxt->input->line++;
3192 in++;
3193 }
3194 goto get_more_space;
3195 }
3196 if (*in == '<') {
3197 nbchar = in - ctxt->input->cur;
3198 if (nbchar > 0) {
3199 const xmlChar *tmp = ctxt->input->cur;
3200 ctxt->input->cur = in;
3201
3202 if (ctxt->sax->ignorableWhitespace !=
3203 ctxt->sax->characters) {
3204 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3205 ctxt->sax->ignorableWhitespace(ctxt->userData,
3206 tmp, nbchar);
3207 } else if (ctxt->sax->characters != NULL)
3208 ctxt->sax->characters(ctxt->userData,
3209 tmp, nbchar);
3210 } else if (ctxt->sax->characters != NULL) {
3211 ctxt->sax->characters(ctxt->userData,
3212 tmp, nbchar);
3213 }
3214 }
3215 return;
3216 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003217get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003218 while (((*in > ']') && (*in <= 0x7F)) ||
3219 ((*in > '&') && (*in < '<')) ||
3220 ((*in > '<') && (*in < ']')) ||
3221 ((*in >= 0x20) && (*in < '&')) ||
3222 (*in == 0x09))
3223 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003224 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003225 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003226 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003227 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003228 ctxt->input->line++;
3229 in++;
3230 }
3231 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003232 }
3233 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003234 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003235 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003236 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003237 return;
3238 }
3239 in++;
3240 goto get_more;
3241 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003242 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003243 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003244 if ((ctxt->sax->ignorableWhitespace !=
3245 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003246 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003247 const xmlChar *tmp = ctxt->input->cur;
3248 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003249
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003250 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003251 ctxt->sax->ignorableWhitespace(ctxt->userData,
3252 tmp, nbchar);
3253 } else if (ctxt->sax->characters != NULL)
3254 ctxt->sax->characters(ctxt->userData,
3255 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003256 line = ctxt->input->line;
3257 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003258 } else {
3259 if (ctxt->sax->characters != NULL)
3260 ctxt->sax->characters(ctxt->userData,
3261 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003262 line = ctxt->input->line;
3263 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003264 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003265 }
3266 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003267 if (*in == 0xD) {
3268 in++;
3269 if (*in == 0xA) {
3270 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003271 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003272 ctxt->input->line++;
3273 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003274 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003275 in--;
3276 }
3277 if (*in == '<') {
3278 return;
3279 }
3280 if (*in == '&') {
3281 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003282 }
3283 SHRINK;
3284 GROW;
3285 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003286 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003287 nbchar = 0;
3288 }
Daniel Veillard50582112001-03-26 22:52:16 +00003289 ctxt->input->line = line;
3290 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003291 xmlParseCharDataComplex(ctxt, cdata);
3292}
3293
Daniel Veillard01c13b52002-12-10 15:19:08 +00003294/**
3295 * xmlParseCharDataComplex:
3296 * @ctxt: an XML parser context
3297 * @cdata: int indicating whether we are within a CDATA section
3298 *
3299 * parse a CharData section.this is the fallback function
3300 * of xmlParseCharData() when the parsing requires handling
3301 * of non-ASCII characters.
3302 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003303void
3304xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003305 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3306 int nbchar = 0;
3307 int cur, l;
3308 int count = 0;
3309
3310 SHRINK;
3311 GROW;
3312 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003313 while ((cur != '<') && /* checked */
3314 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003315 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003316 if ((cur == ']') && (NXT(1) == ']') &&
3317 (NXT(2) == '>')) {
3318 if (cdata) break;
3319 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003320 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003321 }
3322 }
3323 COPY_BUF(l,buf,nbchar,cur);
3324 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003325 buf[nbchar] = 0;
3326
Owen Taylor3473f882001-02-23 17:55:21 +00003327 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003328 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003329 */
3330 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003331 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003332 if (ctxt->sax->ignorableWhitespace != NULL)
3333 ctxt->sax->ignorableWhitespace(ctxt->userData,
3334 buf, nbchar);
3335 } else {
3336 if (ctxt->sax->characters != NULL)
3337 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3338 }
3339 }
3340 nbchar = 0;
3341 }
3342 count++;
3343 if (count > 50) {
3344 GROW;
3345 count = 0;
3346 }
3347 NEXTL(l);
3348 cur = CUR_CHAR(l);
3349 }
3350 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003351 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003352 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003353 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003354 */
3355 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003356 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003357 if (ctxt->sax->ignorableWhitespace != NULL)
3358 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3359 } else {
3360 if (ctxt->sax->characters != NULL)
3361 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3362 }
3363 }
3364 }
3365}
3366
3367/**
3368 * xmlParseExternalID:
3369 * @ctxt: an XML parser context
3370 * @publicID: a xmlChar** receiving PubidLiteral
3371 * @strict: indicate whether we should restrict parsing to only
3372 * production [75], see NOTE below
3373 *
3374 * Parse an External ID or a Public ID
3375 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003376 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003377 * 'PUBLIC' S PubidLiteral S SystemLiteral
3378 *
3379 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3380 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3381 *
3382 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3383 *
3384 * Returns the function returns SystemLiteral and in the second
3385 * case publicID receives PubidLiteral, is strict is off
3386 * it is possible to return NULL and have publicID set.
3387 */
3388
3389xmlChar *
3390xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3391 xmlChar *URI = NULL;
3392
3393 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003394
3395 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003396 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003397 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003398 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003399 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3400 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003401 }
3402 SKIP_BLANKS;
3403 URI = xmlParseSystemLiteral(ctxt);
3404 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003405 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003406 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003407 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003408 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003409 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003410 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003411 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003412 }
3413 SKIP_BLANKS;
3414 *publicID = xmlParsePubidLiteral(ctxt);
3415 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003416 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003417 }
3418 if (strict) {
3419 /*
3420 * We don't handle [83] so "S SystemLiteral" is required.
3421 */
William M. Brack76e95df2003-10-18 16:20:14 +00003422 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003423 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003424 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003425 }
3426 } else {
3427 /*
3428 * We handle [83] so we return immediately, if
3429 * "S SystemLiteral" is not detected. From a purely parsing
3430 * point of view that's a nice mess.
3431 */
3432 const xmlChar *ptr;
3433 GROW;
3434
3435 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003436 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003437
William M. Brack76e95df2003-10-18 16:20:14 +00003438 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003439 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3440 }
3441 SKIP_BLANKS;
3442 URI = xmlParseSystemLiteral(ctxt);
3443 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003444 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003445 }
3446 }
3447 return(URI);
3448}
3449
3450/**
3451 * xmlParseComment:
3452 * @ctxt: an XML parser context
3453 *
3454 * Skip an XML (SGML) comment <!-- .... -->
3455 * The spec says that "For compatibility, the string "--" (double-hyphen)
3456 * must not occur within comments. "
3457 *
3458 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3459 */
3460void
3461xmlParseComment(xmlParserCtxtPtr ctxt) {
3462 xmlChar *buf = NULL;
3463 int len;
3464 int size = XML_PARSER_BUFFER_SIZE;
3465 int q, ql;
3466 int r, rl;
3467 int cur, l;
3468 xmlParserInputState state;
3469 xmlParserInputPtr input = ctxt->input;
3470 int count = 0;
3471
3472 /*
3473 * Check that there is a comment right here.
3474 */
3475 if ((RAW != '<') || (NXT(1) != '!') ||
3476 (NXT(2) != '-') || (NXT(3) != '-')) return;
3477
3478 state = ctxt->instate;
3479 ctxt->instate = XML_PARSER_COMMENT;
3480 SHRINK;
3481 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003482 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003483 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003485 ctxt->instate = state;
3486 return;
3487 }
3488 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003489 if (q == 0)
3490 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003491 NEXTL(ql);
3492 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003493 if (r == 0)
3494 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003495 NEXTL(rl);
3496 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003497 if (cur == 0)
3498 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003499 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003500 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003501 ((cur != '>') ||
3502 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003503 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003504 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003505 }
3506 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003507 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003509 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3510 if (new_buf == NULL) {
3511 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003512 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003513 ctxt->instate = state;
3514 return;
3515 }
William M. Bracka3215c72004-07-31 16:24:01 +00003516 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003517 }
3518 COPY_BUF(ql,buf,len,q);
3519 q = r;
3520 ql = rl;
3521 r = cur;
3522 rl = l;
3523
3524 count++;
3525 if (count > 50) {
3526 GROW;
3527 count = 0;
3528 }
3529 NEXTL(l);
3530 cur = CUR_CHAR(l);
3531 if (cur == 0) {
3532 SHRINK;
3533 GROW;
3534 cur = CUR_CHAR(l);
3535 }
3536 }
3537 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003538 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003539 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003540 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003541 xmlFree(buf);
3542 } else {
3543 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003544 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3545 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003546 }
3547 NEXT;
3548 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3549 (!ctxt->disableSAX))
3550 ctxt->sax->comment(ctxt->userData, buf);
3551 xmlFree(buf);
3552 }
3553 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003554 return;
3555not_terminated:
3556 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3557 "Comment not terminated\n", NULL);
3558 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003559}
3560
3561/**
3562 * xmlParsePITarget:
3563 * @ctxt: an XML parser context
3564 *
3565 * parse the name of a PI
3566 *
3567 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3568 *
3569 * Returns the PITarget name or NULL
3570 */
3571
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003572const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003573xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003574 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003575
3576 name = xmlParseName(ctxt);
3577 if ((name != NULL) &&
3578 ((name[0] == 'x') || (name[0] == 'X')) &&
3579 ((name[1] == 'm') || (name[1] == 'M')) &&
3580 ((name[2] == 'l') || (name[2] == 'L'))) {
3581 int i;
3582 if ((name[0] == 'x') && (name[1] == 'm') &&
3583 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003584 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003585 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003586 return(name);
3587 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003588 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003589 return(name);
3590 }
3591 for (i = 0;;i++) {
3592 if (xmlW3CPIs[i] == NULL) break;
3593 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3594 return(name);
3595 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003596 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3597 "xmlParsePITarget: invalid name prefix 'xml'\n",
3598 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003599 }
3600 return(name);
3601}
3602
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value has to be of the form  catalog = "URL"  (single or double
 * quotes) with nothing but blanks after the closing quote. A value where
 * "catalog" is not followed by '=' is dropped silently, any other
 * malformed value raises the XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    for (p = catalog; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;

    /* the URL runs up to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3664
Owen Taylor3473f882001-02-23 17:55:21 +00003665/**
3666 * xmlParsePI:
3667 * @ctxt: an XML parser context
3668 *
3669 * parse an XML Processing Instruction.
3670 *
3671 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3672 *
3673 * The processing is transfered to SAX once parsed.
3674 */
3675
3676void
3677xmlParsePI(xmlParserCtxtPtr ctxt) {
3678 xmlChar *buf = NULL;
3679 int len = 0;
3680 int size = XML_PARSER_BUFFER_SIZE;
3681 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003682 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003683 xmlParserInputState state;
3684 int count = 0;
3685
3686 if ((RAW == '<') && (NXT(1) == '?')) {
3687 xmlParserInputPtr input = ctxt->input;
3688 state = ctxt->instate;
3689 ctxt->instate = XML_PARSER_PI;
3690 /*
3691 * this is a Processing Instruction.
3692 */
3693 SKIP(2);
3694 SHRINK;
3695
3696 /*
3697 * Parse the target name and check for special support like
3698 * namespace.
3699 */
3700 target = xmlParsePITarget(ctxt);
3701 if (target != NULL) {
3702 if ((RAW == '?') && (NXT(1) == '>')) {
3703 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003704 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3705 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003706 }
3707 SKIP(2);
3708
3709 /*
3710 * SAX: PI detected.
3711 */
3712 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3713 (ctxt->sax->processingInstruction != NULL))
3714 ctxt->sax->processingInstruction(ctxt->userData,
3715 target, NULL);
3716 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003717 return;
3718 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003719 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003721 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003722 ctxt->instate = state;
3723 return;
3724 }
3725 cur = CUR;
3726 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003727 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3728 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 SKIP_BLANKS;
3731 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003732 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003733 ((cur != '?') || (NXT(1) != '>'))) {
3734 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003735 xmlChar *tmp;
3736
Owen Taylor3473f882001-02-23 17:55:21 +00003737 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003738 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3739 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003740 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003741 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003742 ctxt->instate = state;
3743 return;
3744 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003745 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 }
3747 count++;
3748 if (count > 50) {
3749 GROW;
3750 count = 0;
3751 }
3752 COPY_BUF(l,buf,len,cur);
3753 NEXTL(l);
3754 cur = CUR_CHAR(l);
3755 if (cur == 0) {
3756 SHRINK;
3757 GROW;
3758 cur = CUR_CHAR(l);
3759 }
3760 }
3761 buf[len] = 0;
3762 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003763 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3764 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003765 } else {
3766 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3768 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 SKIP(2);
3771
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003772#ifdef LIBXML_CATALOG_ENABLED
3773 if (((state == XML_PARSER_MISC) ||
3774 (state == XML_PARSER_START)) &&
3775 (xmlStrEqual(target, XML_CATALOG_PI))) {
3776 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3777 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3778 (allow == XML_CATA_ALLOW_ALL))
3779 xmlParseCatalogPI(ctxt, buf);
3780 }
3781#endif
3782
3783
Owen Taylor3473f882001-02-23 17:55:21 +00003784 /*
3785 * SAX: PI detected.
3786 */
3787 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3788 (ctxt->sax->processingInstruction != NULL))
3789 ctxt->sax->processingInstruction(ctxt->userData,
3790 target, buf);
3791 }
3792 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003793 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003794 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003795 }
3796 ctxt->instate = state;
3797 }
3798}
3799
3800/**
3801 * xmlParseNotationDecl:
3802 * @ctxt: an XML parser context
3803 *
3804 * parse a notation declaration
3805 *
3806 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3807 *
3808 * Hence there is actually 3 choices:
3809 * 'PUBLIC' S PubidLiteral
3810 * 'PUBLIC' S PubidLiteral S SystemLiteral
3811 * and 'SYSTEM' S SystemLiteral
3812 *
3813 * See the NOTE on xmlParseExternalID().
3814 */
3815
3816void
3817xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003818 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003819 xmlChar *Pubid;
3820 xmlChar *Systemid;
3821
Daniel Veillarda07050d2003-10-19 14:46:32 +00003822 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003823 xmlParserInputPtr input = ctxt->input;
3824 SHRINK;
3825 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003826 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003827 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3828 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003829 return;
3830 }
3831 SKIP_BLANKS;
3832
Daniel Veillard76d66f42001-05-16 21:05:17 +00003833 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003834 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003835 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003836 return;
3837 }
William M. Brack76e95df2003-10-18 16:20:14 +00003838 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003839 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003840 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003841 return;
3842 }
3843 SKIP_BLANKS;
3844
3845 /*
3846 * Parse the IDs.
3847 */
3848 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3849 SKIP_BLANKS;
3850
3851 if (RAW == '>') {
3852 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003853 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3854 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003855 }
3856 NEXT;
3857 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3858 (ctxt->sax->notationDecl != NULL))
3859 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3860 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003861 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003862 }
Owen Taylor3473f882001-02-23 17:55:21 +00003863 if (Systemid != NULL) xmlFree(Systemid);
3864 if (Pubid != NULL) xmlFree(Pubid);
3865 }
3866}
3867
3868/**
3869 * xmlParseEntityDecl:
3870 * @ctxt: an XML parser context
3871 *
3872 * parse <!ENTITY declarations
3873 *
3874 * [70] EntityDecl ::= GEDecl | PEDecl
3875 *
3876 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3877 *
3878 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3879 *
3880 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3881 *
3882 * [74] PEDef ::= EntityValue | ExternalID
3883 *
3884 * [76] NDataDecl ::= S 'NDATA' S Name
3885 *
3886 * [ VC: Notation Declared ]
3887 * The Name must match the declared name of a notation.
3888 */
3889
3890void
3891xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003892 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003893 xmlChar *value = NULL;
3894 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003895 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003896 int isParameter = 0;
3897 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003898 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003899
3900 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003901 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003902 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003903 SHRINK;
3904 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003905 skipped = SKIP_BLANKS;
3906 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003907 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3908 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003909 }
Owen Taylor3473f882001-02-23 17:55:21 +00003910
3911 if (RAW == '%') {
3912 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003913 skipped = SKIP_BLANKS;
3914 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3916 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003917 }
Owen Taylor3473f882001-02-23 17:55:21 +00003918 isParameter = 1;
3919 }
3920
Daniel Veillard76d66f42001-05-16 21:05:17 +00003921 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003922 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003923 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3924 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003925 return;
3926 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003927 skipped = SKIP_BLANKS;
3928 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3930 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003931 }
Owen Taylor3473f882001-02-23 17:55:21 +00003932
Daniel Veillardf5582f12002-06-11 10:08:16 +00003933 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003934 /*
3935 * handle the various case of definitions...
3936 */
3937 if (isParameter) {
3938 if ((RAW == '"') || (RAW == '\'')) {
3939 value = xmlParseEntityValue(ctxt, &orig);
3940 if (value) {
3941 if ((ctxt->sax != NULL) &&
3942 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3943 ctxt->sax->entityDecl(ctxt->userData, name,
3944 XML_INTERNAL_PARAMETER_ENTITY,
3945 NULL, NULL, value);
3946 }
3947 } else {
3948 URI = xmlParseExternalID(ctxt, &literal, 1);
3949 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003950 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003951 }
3952 if (URI) {
3953 xmlURIPtr uri;
3954
3955 uri = xmlParseURI((const char *) URI);
3956 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003957 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3958 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003959 /*
3960 * This really ought to be a well formedness error
3961 * but the XML Core WG decided otherwise c.f. issue
3962 * E26 of the XML erratas.
3963 */
Owen Taylor3473f882001-02-23 17:55:21 +00003964 } else {
3965 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003966 /*
3967 * Okay this is foolish to block those but not
3968 * invalid URIs.
3969 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003970 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003971 } else {
3972 if ((ctxt->sax != NULL) &&
3973 (!ctxt->disableSAX) &&
3974 (ctxt->sax->entityDecl != NULL))
3975 ctxt->sax->entityDecl(ctxt->userData, name,
3976 XML_EXTERNAL_PARAMETER_ENTITY,
3977 literal, URI, NULL);
3978 }
3979 xmlFreeURI(uri);
3980 }
3981 }
3982 }
3983 } else {
3984 if ((RAW == '"') || (RAW == '\'')) {
3985 value = xmlParseEntityValue(ctxt, &orig);
3986 if ((ctxt->sax != NULL) &&
3987 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3988 ctxt->sax->entityDecl(ctxt->userData, name,
3989 XML_INTERNAL_GENERAL_ENTITY,
3990 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003991 /*
3992 * For expat compatibility in SAX mode.
3993 */
3994 if ((ctxt->myDoc == NULL) ||
3995 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3996 if (ctxt->myDoc == NULL) {
3997 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3998 }
3999 if (ctxt->myDoc->intSubset == NULL)
4000 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4001 BAD_CAST "fake", NULL, NULL);
4002
Daniel Veillard1af9a412003-08-20 22:54:39 +00004003 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4004 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004005 }
Owen Taylor3473f882001-02-23 17:55:21 +00004006 } else {
4007 URI = xmlParseExternalID(ctxt, &literal, 1);
4008 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004009 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004010 }
4011 if (URI) {
4012 xmlURIPtr uri;
4013
4014 uri = xmlParseURI((const char *)URI);
4015 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004016 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4017 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004018 /*
4019 * This really ought to be a well formedness error
4020 * but the XML Core WG decided otherwise c.f. issue
4021 * E26 of the XML erratas.
4022 */
Owen Taylor3473f882001-02-23 17:55:21 +00004023 } else {
4024 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004025 /*
4026 * Okay this is foolish to block those but not
4027 * invalid URIs.
4028 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004029 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004030 }
4031 xmlFreeURI(uri);
4032 }
4033 }
William M. Brack76e95df2003-10-18 16:20:14 +00004034 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004035 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4036 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004037 }
4038 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004039 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004040 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004041 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004042 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4043 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004044 }
4045 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004046 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4048 (ctxt->sax->unparsedEntityDecl != NULL))
4049 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4050 literal, URI, ndata);
4051 } else {
4052 if ((ctxt->sax != NULL) &&
4053 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4054 ctxt->sax->entityDecl(ctxt->userData, name,
4055 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4056 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004057 /*
4058 * For expat compatibility in SAX mode.
4059 * assuming the entity repalcement was asked for
4060 */
4061 if ((ctxt->replaceEntities != 0) &&
4062 ((ctxt->myDoc == NULL) ||
4063 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4064 if (ctxt->myDoc == NULL) {
4065 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4066 }
4067
4068 if (ctxt->myDoc->intSubset == NULL)
4069 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4070 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004071 xmlSAX2EntityDecl(ctxt, name,
4072 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4073 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004074 }
Owen Taylor3473f882001-02-23 17:55:21 +00004075 }
4076 }
4077 }
4078 SKIP_BLANKS;
4079 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004080 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004081 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004082 } else {
4083 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004084 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4085 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004086 }
4087 NEXT;
4088 }
4089 if (orig != NULL) {
4090 /*
4091 * Ugly mechanism to save the raw entity value.
4092 */
4093 xmlEntityPtr cur = NULL;
4094
4095 if (isParameter) {
4096 if ((ctxt->sax != NULL) &&
4097 (ctxt->sax->getParameterEntity != NULL))
4098 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4099 } else {
4100 if ((ctxt->sax != NULL) &&
4101 (ctxt->sax->getEntity != NULL))
4102 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004103 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004104 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004105 }
Owen Taylor3473f882001-02-23 17:55:21 +00004106 }
4107 if (cur != NULL) {
4108 if (cur->orig != NULL)
4109 xmlFree(orig);
4110 else
4111 cur->orig = orig;
4112 } else
4113 xmlFree(orig);
4114 }
Owen Taylor3473f882001-02-23 17:55:21 +00004115 if (value != NULL) xmlFree(value);
4116 if (URI != NULL) xmlFree(URI);
4117 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004118 }
4119}
4120
4121/**
4122 * xmlParseDefaultDecl:
4123 * @ctxt: an XML parser context
4124 * @value: Receive a possible fixed default value for the attribute
4125 *
4126 * Parse an attribute default declaration
4127 *
4128 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4129 *
4130 * [ VC: Required Attribute ]
4131 * if the default declaration is the keyword #REQUIRED, then the
4132 * attribute must be specified for all elements of the type in the
4133 * attribute-list declaration.
4134 *
4135 * [ VC: Attribute Default Legal ]
4136 * The declared default value must meet the lexical constraints of
4137 * the declared attribute type c.f. xmlValidateAttributeDecl()
4138 *
4139 * [ VC: Fixed Attribute Default ]
4140 * if an attribute has a default value declared with the #FIXED
4141 * keyword, instances of that attribute must match the default value.
4142 *
4143 * [ WFC: No < in Attribute Values ]
4144 * handled in xmlParseAttValue()
4145 *
4146 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4147 * or XML_ATTRIBUTE_FIXED.
4148 */
4149
4150int
4151xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4152 int val;
4153 xmlChar *ret;
4154
4155 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004156 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004157 SKIP(9);
4158 return(XML_ATTRIBUTE_REQUIRED);
4159 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004160 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004161 SKIP(8);
4162 return(XML_ATTRIBUTE_IMPLIED);
4163 }
4164 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004165 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004166 SKIP(6);
4167 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004168 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004169 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4170 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004171 }
4172 SKIP_BLANKS;
4173 }
4174 ret = xmlParseAttValue(ctxt);
4175 ctxt->instate = XML_PARSER_DTD;
4176 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004177 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004178 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004179 } else
4180 *value = ret;
4181 return(val);
4182}
4183
4184/**
4185 * xmlParseNotationType:
4186 * @ctxt: an XML parser context
4187 *
4188 * parse an Notation attribute type.
4189 *
4190 * Note: the leading 'NOTATION' S part has already being parsed...
4191 *
4192 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4193 *
4194 * [ VC: Notation Attributes ]
4195 * Values of this type must match one of the notation names included
4196 * in the declaration; all notation names in the declaration must be declared.
4197 *
4198 * Returns: the notation attribute tree built while parsing
4199 */
4200
4201xmlEnumerationPtr
4202xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004203 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004204 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4205
4206 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004207 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004208 return(NULL);
4209 }
4210 SHRINK;
4211 do {
4212 NEXT;
4213 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004214 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004215 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004216 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4217 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004218 return(ret);
4219 }
4220 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 if (cur == NULL) return(ret);
4222 if (last == NULL) ret = last = cur;
4223 else {
4224 last->next = cur;
4225 last = cur;
4226 }
4227 SKIP_BLANKS;
4228 } while (RAW == '|');
4229 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004230 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004231 if ((last != NULL) && (last != ret))
4232 xmlFreeEnumeration(last);
4233 return(ret);
4234 }
4235 NEXT;
4236 return(ret);
4237}
4238
4239/**
4240 * xmlParseEnumerationType:
4241 * @ctxt: an XML parser context
4242 *
4243 * parse an Enumeration attribute type.
4244 *
4245 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4246 *
4247 * [ VC: Enumeration ]
4248 * Values of this type must match one of the Nmtoken tokens in
4249 * the declaration
4250 *
4251 * Returns: the enumeration attribute tree built while parsing
4252 */
4253
4254xmlEnumerationPtr
4255xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4256 xmlChar *name;
4257 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4258
4259 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004260 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004261 return(NULL);
4262 }
4263 SHRINK;
4264 do {
4265 NEXT;
4266 SKIP_BLANKS;
4267 name = xmlParseNmtoken(ctxt);
4268 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004269 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004270 return(ret);
4271 }
4272 cur = xmlCreateEnumeration(name);
4273 xmlFree(name);
4274 if (cur == NULL) return(ret);
4275 if (last == NULL) ret = last = cur;
4276 else {
4277 last->next = cur;
4278 last = cur;
4279 }
4280 SKIP_BLANKS;
4281 } while (RAW == '|');
4282 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004283 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004284 return(ret);
4285 }
4286 NEXT;
4287 return(ret);
4288}
4289
4290/**
4291 * xmlParseEnumeratedType:
4292 * @ctxt: an XML parser context
4293 * @tree: the enumeration tree built while parsing
4294 *
4295 * parse an Enumerated attribute type.
4296 *
4297 * [57] EnumeratedType ::= NotationType | Enumeration
4298 *
4299 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4300 *
4301 *
4302 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4303 */
4304
4305int
4306xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004307 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004308 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004309 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004310 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4311 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004312 return(0);
4313 }
4314 SKIP_BLANKS;
4315 *tree = xmlParseNotationType(ctxt);
4316 if (*tree == NULL) return(0);
4317 return(XML_ATTRIBUTE_NOTATION);
4318 }
4319 *tree = xmlParseEnumerationType(ctxt);
4320 if (*tree == NULL) return(0);
4321 return(XML_ATTRIBUTE_ENUMERATION);
4322}
4323
4324/**
4325 * xmlParseAttributeType:
4326 * @ctxt: an XML parser context
4327 * @tree: the enumeration tree built while parsing
4328 *
4329 * parse the Attribute list def for an element
4330 *
4331 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4332 *
4333 * [55] StringType ::= 'CDATA'
4334 *
4335 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4336 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4337 *
4338 * Validity constraints for attribute values syntax are checked in
4339 * xmlValidateAttributeValue()
4340 *
4341 * [ VC: ID ]
4342 * Values of type ID must match the Name production. A name must not
4343 * appear more than once in an XML document as a value of this type;
4344 * i.e., ID values must uniquely identify the elements which bear them.
4345 *
4346 * [ VC: One ID per Element Type ]
4347 * No element type may have more than one ID attribute specified.
4348 *
4349 * [ VC: ID Attribute Default ]
4350 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4351 *
4352 * [ VC: IDREF ]
4353 * Values of type IDREF must match the Name production, and values
4354 * of type IDREFS must match Names; each IDREF Name must match the value
4355 * of an ID attribute on some element in the XML document; i.e. IDREF
4356 * values must match the value of some ID attribute.
4357 *
4358 * [ VC: Entity Name ]
4359 * Values of type ENTITY must match the Name production, values
4360 * of type ENTITIES must match Names; each Entity Name must match the
4361 * name of an unparsed entity declared in the DTD.
4362 *
4363 * [ VC: Name Token ]
4364 * Values of type NMTOKEN must match the Nmtoken production; values
4365 * of type NMTOKENS must match Nmtokens.
4366 *
4367 * Returns the attribute type
4368 */
4369int
4370xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4371 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004372 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004373 SKIP(5);
4374 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004375 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004376 SKIP(6);
4377 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004378 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004379 SKIP(5);
4380 return(XML_ATTRIBUTE_IDREF);
4381 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4382 SKIP(2);
4383 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004384 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004385 SKIP(6);
4386 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004387 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 SKIP(8);
4389 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004390 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004391 SKIP(8);
4392 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004393 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004394 SKIP(7);
4395 return(XML_ATTRIBUTE_NMTOKEN);
4396 }
4397 return(xmlParseEnumeratedType(ctxt, tree));
4398}
4399
4400/**
4401 * xmlParseAttributeListDecl:
4402 * @ctxt: an XML parser context
4403 *
4404 * : parse the Attribute list def for an element
4405 *
4406 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4407 *
4408 * [53] AttDef ::= S Name S AttType S DefaultDecl
4409 *
4410 */
4411void
4412xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004413 const xmlChar *elemName;
4414 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004415 xmlEnumerationPtr tree;
4416
Daniel Veillarda07050d2003-10-19 14:46:32 +00004417 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004418 xmlParserInputPtr input = ctxt->input;
4419
4420 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004421 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004423 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004424 }
4425 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004426 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004427 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004428 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4429 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004430 return;
4431 }
4432 SKIP_BLANKS;
4433 GROW;
4434 while (RAW != '>') {
4435 const xmlChar *check = CUR_PTR;
4436 int type;
4437 int def;
4438 xmlChar *defaultValue = NULL;
4439
4440 GROW;
4441 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004442 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004443 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4445 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004446 break;
4447 }
4448 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004449 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004451 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004452 if (defaultValue != NULL)
4453 xmlFree(defaultValue);
4454 break;
4455 }
4456 SKIP_BLANKS;
4457
4458 type = xmlParseAttributeType(ctxt, &tree);
4459 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004460 if (defaultValue != NULL)
4461 xmlFree(defaultValue);
4462 break;
4463 }
4464
4465 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004466 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4468 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004469 if (defaultValue != NULL)
4470 xmlFree(defaultValue);
4471 if (tree != NULL)
4472 xmlFreeEnumeration(tree);
4473 break;
4474 }
4475 SKIP_BLANKS;
4476
4477 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4478 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004479 if (defaultValue != NULL)
4480 xmlFree(defaultValue);
4481 if (tree != NULL)
4482 xmlFreeEnumeration(tree);
4483 break;
4484 }
4485
4486 GROW;
4487 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004488 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004489 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004490 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004491 if (defaultValue != NULL)
4492 xmlFree(defaultValue);
4493 if (tree != NULL)
4494 xmlFreeEnumeration(tree);
4495 break;
4496 }
4497 SKIP_BLANKS;
4498 }
4499 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004500 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4501 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004502 if (defaultValue != NULL)
4503 xmlFree(defaultValue);
4504 if (tree != NULL)
4505 xmlFreeEnumeration(tree);
4506 break;
4507 }
4508 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4509 (ctxt->sax->attributeDecl != NULL))
4510 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4511 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004512 else if (tree != NULL)
4513 xmlFreeEnumeration(tree);
4514
4515 if ((ctxt->sax2) && (defaultValue != NULL) &&
4516 (def != XML_ATTRIBUTE_IMPLIED) &&
4517 (def != XML_ATTRIBUTE_REQUIRED)) {
4518 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4519 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004520 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4521 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4522 }
Owen Taylor3473f882001-02-23 17:55:21 +00004523 if (defaultValue != NULL)
4524 xmlFree(defaultValue);
4525 GROW;
4526 }
4527 if (RAW == '>') {
4528 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004529 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4530 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004531 }
4532 NEXT;
4533 }
Owen Taylor3473f882001-02-23 17:55:21 +00004534 }
4535}
4536
4537/**
4538 * xmlParseElementMixedContentDecl:
4539 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004540 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004541 *
4542 * parse the declaration for a Mixed Element content
4543 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4544 *
4545 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4546 * '(' S? '#PCDATA' S? ')'
4547 *
4548 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4549 *
4550 * [ VC: No Duplicate Types ]
4551 * The same name must not appear more than once in a single
4552 * mixed-content declaration.
4553 *
4554 * returns: the list of the xmlElementContentPtr describing the element choices
4555 */
4556xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004557xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004558 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004559 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004560
4561 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004562 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004563 SKIP(7);
4564 SKIP_BLANKS;
4565 SHRINK;
4566 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004567 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004568 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4569"Element content declaration doesn't start and stop in the same entity\n",
4570 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004571 }
Owen Taylor3473f882001-02-23 17:55:21 +00004572 NEXT;
4573 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4574 if (RAW == '*') {
4575 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4576 NEXT;
4577 }
4578 return(ret);
4579 }
4580 if ((RAW == '(') || (RAW == '|')) {
4581 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4582 if (ret == NULL) return(NULL);
4583 }
4584 while (RAW == '|') {
4585 NEXT;
4586 if (elem == NULL) {
4587 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4588 if (ret == NULL) return(NULL);
4589 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004590 if (cur != NULL)
4591 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004592 cur = ret;
4593 } else {
4594 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4595 if (n == NULL) return(NULL);
4596 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004597 if (n->c1 != NULL)
4598 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004599 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004600 if (n != NULL)
4601 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004602 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004603 }
4604 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004605 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004606 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004607 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004608 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004609 xmlFreeElementContent(cur);
4610 return(NULL);
4611 }
4612 SKIP_BLANKS;
4613 GROW;
4614 }
4615 if ((RAW == ')') && (NXT(1) == '*')) {
4616 if (elem != NULL) {
4617 cur->c2 = xmlNewElementContent(elem,
4618 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004619 if (cur->c2 != NULL)
4620 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004621 }
4622 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004623 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004624 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4625"Element content declaration doesn't start and stop in the same entity\n",
4626 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004627 }
Owen Taylor3473f882001-02-23 17:55:21 +00004628 SKIP(2);
4629 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004630 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004631 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004632 return(NULL);
4633 }
4634
4635 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004636 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004637 }
4638 return(ret);
4639}
4640
4641/**
4642 * xmlParseElementChildrenContentDecl:
4643 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004644 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004645 *
4646 * parse the declaration for an element-children content model
4647 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4648 *
4649 *
4650 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4651 *
4652 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4653 *
4654 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4655 *
4656 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4657 *
4658 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4659 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004660 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004661 * opening or closing parentheses in a choice, seq, or Mixed
4662 * construct is contained in the replacement text for a parameter
4663 * entity, both must be contained in the same replacement text. For
4664 * interoperability, if a parameter-entity reference appears in a
4665 * choice, seq, or Mixed construct, its replacement text should not
4666 * be empty, and neither the first nor last non-blank character of
4667 * the replacement text should be a connector (| or ,).
4668 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004669 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004670 * hierarchy.
4671 */
4672xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004673xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004674 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004675 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004676 xmlChar type = 0;
4677
4678 SKIP_BLANKS;
4679 GROW;
4680 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004681 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004682
Owen Taylor3473f882001-02-23 17:55:21 +00004683 /* Recurse on first child */
4684 NEXT;
4685 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004686 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 SKIP_BLANKS;
4688 GROW;
4689 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004690 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004692 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004693 return(NULL);
4694 }
4695 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004696 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004697 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004698 return(NULL);
4699 }
Owen Taylor3473f882001-02-23 17:55:21 +00004700 GROW;
4701 if (RAW == '?') {
4702 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4703 NEXT;
4704 } else if (RAW == '*') {
4705 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4706 NEXT;
4707 } else if (RAW == '+') {
4708 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4709 NEXT;
4710 } else {
4711 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4712 }
Owen Taylor3473f882001-02-23 17:55:21 +00004713 GROW;
4714 }
4715 SKIP_BLANKS;
4716 SHRINK;
4717 while (RAW != ')') {
4718 /*
4719 * Each loop we parse one separator and one element.
4720 */
4721 if (RAW == ',') {
4722 if (type == 0) type = CUR;
4723
4724 /*
4725 * Detect "Name | Name , Name" error
4726 */
4727 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004728 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004729 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004730 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004731 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004732 xmlFreeElementContent(last);
4733 if (ret != NULL)
4734 xmlFreeElementContent(ret);
4735 return(NULL);
4736 }
4737 NEXT;
4738
4739 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4740 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004741 if ((last != NULL) && (last != ret))
4742 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 xmlFreeElementContent(ret);
4744 return(NULL);
4745 }
4746 if (last == NULL) {
4747 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004748 if (ret != NULL)
4749 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004750 ret = cur = op;
4751 } else {
4752 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004753 if (op != NULL)
4754 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004755 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004756 if (last != NULL)
4757 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004758 cur =op;
4759 last = NULL;
4760 }
4761 } else if (RAW == '|') {
4762 if (type == 0) type = CUR;
4763
4764 /*
4765 * Detect "Name , Name | Name" error
4766 */
4767 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004768 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004769 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004770 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004771 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004772 xmlFreeElementContent(last);
4773 if (ret != NULL)
4774 xmlFreeElementContent(ret);
4775 return(NULL);
4776 }
4777 NEXT;
4778
4779 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4780 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004781 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004782 xmlFreeElementContent(last);
4783 if (ret != NULL)
4784 xmlFreeElementContent(ret);
4785 return(NULL);
4786 }
4787 if (last == NULL) {
4788 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004789 if (ret != NULL)
4790 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004791 ret = cur = op;
4792 } else {
4793 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004794 if (op != NULL)
4795 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004796 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004797 if (last != NULL)
4798 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004799 cur =op;
4800 last = NULL;
4801 }
4802 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004803 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004804 if (ret != NULL)
4805 xmlFreeElementContent(ret);
4806 return(NULL);
4807 }
4808 GROW;
4809 SKIP_BLANKS;
4810 GROW;
4811 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004812 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004813 /* Recurse on second child */
4814 NEXT;
4815 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004816 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004817 SKIP_BLANKS;
4818 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004819 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004820 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004821 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004822 if (ret != NULL)
4823 xmlFreeElementContent(ret);
4824 return(NULL);
4825 }
4826 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004827 if (RAW == '?') {
4828 last->ocur = XML_ELEMENT_CONTENT_OPT;
4829 NEXT;
4830 } else if (RAW == '*') {
4831 last->ocur = XML_ELEMENT_CONTENT_MULT;
4832 NEXT;
4833 } else if (RAW == '+') {
4834 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4835 NEXT;
4836 } else {
4837 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4838 }
4839 }
4840 SKIP_BLANKS;
4841 GROW;
4842 }
4843 if ((cur != NULL) && (last != NULL)) {
4844 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004845 if (last != NULL)
4846 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004847 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004848 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004849 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4850"Element content declaration doesn't start and stop in the same entity\n",
4851 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004852 }
Owen Taylor3473f882001-02-23 17:55:21 +00004853 NEXT;
4854 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004855 if (ret != NULL) {
4856 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4857 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4858 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4859 else
4860 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4861 }
Owen Taylor3473f882001-02-23 17:55:21 +00004862 NEXT;
4863 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004864 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004865 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004866 cur = ret;
4867 /*
4868 * Some normalization:
4869 * (a | b* | c?)* == (a | b | c)*
4870 */
4871 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4872 if ((cur->c1 != NULL) &&
4873 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4874 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4875 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4876 if ((cur->c2 != NULL) &&
4877 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4878 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4879 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4880 cur = cur->c2;
4881 }
4882 }
Owen Taylor3473f882001-02-23 17:55:21 +00004883 NEXT;
4884 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004885 if (ret != NULL) {
4886 int found = 0;
4887
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004888 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4889 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4890 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004891 else
4892 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004893 /*
4894 * Some normalization:
4895 * (a | b*)+ == (a | b)*
4896 * (a | b?)+ == (a | b)*
4897 */
4898 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4899 if ((cur->c1 != NULL) &&
4900 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4901 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4902 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4903 found = 1;
4904 }
4905 if ((cur->c2 != NULL) &&
4906 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4907 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4908 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4909 found = 1;
4910 }
4911 cur = cur->c2;
4912 }
4913 if (found)
4914 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4915 }
Owen Taylor3473f882001-02-23 17:55:21 +00004916 NEXT;
4917 }
4918 return(ret);
4919}
4920
4921/**
4922 * xmlParseElementContentDecl:
4923 * @ctxt: an XML parser context
4924 * @name: the name of the element being defined.
4925 * @result: the Element Content pointer will be stored here if any
4926 *
4927 * parse the declaration for an Element content either Mixed or Children,
4928 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4929 *
4930 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4931 *
4932 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4933 */
4934
4935int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004936xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004937 xmlElementContentPtr *result) {
4938
4939 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004940 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004941 int res;
4942
4943 *result = NULL;
4944
4945 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004946 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004947 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004948 return(-1);
4949 }
4950 NEXT;
4951 GROW;
4952 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004953 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004954 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 res = XML_ELEMENT_TYPE_MIXED;
4956 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004957 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004958 res = XML_ELEMENT_TYPE_ELEMENT;
4959 }
Owen Taylor3473f882001-02-23 17:55:21 +00004960 SKIP_BLANKS;
4961 *result = tree;
4962 return(res);
4963}
4964
4965/**
4966 * xmlParseElementDecl:
4967 * @ctxt: an XML parser context
4968 *
4969 * parse an Element declaration.
4970 *
4971 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4972 *
4973 * [ VC: Unique Element Type Declaration ]
4974 * No element type may be declared more than once
4975 *
4976 * Returns the type of the element, or -1 in case of error
4977 */
4978int
4979xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004980 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004981 int ret = -1;
4982 xmlElementContentPtr content = NULL;
4983
4984 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004985 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004986 xmlParserInputPtr input = ctxt->input;
4987
4988 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004989 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004990 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4991 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004992 }
4993 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004994 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004995 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004996 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4997 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004998 return(-1);
4999 }
5000 while ((RAW == 0) && (ctxt->inputNr > 1))
5001 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005002 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005003 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5004 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005005 }
5006 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005007 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005008 SKIP(5);
5009 /*
5010 * Element must always be empty.
5011 */
5012 ret = XML_ELEMENT_TYPE_EMPTY;
5013 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5014 (NXT(2) == 'Y')) {
5015 SKIP(3);
5016 /*
5017 * Element is a generic container.
5018 */
5019 ret = XML_ELEMENT_TYPE_ANY;
5020 } else if (RAW == '(') {
5021 ret = xmlParseElementContentDecl(ctxt, name, &content);
5022 } else {
5023 /*
5024 * [ WFC: PEs in Internal Subset ] error handling.
5025 */
5026 if ((RAW == '%') && (ctxt->external == 0) &&
5027 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005028 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005030 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005031 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005032 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5033 }
Owen Taylor3473f882001-02-23 17:55:21 +00005034 return(-1);
5035 }
5036
5037 SKIP_BLANKS;
5038 /*
5039 * Pop-up of finished entities.
5040 */
5041 while ((RAW == 0) && (ctxt->inputNr > 1))
5042 xmlPopInput(ctxt);
5043 SKIP_BLANKS;
5044
5045 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005046 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005047 } else {
5048 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005049 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5050 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005051 }
5052
5053 NEXT;
5054 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5055 (ctxt->sax->elementDecl != NULL))
5056 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5057 content);
5058 }
5059 if (content != NULL) {
5060 xmlFreeElementContent(content);
5061 }
Owen Taylor3473f882001-02-23 17:55:21 +00005062 }
5063 return(ret);
5064}
5065
5066/**
Owen Taylor3473f882001-02-23 17:55:21 +00005067 * xmlParseConditionalSections
5068 * @ctxt: an XML parser context
5069 *
5070 * [61] conditionalSect ::= includeSect | ignoreSect
5071 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5072 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5073 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5074 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5075 */
5076
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005077static void
Owen Taylor3473f882001-02-23 17:55:21 +00005078xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5079 SKIP(3);
5080 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005081 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005082 SKIP(7);
5083 SKIP_BLANKS;
5084 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005085 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005086 } else {
5087 NEXT;
5088 }
5089 if (xmlParserDebugEntities) {
5090 if ((ctxt->input != NULL) && (ctxt->input->filename))
5091 xmlGenericError(xmlGenericErrorContext,
5092 "%s(%d): ", ctxt->input->filename,
5093 ctxt->input->line);
5094 xmlGenericError(xmlGenericErrorContext,
5095 "Entering INCLUDE Conditional Section\n");
5096 }
5097
5098 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5099 (NXT(2) != '>'))) {
5100 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005101 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005102
5103 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5104 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005105 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005106 NEXT;
5107 } else if (RAW == '%') {
5108 xmlParsePEReference(ctxt);
5109 } else
5110 xmlParseMarkupDecl(ctxt);
5111
5112 /*
5113 * Pop-up of finished entities.
5114 */
5115 while ((RAW == 0) && (ctxt->inputNr > 1))
5116 xmlPopInput(ctxt);
5117
Daniel Veillardfdc91562002-07-01 21:52:03 +00005118 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005119 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005120 break;
5121 }
5122 }
5123 if (xmlParserDebugEntities) {
5124 if ((ctxt->input != NULL) && (ctxt->input->filename))
5125 xmlGenericError(xmlGenericErrorContext,
5126 "%s(%d): ", ctxt->input->filename,
5127 ctxt->input->line);
5128 xmlGenericError(xmlGenericErrorContext,
5129 "Leaving INCLUDE Conditional Section\n");
5130 }
5131
Daniel Veillarda07050d2003-10-19 14:46:32 +00005132 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005133 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005134 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005135 int depth = 0;
5136
5137 SKIP(6);
5138 SKIP_BLANKS;
5139 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005140 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005141 } else {
5142 NEXT;
5143 }
5144 if (xmlParserDebugEntities) {
5145 if ((ctxt->input != NULL) && (ctxt->input->filename))
5146 xmlGenericError(xmlGenericErrorContext,
5147 "%s(%d): ", ctxt->input->filename,
5148 ctxt->input->line);
5149 xmlGenericError(xmlGenericErrorContext,
5150 "Entering IGNORE Conditional Section\n");
5151 }
5152
5153 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005154 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005155 * But disable SAX event generating DTD building in the meantime
5156 */
5157 state = ctxt->disableSAX;
5158 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005159 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005160 ctxt->instate = XML_PARSER_IGNORE;
5161
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005162 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005163 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5164 depth++;
5165 SKIP(3);
5166 continue;
5167 }
5168 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5169 if (--depth >= 0) SKIP(3);
5170 continue;
5171 }
5172 NEXT;
5173 continue;
5174 }
5175
5176 ctxt->disableSAX = state;
5177 ctxt->instate = instate;
5178
5179 if (xmlParserDebugEntities) {
5180 if ((ctxt->input != NULL) && (ctxt->input->filename))
5181 xmlGenericError(xmlGenericErrorContext,
5182 "%s(%d): ", ctxt->input->filename,
5183 ctxt->input->line);
5184 xmlGenericError(xmlGenericErrorContext,
5185 "Leaving IGNORE Conditional Section\n");
5186 }
5187
5188 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005189 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 }
5191
5192 if (RAW == 0)
5193 SHRINK;
5194
5195 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005196 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 } else {
5198 SKIP(3);
5199 }
5200}
5201
5202/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005203 * xmlParseMarkupDecl:
5204 * @ctxt: an XML parser context
5205 *
5206 * parse Markup declarations
5207 *
5208 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5209 * NotationDecl | PI | Comment
5210 *
5211 * [ VC: Proper Declaration/PE Nesting ]
5212 * Parameter-entity replacement text must be properly nested with
5213 * markup declarations. That is to say, if either the first character
5214 * or the last character of a markup declaration (markupdecl above) is
5215 * contained in the replacement text for a parameter-entity reference,
5216 * both must be contained in the same replacement text.
5217 *
5218 * [ WFC: PEs in Internal Subset ]
5219 * In the internal DTD subset, parameter-entity references can occur
5220 * only where markup declarations can occur, not within markup declarations.
5221 * (This does not apply to references that occur in external parameter
5222 * entities or to the external subset.)
5223 */
5224void
5225xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5226 GROW;
5227 xmlParseElementDecl(ctxt);
5228 xmlParseAttributeListDecl(ctxt);
5229 xmlParseEntityDecl(ctxt);
5230 xmlParseNotationDecl(ctxt);
5231 xmlParsePI(ctxt);
5232 xmlParseComment(ctxt);
5233 /*
5234 * This is only for internal subset. On external entities,
5235 * the replacement is done before parsing stage
5236 */
5237 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5238 xmlParsePEReference(ctxt);
5239
5240 /*
5241 * Conditional sections are allowed from entities included
5242 * by PE References in the internal subset.
5243 */
5244 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5245 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5246 xmlParseConditionalSections(ctxt);
5247 }
5248 }
5249
5250 ctxt->instate = XML_PARSER_DTD;
5251}
5252
5253/**
5254 * xmlParseTextDecl:
5255 * @ctxt: an XML parser context
5256 *
5257 * parse an XML declaration header for external entities
5258 *
5259 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5260 *
5261 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5262 */
5263
5264void
5265xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5266 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005267 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005268
5269 /*
5270 * We know that '<?xml' is here.
5271 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005272 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005273 SKIP(5);
5274 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005275 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005276 return;
5277 }
5278
William M. Brack76e95df2003-10-18 16:20:14 +00005279 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005280 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005282 }
5283 SKIP_BLANKS;
5284
5285 /*
5286 * We may have the VersionInfo here.
5287 */
5288 version = xmlParseVersionInfo(ctxt);
5289 if (version == NULL)
5290 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005291 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005292 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005295 }
5296 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005297 ctxt->input->version = version;
5298
5299 /*
5300 * We must have the encoding declaration
5301 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005302 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005303 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5304 /*
5305 * The XML REC instructs us to stop parsing right here
5306 */
5307 return;
5308 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005309 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5310 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5311 "Missing encoding in text declaration\n");
5312 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005313
5314 SKIP_BLANKS;
5315 if ((RAW == '?') && (NXT(1) == '>')) {
5316 SKIP(2);
5317 } else if (RAW == '>') {
5318 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005319 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005320 NEXT;
5321 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005322 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005323 MOVETO_ENDTAG(CUR_PTR);
5324 NEXT;
5325 }
5326}
5327
5328/**
Owen Taylor3473f882001-02-23 17:55:21 +00005329 * xmlParseExternalSubset:
5330 * @ctxt: an XML parser context
5331 * @ExternalID: the external identifier
5332 * @SystemID: the system identifier (or URL)
5333 *
5334 * parse Markup declarations from an external subset
5335 *
5336 * [30] extSubset ::= textDecl? extSubsetDecl
5337 *
5338 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5339 */
5340void
5341xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5342 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005343 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005344 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005345 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005346 xmlParseTextDecl(ctxt);
5347 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5348 /*
5349 * The XML REC instructs us to stop parsing right here
5350 */
5351 ctxt->instate = XML_PARSER_EOF;
5352 return;
5353 }
5354 }
5355 if (ctxt->myDoc == NULL) {
5356 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5357 }
5358 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5359 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5360
5361 ctxt->instate = XML_PARSER_DTD;
5362 ctxt->external = 1;
5363 while (((RAW == '<') && (NXT(1) == '?')) ||
5364 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005365 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005366 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005367 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005368
5369 GROW;
5370 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5371 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005372 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005373 NEXT;
5374 } else if (RAW == '%') {
5375 xmlParsePEReference(ctxt);
5376 } else
5377 xmlParseMarkupDecl(ctxt);
5378
5379 /*
5380 * Pop-up of finished entities.
5381 */
5382 while ((RAW == 0) && (ctxt->inputNr > 1))
5383 xmlPopInput(ctxt);
5384
Daniel Veillardfdc91562002-07-01 21:52:03 +00005385 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005386 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005387 break;
5388 }
5389 }
5390
5391 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005392 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005393 }
5394
5395}
5396
5397/**
5398 * xmlParseReference:
5399 * @ctxt: an XML parser context
5400 *
5401 * parse and handle entity references in content, depending on the SAX
5402 * interface, this may end-up in a call to character() if this is a
5403 * CharRef, a predefined entity, if there is no reference() callback.
5404 * or if the parser was asked to switch to that mode.
5405 *
5406 * [67] Reference ::= EntityRef | CharRef
5407 */
5408void
5409xmlParseReference(xmlParserCtxtPtr ctxt) {
5410 xmlEntityPtr ent;
5411 xmlChar *val;
5412 if (RAW != '&') return;
5413
5414 if (NXT(1) == '#') {
5415 int i = 0;
5416 xmlChar out[10];
5417 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005418 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005419
5420 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5421 /*
5422 * So we are using non-UTF-8 buffers
5423 * Check that the char fit on 8bits, if not
5424 * generate a CharRef.
5425 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005426 if (value <= 0xFF) {
5427 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 out[1] = 0;
5429 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5430 (!ctxt->disableSAX))
5431 ctxt->sax->characters(ctxt->userData, out, 1);
5432 } else {
5433 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005434 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005435 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005436 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005437 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5438 (!ctxt->disableSAX))
5439 ctxt->sax->reference(ctxt->userData, out);
5440 }
5441 } else {
5442 /*
5443 * Just encode the value in UTF-8
5444 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005445 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005446 out[i] = 0;
5447 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5448 (!ctxt->disableSAX))
5449 ctxt->sax->characters(ctxt->userData, out, i);
5450 }
5451 } else {
5452 ent = xmlParseEntityRef(ctxt);
5453 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005454 if (!ctxt->wellFormed)
5455 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005456 if ((ent->name != NULL) &&
5457 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5458 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005459 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005460
5461
5462 /*
5463 * The first reference to the entity trigger a parsing phase
5464 * where the ent->children is filled with the result from
5465 * the parsing.
5466 */
5467 if (ent->children == NULL) {
5468 xmlChar *value;
5469 value = ent->content;
5470
5471 /*
5472 * Check that this entity is well formed
5473 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005474 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005475 (value[1] == 0) && (value[0] == '<') &&
5476 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5477 /*
5478 * DONE: get definite answer on this !!!
5479 * Lots of entity decls are used to declare a single
5480 * char
5481 * <!ENTITY lt "<">
5482 * Which seems to be valid since
5483 * 2.4: The ampersand character (&) and the left angle
5484 * bracket (<) may appear in their literal form only
5485 * when used ... They are also legal within the literal
5486 * entity value of an internal entity declaration;i
5487 * see "4.3.2 Well-Formed Parsed Entities".
5488 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5489 * Looking at the OASIS test suite and James Clark
5490 * tests, this is broken. However the XML REC uses
5491 * it. Is the XML REC not well-formed ????
5492 * This is a hack to avoid this problem
5493 *
5494 * ANSWER: since lt gt amp .. are already defined,
5495 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005496 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005497 * is lousy but acceptable.
5498 */
5499 list = xmlNewDocText(ctxt->myDoc, value);
5500 if (list != NULL) {
5501 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5502 (ent->children == NULL)) {
5503 ent->children = list;
5504 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005505 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005506 list->parent = (xmlNodePtr) ent;
5507 } else {
5508 xmlFreeNodeList(list);
5509 }
5510 } else if (list != NULL) {
5511 xmlFreeNodeList(list);
5512 }
5513 } else {
5514 /*
5515 * 4.3.2: An internal general parsed entity is well-formed
5516 * if its replacement text matches the production labeled
5517 * content.
5518 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005519
5520 void *user_data;
5521 /*
5522 * This is a bit hackish but this seems the best
5523 * way to make sure both SAX and DOM entity support
5524 * behaves okay.
5525 */
5526 if (ctxt->userData == ctxt)
5527 user_data = NULL;
5528 else
5529 user_data = ctxt->userData;
5530
Owen Taylor3473f882001-02-23 17:55:21 +00005531 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5532 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005533 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5534 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005535 ctxt->depth--;
5536 } else if (ent->etype ==
5537 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5538 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005539 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005540 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005541 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 ctxt->depth--;
5543 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005544 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005545 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5546 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005547 }
5548 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005549 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005550 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005551 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005552 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5553 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005554 (ent->children == NULL)) {
5555 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005556 if (ctxt->replaceEntities) {
5557 /*
5558 * Prune it directly in the generated document
5559 * except for single text nodes.
5560 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005561 if (((list->type == XML_TEXT_NODE) &&
5562 (list->next == NULL)) ||
5563 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005564 list->parent = (xmlNodePtr) ent;
5565 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005566 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005567 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005568 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005569 while (list != NULL) {
5570 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005571 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005572 if (list->next == NULL)
5573 ent->last = list;
5574 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005575 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005576 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005577#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005578 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5579 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005580#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005581 }
5582 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005583 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005584 while (list != NULL) {
5585 list->parent = (xmlNodePtr) ent;
5586 if (list->next == NULL)
5587 ent->last = list;
5588 list = list->next;
5589 }
Owen Taylor3473f882001-02-23 17:55:21 +00005590 }
5591 } else {
5592 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005593 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005594 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005595 } else if ((ret != XML_ERR_OK) &&
5596 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005597 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005598 } else if (list != NULL) {
5599 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005600 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005601 }
5602 }
5603 }
5604 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5605 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5606 /*
5607 * Create a node.
5608 */
5609 ctxt->sax->reference(ctxt->userData, ent->name);
5610 return;
5611 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005612 /*
5613 * There is a problem on the handling of _private for entities
5614 * (bug 155816): Should we copy the content of the field from
5615 * the entity (possibly overwriting some value set by the user
5616 * when a copy is created), should we leave it alone, or should
5617 * we try to take care of different situations? The problem
5618 * is exacerbated by the usage of this field by the xmlReader.
5619 * To fix this bug, we look at _private on the created node
5620 * and, if it's NULL, we copy in whatever was in the entity.
5621 * If it's not NULL we leave it alone. This is somewhat of a
5622 * hack - maybe we should have further tests to determine
5623 * what to do.
5624 */
Owen Taylor3473f882001-02-23 17:55:21 +00005625 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5626 /*
5627 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005628 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005629 * In the first occurrence list contains the replacement.
5630 * progressive == 2 means we are operating on the Reader
5631 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005632 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005633 if (((list == NULL) && (ent->owner == 0)) ||
5634 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005635 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005636
5637 /*
5638 * when operating on a reader, the entities definitions
5639 * are always owning the entities subtree.
5640 if (ctxt->parseMode == XML_PARSE_READER)
5641 ent->owner = 1;
5642 */
5643
Daniel Veillard62f313b2001-07-04 19:49:14 +00005644 cur = ent->children;
5645 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005646 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005647 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005648 if (nw->_private == NULL)
5649 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005650 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005651 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005652 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005653 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005654 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005655 if (cur == ent->last) {
5656 /*
5657 * needed to detect some strange empty
5658 * node cases in the reader tests
5659 */
5660 if ((ctxt->parseMode == XML_PARSE_READER) &&
5661 (nw->type == XML_ELEMENT_NODE) &&
5662 (nw->children == NULL))
5663 nw->extra = 1;
5664
Daniel Veillard62f313b2001-07-04 19:49:14 +00005665 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005666 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005667 cur = cur->next;
5668 }
Daniel Veillard81273902003-09-30 00:43:48 +00005669#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005670 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005671 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005672#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005673 } else if (list == NULL) {
5674 xmlNodePtr nw = NULL, cur, next, last,
5675 firstChild = NULL;
5676 /*
5677 * Copy the entity child list and make it the new
5678 * entity child list. The goal is to make sure any
5679 * ID or REF referenced will be the one from the
5680 * document content and not the entity copy.
5681 */
5682 cur = ent->children;
5683 ent->children = NULL;
5684 last = ent->last;
5685 ent->last = NULL;
5686 while (cur != NULL) {
5687 next = cur->next;
5688 cur->next = NULL;
5689 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005690 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005691 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005692 if (nw->_private == NULL)
5693 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005694 if (firstChild == NULL){
5695 firstChild = cur;
5696 }
5697 xmlAddChild((xmlNodePtr) ent, nw);
5698 xmlAddChild(ctxt->node, cur);
5699 }
5700 if (cur == last)
5701 break;
5702 cur = next;
5703 }
5704 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005705#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005706 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5707 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005708#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005709 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005710 const xmlChar *nbktext;
5711
Daniel Veillard62f313b2001-07-04 19:49:14 +00005712 /*
5713 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005714 * node with a possible previous text one which
5715 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005716 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005717 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5718 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005719 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005720 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005721 if ((ent->last != ent->children) &&
5722 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005723 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005724 xmlAddChildList(ctxt->node, ent->children);
5725 }
5726
Owen Taylor3473f882001-02-23 17:55:21 +00005727 /*
5728 * This is to avoid a nasty side effect, see
5729 * characters() in SAX.c
5730 */
5731 ctxt->nodemem = 0;
5732 ctxt->nodelen = 0;
5733 return;
5734 } else {
5735 /*
5736 * Probably running in SAX mode
5737 */
5738 xmlParserInputPtr input;
5739
5740 input = xmlNewEntityInputStream(ctxt, ent);
5741 xmlPushInput(ctxt, input);
5742 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005743 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5744 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005745 xmlParseTextDecl(ctxt);
5746 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5747 /*
5748 * The XML REC instructs us to stop parsing right here
5749 */
5750 ctxt->instate = XML_PARSER_EOF;
5751 return;
5752 }
5753 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005754 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5755 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005756 }
5757 }
5758 return;
5759 }
5760 }
5761 } else {
5762 val = ent->content;
5763 if (val == NULL) return;
5764 /*
5765 * inline the entity.
5766 */
5767 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5768 (!ctxt->disableSAX))
5769 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5770 }
5771 }
5772}
5773
5774/**
5775 * xmlParseEntityRef:
5776 * @ctxt: an XML parser context
5777 *
5778 * parse ENTITY references declarations
5779 *
5780 * [68] EntityRef ::= '&' Name ';'
5781 *
5782 * [ WFC: Entity Declared ]
5783 * In a document without any DTD, a document with only an internal DTD
5784 * subset which contains no parameter entity references, or a document
5785 * with "standalone='yes'", the Name given in the entity reference
5786 * must match that in an entity declaration, except that well-formed
5787 * documents need not declare any of the following entities: amp, lt,
5788 * gt, apos, quot. The declaration of a parameter entity must precede
5789 * any reference to it. Similarly, the declaration of a general entity
5790 * must precede any reference to it which appears in a default value in an
5791 * attribute-list declaration. Note that if entities are declared in the
5792 * external subset or in external parameter entities, a non-validating
5793 * processor is not obligated to read and process their declarations;
5794 * for such documents, the rule that an entity must be declared is a
5795 * well-formedness constraint only if standalone='yes'.
5796 *
5797 * [ WFC: Parsed Entity ]
5798 * An entity reference must not contain the name of an unparsed entity
5799 *
5800 * Returns the xmlEntityPtr if found, or NULL otherwise.
5801 */
5802xmlEntityPtr
5803xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005804 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005805 xmlEntityPtr ent = NULL;
5806
5807 GROW;
5808
5809 if (RAW == '&') {
5810 NEXT;
5811 name = xmlParseName(ctxt);
5812 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005813 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5814 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005815 } else {
5816 if (RAW == ';') {
5817 NEXT;
5818 /*
5819 * Ask first SAX for entity resolution, otherwise try the
5820 * predefined set.
5821 */
5822 if (ctxt->sax != NULL) {
5823 if (ctxt->sax->getEntity != NULL)
5824 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005825 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005826 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005827 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5828 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005829 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005830 }
Owen Taylor3473f882001-02-23 17:55:21 +00005831 }
5832 /*
5833 * [ WFC: Entity Declared ]
5834 * In a document without any DTD, a document with only an
5835 * internal DTD subset which contains no parameter entity
5836 * references, or a document with "standalone='yes'", the
5837 * Name given in the entity reference must match that in an
5838 * entity declaration, except that well-formed documents
5839 * need not declare any of the following entities: amp, lt,
5840 * gt, apos, quot.
5841 * The declaration of a parameter entity must precede any
5842 * reference to it.
5843 * Similarly, the declaration of a general entity must
5844 * precede any reference to it which appears in a default
5845 * value in an attribute-list declaration. Note that if
5846 * entities are declared in the external subset or in
5847 * external parameter entities, a non-validating processor
5848 * is not obligated to read and process their declarations;
5849 * for such documents, the rule that an entity must be
5850 * declared is a well-formedness constraint only if
5851 * standalone='yes'.
5852 */
5853 if (ent == NULL) {
5854 if ((ctxt->standalone == 1) ||
5855 ((ctxt->hasExternalSubset == 0) &&
5856 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005857 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005858 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005859 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005860 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005861 "Entity '%s' not defined\n", name);
5862 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005863 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005864 }
5865
5866 /*
5867 * [ WFC: Parsed Entity ]
5868 * An entity reference must not contain the name of an
5869 * unparsed entity
5870 */
5871 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005872 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005873 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005874 }
5875
5876 /*
5877 * [ WFC: No External Entity References ]
5878 * Attribute values cannot contain direct or indirect
5879 * entity references to external entities.
5880 */
5881 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5882 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005883 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5884 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 }
5886 /*
5887 * [ WFC: No < in Attribute Values ]
5888 * The replacement text of any entity referred to directly or
5889 * indirectly in an attribute value (other than "&lt;") must
5890 * not contain a <.
5891 */
5892 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5893 (ent != NULL) &&
5894 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5895 (ent->content != NULL) &&
5896 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005897 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005898 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 }
5900
5901 /*
5902 * Internal check, no parameter entities here ...
5903 */
5904 else {
5905 switch (ent->etype) {
5906 case XML_INTERNAL_PARAMETER_ENTITY:
5907 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005908 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5909 "Attempt to reference the parameter entity '%s'\n",
5910 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 break;
5912 default:
5913 break;
5914 }
5915 }
5916
5917 /*
5918 * [ WFC: No Recursion ]
5919 * A parsed entity must not contain a recursive reference
5920 * to itself, either directly or indirectly.
5921 * Done somewhere else
5922 */
5923
5924 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005925 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005926 }
Owen Taylor3473f882001-02-23 17:55:21 +00005927 }
5928 }
5929 return(ent);
5930}
5931
5932/**
5933 * xmlParseStringEntityRef:
5934 * @ctxt: an XML parser context
5935 * @str: a pointer to an index in the string
5936 *
5937 * parse ENTITY references declarations, but this version parses it from
5938 * a string value.
5939 *
5940 * [68] EntityRef ::= '&' Name ';'
5941 *
5942 * [ WFC: Entity Declared ]
5943 * In a document without any DTD, a document with only an internal DTD
5944 * subset which contains no parameter entity references, or a document
5945 * with "standalone='yes'", the Name given in the entity reference
5946 * must match that in an entity declaration, except that well-formed
5947 * documents need not declare any of the following entities: amp, lt,
5948 * gt, apos, quot. The declaration of a parameter entity must precede
5949 * any reference to it. Similarly, the declaration of a general entity
5950 * must precede any reference to it which appears in a default value in an
5951 * attribute-list declaration. Note that if entities are declared in the
5952 * external subset or in external parameter entities, a non-validating
5953 * processor is not obligated to read and process their declarations;
5954 * for such documents, the rule that an entity must be declared is a
5955 * well-formedness constraint only if standalone='yes'.
5956 *
5957 * [ WFC: Parsed Entity ]
5958 * An entity reference must not contain the name of an unparsed entity
5959 *
5960 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5961 * is updated to the current location in the string.
5962 */
5963xmlEntityPtr
5964xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5965 xmlChar *name;
5966 const xmlChar *ptr;
5967 xmlChar cur;
5968 xmlEntityPtr ent = NULL;
5969
5970 if ((str == NULL) || (*str == NULL))
5971 return(NULL);
5972 ptr = *str;
5973 cur = *ptr;
5974 if (cur == '&') {
5975 ptr++;
5976 cur = *ptr;
5977 name = xmlParseStringName(ctxt, &ptr);
5978 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005979 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5980 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005981 } else {
5982 if (*ptr == ';') {
5983 ptr++;
5984 /*
5985 * Ask first SAX for entity resolution, otherwise try the
5986 * predefined set.
5987 */
5988 if (ctxt->sax != NULL) {
5989 if (ctxt->sax->getEntity != NULL)
5990 ent = ctxt->sax->getEntity(ctxt->userData, name);
5991 if (ent == NULL)
5992 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005993 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005994 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005995 }
Owen Taylor3473f882001-02-23 17:55:21 +00005996 }
5997 /*
5998 * [ WFC: Entity Declared ]
5999 * In a document without any DTD, a document with only an
6000 * internal DTD subset which contains no parameter entity
6001 * references, or a document with "standalone='yes'", the
6002 * Name given in the entity reference must match that in an
6003 * entity declaration, except that well-formed documents
6004 * need not declare any of the following entities: amp, lt,
6005 * gt, apos, quot.
6006 * The declaration of a parameter entity must precede any
6007 * reference to it.
6008 * Similarly, the declaration of a general entity must
6009 * precede any reference to it which appears in a default
6010 * value in an attribute-list declaration. Note that if
6011 * entities are declared in the external subset or in
6012 * external parameter entities, a non-validating processor
6013 * is not obligated to read and process their declarations;
6014 * for such documents, the rule that an entity must be
6015 * declared is a well-formedness constraint only if
6016 * standalone='yes'.
6017 */
6018 if (ent == NULL) {
6019 if ((ctxt->standalone == 1) ||
6020 ((ctxt->hasExternalSubset == 0) &&
6021 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006022 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006023 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006024 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006025 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006026 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006027 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006028 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006029 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006030 }
6031
6032 /*
6033 * [ WFC: Parsed Entity ]
6034 * An entity reference must not contain the name of an
6035 * unparsed entity
6036 */
6037 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006038 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006039 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006040 }
6041
6042 /*
6043 * [ WFC: No External Entity References ]
6044 * Attribute values cannot contain direct or indirect
6045 * entity references to external entities.
6046 */
6047 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6048 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006049 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006050 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006051 }
6052 /*
6053 * [ WFC: No < in Attribute Values ]
6054 * The replacement text of any entity referred to directly or
6055 * indirectly in an attribute value (other than "&lt;") must
6056 * not contain a <.
6057 */
6058 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6059 (ent != NULL) &&
6060 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6061 (ent->content != NULL) &&
6062 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006063 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6064 "'<' in entity '%s' is not allowed in attributes values\n",
6065 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006066 }
6067
6068 /*
6069 * Internal check, no parameter entities here ...
6070 */
6071 else {
6072 switch (ent->etype) {
6073 case XML_INTERNAL_PARAMETER_ENTITY:
6074 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006075 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6076 "Attempt to reference the parameter entity '%s'\n",
6077 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 break;
6079 default:
6080 break;
6081 }
6082 }
6083
6084 /*
6085 * [ WFC: No Recursion ]
6086 * A parsed entity must not contain a recursive reference
6087 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006088 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006089 */
6090
6091 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006092 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094 xmlFree(name);
6095 }
6096 }
6097 *str = ptr;
6098 return(ent);
6099}
6100
6101/**
6102 * xmlParsePEReference:
6103 * @ctxt: an XML parser context
6104 *
6105 * parse PEReference declarations
6106 * The entity content is handled directly by pushing it's content as
6107 * a new input stream.
6108 *
6109 * [69] PEReference ::= '%' Name ';'
6110 *
6111 * [ WFC: No Recursion ]
6112 * A parsed entity must not contain a recursive
6113 * reference to itself, either directly or indirectly.
6114 *
6115 * [ WFC: Entity Declared ]
6116 * In a document without any DTD, a document with only an internal DTD
6117 * subset which contains no parameter entity references, or a document
6118 * with "standalone='yes'", ... ... The declaration of a parameter
6119 * entity must precede any reference to it...
6120 *
6121 * [ VC: Entity Declared ]
6122 * In a document with an external subset or external parameter entities
6123 * with "standalone='no'", ... ... The declaration of a parameter entity
6124 * must precede any reference to it...
6125 *
6126 * [ WFC: In DTD ]
6127 * Parameter-entity references may only appear in the DTD.
6128 * NOTE: misleading but this is handled.
6129 */
6130void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006131xmlParsePEReference(xmlParserCtxtPtr ctxt)
6132{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006133 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006134 xmlEntityPtr entity = NULL;
6135 xmlParserInputPtr input;
6136
6137 if (RAW == '%') {
6138 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006139 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006140 if (name == NULL) {
6141 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6142 "xmlParsePEReference: no name\n");
6143 } else {
6144 if (RAW == ';') {
6145 NEXT;
6146 if ((ctxt->sax != NULL) &&
6147 (ctxt->sax->getParameterEntity != NULL))
6148 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6149 name);
6150 if (entity == NULL) {
6151 /*
6152 * [ WFC: Entity Declared ]
6153 * In a document without any DTD, a document with only an
6154 * internal DTD subset which contains no parameter entity
6155 * references, or a document with "standalone='yes'", ...
6156 * ... The declaration of a parameter entity must precede
6157 * any reference to it...
6158 */
6159 if ((ctxt->standalone == 1) ||
6160 ((ctxt->hasExternalSubset == 0) &&
6161 (ctxt->hasPErefs == 0))) {
6162 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6163 "PEReference: %%%s; not found\n",
6164 name);
6165 } else {
6166 /*
6167 * [ VC: Entity Declared ]
6168 * In a document with an external subset or external
6169 * parameter entities with "standalone='no'", ...
6170 * ... The declaration of a parameter entity must
6171 * precede any reference to it...
6172 */
6173 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6174 "PEReference: %%%s; not found\n",
6175 name, NULL);
6176 ctxt->valid = 0;
6177 }
6178 } else {
6179 /*
6180 * Internal checking in case the entity quest barfed
6181 */
6182 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6183 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6184 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6185 "Internal: %%%s; is not a parameter entity\n",
6186 name, NULL);
6187 } else if (ctxt->input->free != deallocblankswrapper) {
6188 input =
6189 xmlNewBlanksWrapperInputStream(ctxt, entity);
6190 xmlPushInput(ctxt, input);
6191 } else {
6192 /*
6193 * TODO !!!
6194 * handle the extra spaces added before and after
6195 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6196 */
6197 input = xmlNewEntityInputStream(ctxt, entity);
6198 xmlPushInput(ctxt, input);
6199 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006200 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006201 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006202 xmlParseTextDecl(ctxt);
6203 if (ctxt->errNo ==
6204 XML_ERR_UNSUPPORTED_ENCODING) {
6205 /*
6206 * The XML REC instructs us to stop parsing
6207 * right here
6208 */
6209 ctxt->instate = XML_PARSER_EOF;
6210 return;
6211 }
6212 }
6213 }
6214 }
6215 ctxt->hasPErefs = 1;
6216 } else {
6217 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6218 }
6219 }
Owen Taylor3473f882001-02-23 17:55:21 +00006220 }
6221}
6222
6223/**
6224 * xmlParseStringPEReference:
6225 * @ctxt: an XML parser context
6226 * @str: a pointer to an index in the string
6227 *
6228 * parse PEReference declarations
6229 *
6230 * [69] PEReference ::= '%' Name ';'
6231 *
6232 * [ WFC: No Recursion ]
6233 * A parsed entity must not contain a recursive
6234 * reference to itself, either directly or indirectly.
6235 *
6236 * [ WFC: Entity Declared ]
6237 * In a document without any DTD, a document with only an internal DTD
6238 * subset which contains no parameter entity references, or a document
6239 * with "standalone='yes'", ... ... The declaration of a parameter
6240 * entity must precede any reference to it...
6241 *
6242 * [ VC: Entity Declared ]
6243 * In a document with an external subset or external parameter entities
6244 * with "standalone='no'", ... ... The declaration of a parameter entity
6245 * must precede any reference to it...
6246 *
6247 * [ WFC: In DTD ]
6248 * Parameter-entity references may only appear in the DTD.
6249 * NOTE: misleading but this is handled.
6250 *
6251 * Returns the string of the entity content.
6252 * str is updated to the current value of the index
6253 */
6254xmlEntityPtr
6255xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6256 const xmlChar *ptr;
6257 xmlChar cur;
6258 xmlChar *name;
6259 xmlEntityPtr entity = NULL;
6260
6261 if ((str == NULL) || (*str == NULL)) return(NULL);
6262 ptr = *str;
6263 cur = *ptr;
6264 if (cur == '%') {
6265 ptr++;
6266 cur = *ptr;
6267 name = xmlParseStringName(ctxt, &ptr);
6268 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006269 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6270 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006271 } else {
6272 cur = *ptr;
6273 if (cur == ';') {
6274 ptr++;
6275 cur = *ptr;
6276 if ((ctxt->sax != NULL) &&
6277 (ctxt->sax->getParameterEntity != NULL))
6278 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6279 name);
6280 if (entity == NULL) {
6281 /*
6282 * [ WFC: Entity Declared ]
6283 * In a document without any DTD, a document with only an
6284 * internal DTD subset which contains no parameter entity
6285 * references, or a document with "standalone='yes'", ...
6286 * ... The declaration of a parameter entity must precede
6287 * any reference to it...
6288 */
6289 if ((ctxt->standalone == 1) ||
6290 ((ctxt->hasExternalSubset == 0) &&
6291 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006292 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006293 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006294 } else {
6295 /*
6296 * [ VC: Entity Declared ]
6297 * In a document with an external subset or external
6298 * parameter entities with "standalone='no'", ...
6299 * ... The declaration of a parameter entity must
6300 * precede any reference to it...
6301 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006302 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6303 "PEReference: %%%s; not found\n",
6304 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006305 ctxt->valid = 0;
6306 }
6307 } else {
6308 /*
6309 * Internal checking in case the entity quest barfed
6310 */
6311 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6312 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006313 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6314 "%%%s; is not a parameter entity\n",
6315 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006316 }
6317 }
6318 ctxt->hasPErefs = 1;
6319 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006320 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006321 }
6322 xmlFree(name);
6323 }
6324 }
6325 *str = ptr;
6326 return(entity);
6327}
6328
6329/**
6330 * xmlParseDocTypeDecl:
6331 * @ctxt: an XML parser context
6332 *
6333 * parse a DOCTYPE declaration
6334 *
6335 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6336 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6337 *
6338 * [ VC: Root Element Type ]
6339 * The Name in the document type declaration must match the element
6340 * type of the root element.
6341 */
6342
6343void
6344xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006345 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006346 xmlChar *ExternalID = NULL;
6347 xmlChar *URI = NULL;
6348
6349 /*
6350 * We know that '<!DOCTYPE' has been detected.
6351 */
6352 SKIP(9);
6353
6354 SKIP_BLANKS;
6355
6356 /*
6357 * Parse the DOCTYPE name.
6358 */
6359 name = xmlParseName(ctxt);
6360 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006361 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6362 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006363 }
6364 ctxt->intSubName = name;
6365
6366 SKIP_BLANKS;
6367
6368 /*
6369 * Check for SystemID and ExternalID
6370 */
6371 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6372
6373 if ((URI != NULL) || (ExternalID != NULL)) {
6374 ctxt->hasExternalSubset = 1;
6375 }
6376 ctxt->extSubURI = URI;
6377 ctxt->extSubSystem = ExternalID;
6378
6379 SKIP_BLANKS;
6380
6381 /*
6382 * Create and update the internal subset.
6383 */
6384 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6385 (!ctxt->disableSAX))
6386 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6387
6388 /*
6389 * Is there any internal subset declarations ?
6390 * they are handled separately in xmlParseInternalSubset()
6391 */
6392 if (RAW == '[')
6393 return;
6394
6395 /*
6396 * We should be at the end of the DOCTYPE declaration.
6397 */
6398 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006399 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006400 }
6401 NEXT;
6402}
6403
6404/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006405 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006406 * @ctxt: an XML parser context
6407 *
6408 * parse the internal subset declaration
6409 *
6410 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6411 */
6412
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006413static void
Owen Taylor3473f882001-02-23 17:55:21 +00006414xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6415 /*
6416 * Is there any DTD definition ?
6417 */
6418 if (RAW == '[') {
6419 ctxt->instate = XML_PARSER_DTD;
6420 NEXT;
6421 /*
6422 * Parse the succession of Markup declarations and
6423 * PEReferences.
6424 * Subsequence (markupdecl | PEReference | S)*
6425 */
6426 while (RAW != ']') {
6427 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006428 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006429
6430 SKIP_BLANKS;
6431 xmlParseMarkupDecl(ctxt);
6432 xmlParsePEReference(ctxt);
6433
6434 /*
6435 * Pop-up of finished entities.
6436 */
6437 while ((RAW == 0) && (ctxt->inputNr > 1))
6438 xmlPopInput(ctxt);
6439
6440 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006441 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006442 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006443 break;
6444 }
6445 }
6446 if (RAW == ']') {
6447 NEXT;
6448 SKIP_BLANKS;
6449 }
6450 }
6451
6452 /*
6453 * We should be at the end of the DOCTYPE declaration.
6454 */
6455 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006456 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006457 }
6458 NEXT;
6459}
6460
Daniel Veillard81273902003-09-30 00:43:48 +00006461#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006462/**
6463 * xmlParseAttribute:
6464 * @ctxt: an XML parser context
6465 * @value: a xmlChar ** used to store the value of the attribute
6466 *
6467 * parse an attribute
6468 *
6469 * [41] Attribute ::= Name Eq AttValue
6470 *
6471 * [ WFC: No External Entity References ]
6472 * Attribute values cannot contain direct or indirect entity references
6473 * to external entities.
6474 *
6475 * [ WFC: No < in Attribute Values ]
6476 * The replacement text of any entity referred to directly or indirectly in
6477 * an attribute value (other than "&lt;") must not contain a <.
6478 *
6479 * [ VC: Attribute Value Type ]
6480 * The attribute must have been declared; the value must be of the type
6481 * declared for it.
6482 *
6483 * [25] Eq ::= S? '=' S?
6484 *
6485 * With namespace:
6486 *
6487 * [NS 11] Attribute ::= QName Eq AttValue
6488 *
6489 * Also the case QName == xmlns:??? is handled independently as a namespace
6490 * definition.
6491 *
6492 * Returns the attribute name, and the value in *value.
6493 */
6494
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006495const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006496xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006497 const xmlChar *name;
6498 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006499
6500 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006501 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006502 name = xmlParseName(ctxt);
6503 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006504 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006505 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006506 return(NULL);
6507 }
6508
6509 /*
6510 * read the value
6511 */
6512 SKIP_BLANKS;
6513 if (RAW == '=') {
6514 NEXT;
6515 SKIP_BLANKS;
6516 val = xmlParseAttValue(ctxt);
6517 ctxt->instate = XML_PARSER_CONTENT;
6518 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006519 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006520 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006521 return(NULL);
6522 }
6523
6524 /*
6525 * Check that xml:lang conforms to the specification
6526 * No more registered as an error, just generate a warning now
6527 * since this was deprecated in XML second edition
6528 */
6529 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6530 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006531 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6532 "Malformed value for xml:lang : %s\n",
6533 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006534 }
6535 }
6536
6537 /*
6538 * Check that xml:space conforms to the specification
6539 */
6540 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6541 if (xmlStrEqual(val, BAD_CAST "default"))
6542 *(ctxt->space) = 0;
6543 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6544 *(ctxt->space) = 1;
6545 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006546 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006547"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006548 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006549 }
6550 }
6551
6552 *value = val;
6553 return(name);
6554}
6555
6556/**
6557 * xmlParseStartTag:
6558 * @ctxt: an XML parser context
6559 *
6560 * parse a start of tag either for rule element or
6561 * EmptyElement. In both case we don't parse the tag closing chars.
6562 *
6563 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6564 *
6565 * [ WFC: Unique Att Spec ]
6566 * No attribute name may appear more than once in the same start-tag or
6567 * empty-element tag.
6568 *
6569 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6570 *
6571 * [ WFC: Unique Att Spec ]
6572 * No attribute name may appear more than once in the same start-tag or
6573 * empty-element tag.
6574 *
6575 * With namespace:
6576 *
6577 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6578 *
6579 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6580 *
6581 * Returns the element name parsed
6582 */
6583
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006584const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006585xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006586 const xmlChar *name;
6587 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006588 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006589 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006590 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006591 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006592 int i;
6593
6594 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006595 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006596
6597 name = xmlParseName(ctxt);
6598 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006599 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006600 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006601 return(NULL);
6602 }
6603
6604 /*
6605 * Now parse the attributes, it ends up with the ending
6606 *
6607 * (S Attribute)* S?
6608 */
6609 SKIP_BLANKS;
6610 GROW;
6611
Daniel Veillard21a0f912001-02-25 19:54:14 +00006612 while ((RAW != '>') &&
6613 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006614 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006615 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006616 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006617
6618 attname = xmlParseAttribute(ctxt, &attvalue);
6619 if ((attname != NULL) && (attvalue != NULL)) {
6620 /*
6621 * [ WFC: Unique Att Spec ]
6622 * No attribute name may appear more than once in the same
6623 * start-tag or empty-element tag.
6624 */
6625 for (i = 0; i < nbatts;i += 2) {
6626 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006627 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006628 xmlFree(attvalue);
6629 goto failed;
6630 }
6631 }
Owen Taylor3473f882001-02-23 17:55:21 +00006632 /*
6633 * Add the pair to atts
6634 */
6635 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006636 maxatts = 22; /* allow for 10 attrs by default */
6637 atts = (const xmlChar **)
6638 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006639 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006640 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006641 if (attvalue != NULL)
6642 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006643 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006644 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006645 ctxt->atts = atts;
6646 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006647 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006648 const xmlChar **n;
6649
Owen Taylor3473f882001-02-23 17:55:21 +00006650 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006651 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006652 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006653 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006654 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006655 if (attvalue != NULL)
6656 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006657 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006659 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006660 ctxt->atts = atts;
6661 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006662 }
6663 atts[nbatts++] = attname;
6664 atts[nbatts++] = attvalue;
6665 atts[nbatts] = NULL;
6666 atts[nbatts + 1] = NULL;
6667 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006668 if (attvalue != NULL)
6669 xmlFree(attvalue);
6670 }
6671
6672failed:
6673
Daniel Veillard3772de32002-12-17 10:31:45 +00006674 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006675 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6676 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006677 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006678 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6679 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006680 }
6681 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006682 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6683 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006684 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6685 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006686 break;
6687 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006688 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006689 GROW;
6690 }
6691
6692 /*
6693 * SAX: Start of Element !
6694 */
6695 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006696 (!ctxt->disableSAX)) {
6697 if (nbatts > 0)
6698 ctxt->sax->startElement(ctxt->userData, name, atts);
6699 else
6700 ctxt->sax->startElement(ctxt->userData, name, NULL);
6701 }
Owen Taylor3473f882001-02-23 17:55:21 +00006702
6703 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006704 /* Free only the content strings */
6705 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006706 if (atts[i] != NULL)
6707 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006708 }
6709 return(name);
6710}
6711
6712/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006713 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006714 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006715 * @line: line of the start tag
6716 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006717 *
6718 * parse an end of tag
6719 *
6720 * [42] ETag ::= '</' Name S? '>'
6721 *
6722 * With namespace
6723 *
6724 * [NS 9] ETag ::= '</' QName S? '>'
6725 */
6726
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006727static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006728xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006729 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006730
6731 GROW;
6732 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006733 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006734 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006735 return;
6736 }
6737 SKIP(2);
6738
Daniel Veillard46de64e2002-05-29 08:21:33 +00006739 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006740
6741 /*
6742 * We should definitely be at the ending "S? '>'" part
6743 */
6744 GROW;
6745 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006746 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006747 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006748 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006749 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006750
6751 /*
6752 * [ WFC: Element Type Match ]
6753 * The Name in an element's end-tag must match the element type in the
6754 * start-tag.
6755 *
6756 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006757 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006758 if (name == NULL) name = BAD_CAST "unparseable";
6759 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006760 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006761 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006762 }
6763
6764 /*
6765 * SAX: End of Tag
6766 */
6767 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6768 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006769 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006770
Daniel Veillarde57ec792003-09-10 10:50:59 +00006771 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006772 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006773 return;
6774}
6775
6776/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006777 * xmlParseEndTag:
6778 * @ctxt: an XML parser context
6779 *
6780 * parse an end of tag
6781 *
6782 * [42] ETag ::= '</' Name S? '>'
6783 *
6784 * With namespace
6785 *
6786 * [NS 9] ETag ::= '</' QName S? '>'
6787 */
6788
6789void
6790xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006791 xmlParseEndTag1(ctxt, 0);
6792}
Daniel Veillard81273902003-09-30 00:43:48 +00006793#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006794
6795/************************************************************************
6796 * *
6797 * SAX 2 specific operations *
6798 * *
6799 ************************************************************************/
6800
6801static const xmlChar *
6802xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6803 int len = 0, l;
6804 int c;
6805 int count = 0;
6806
6807 /*
6808 * Handler for more complex cases
6809 */
6810 GROW;
6811 c = CUR_CHAR(l);
6812 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006813 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006814 return(NULL);
6815 }
6816
6817 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006818 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006819 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006820 (IS_COMBINING(c)) ||
6821 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006822 if (count++ > 100) {
6823 count = 0;
6824 GROW;
6825 }
6826 len += l;
6827 NEXTL(l);
6828 c = CUR_CHAR(l);
6829 }
6830 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6831}
6832
6833/*
6834 * xmlGetNamespace:
6835 * @ctxt: an XML parser context
6836 * @prefix: the prefix to lookup
6837 *
6838 * Lookup the namespace name for the @prefix (which ca be NULL)
6839 * The prefix must come from the @ctxt->dict dictionnary
6840 *
6841 * Returns the namespace name or NULL if not bound
6842 */
6843static const xmlChar *
6844xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6845 int i;
6846
Daniel Veillarde57ec792003-09-10 10:50:59 +00006847 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006848 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006849 if (ctxt->nsTab[i] == prefix) {
6850 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6851 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006852 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006853 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006854 return(NULL);
6855}
6856
6857/**
6858 * xmlParseNCName:
6859 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006860 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006861 *
6862 * parse an XML name.
6863 *
6864 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6865 * CombiningChar | Extender
6866 *
6867 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6868 *
6869 * Returns the Name parsed or NULL
6870 */
6871
6872static const xmlChar *
6873xmlParseNCName(xmlParserCtxtPtr ctxt) {
6874 const xmlChar *in;
6875 const xmlChar *ret;
6876 int count = 0;
6877
6878 /*
6879 * Accelerator for simple ASCII names
6880 */
6881 in = ctxt->input->cur;
6882 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6883 ((*in >= 0x41) && (*in <= 0x5A)) ||
6884 (*in == '_')) {
6885 in++;
6886 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6887 ((*in >= 0x41) && (*in <= 0x5A)) ||
6888 ((*in >= 0x30) && (*in <= 0x39)) ||
6889 (*in == '_') || (*in == '-') ||
6890 (*in == '.'))
6891 in++;
6892 if ((*in > 0) && (*in < 0x80)) {
6893 count = in - ctxt->input->cur;
6894 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6895 ctxt->input->cur = in;
6896 ctxt->nbChars += count;
6897 ctxt->input->col += count;
6898 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006899 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006900 }
6901 return(ret);
6902 }
6903 }
6904 return(xmlParseNCNameComplex(ctxt));
6905}
6906
6907/**
6908 * xmlParseQName:
6909 * @ctxt: an XML parser context
6910 * @prefix: pointer to store the prefix part
6911 *
6912 * parse an XML Namespace QName
6913 *
6914 * [6] QName ::= (Prefix ':')? LocalPart
6915 * [7] Prefix ::= NCName
6916 * [8] LocalPart ::= NCName
6917 *
6918 * Returns the Name parsed or NULL
6919 */
6920
6921static const xmlChar *
6922xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6923 const xmlChar *l, *p;
6924
6925 GROW;
6926
6927 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006928 if (l == NULL) {
6929 if (CUR == ':') {
6930 l = xmlParseName(ctxt);
6931 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006932 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6933 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006934 *prefix = NULL;
6935 return(l);
6936 }
6937 }
6938 return(NULL);
6939 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006940 if (CUR == ':') {
6941 NEXT;
6942 p = l;
6943 l = xmlParseNCName(ctxt);
6944 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006945 xmlChar *tmp;
6946
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006947 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6948 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006949 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6950 p = xmlDictLookup(ctxt->dict, tmp, -1);
6951 if (tmp != NULL) xmlFree(tmp);
6952 *prefix = NULL;
6953 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006954 }
6955 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006956 xmlChar *tmp;
6957
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006958 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6959 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006960 NEXT;
6961 tmp = (xmlChar *) xmlParseName(ctxt);
6962 if (tmp != NULL) {
6963 tmp = xmlBuildQName(tmp, l, NULL, 0);
6964 l = xmlDictLookup(ctxt->dict, tmp, -1);
6965 if (tmp != NULL) xmlFree(tmp);
6966 *prefix = p;
6967 return(l);
6968 }
6969 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6970 l = xmlDictLookup(ctxt->dict, tmp, -1);
6971 if (tmp != NULL) xmlFree(tmp);
6972 *prefix = p;
6973 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006974 }
6975 *prefix = p;
6976 } else
6977 *prefix = NULL;
6978 return(l);
6979}
6980
6981/**
6982 * xmlParseQNameAndCompare:
6983 * @ctxt: an XML parser context
6984 * @name: the localname
6985 * @prefix: the prefix, if any.
6986 *
6987 * parse an XML name and compares for match
6988 * (specialized for endtag parsing)
6989 *
6990 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6991 * and the name for mismatch
6992 */
6993
6994static const xmlChar *
6995xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6996 xmlChar const *prefix) {
6997 const xmlChar *cmp = name;
6998 const xmlChar *in;
6999 const xmlChar *ret;
7000 const xmlChar *prefix2;
7001
7002 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7003
7004 GROW;
7005 in = ctxt->input->cur;
7006
7007 cmp = prefix;
7008 while (*in != 0 && *in == *cmp) {
7009 ++in;
7010 ++cmp;
7011 }
7012 if ((*cmp == 0) && (*in == ':')) {
7013 in++;
7014 cmp = name;
7015 while (*in != 0 && *in == *cmp) {
7016 ++in;
7017 ++cmp;
7018 }
William M. Brack76e95df2003-10-18 16:20:14 +00007019 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007020 /* success */
7021 ctxt->input->cur = in;
7022 return((const xmlChar*) 1);
7023 }
7024 }
7025 /*
7026 * all strings coms from the dictionary, equality can be done directly
7027 */
7028 ret = xmlParseQName (ctxt, &prefix2);
7029 if ((ret == name) && (prefix == prefix2))
7030 return((const xmlChar*) 1);
7031 return ret;
7032}
7033
7034/**
7035 * xmlParseAttValueInternal:
7036 * @ctxt: an XML parser context
7037 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007038 * @alloc: whether the attribute was reallocated as a new string
7039 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007040 *
7041 * parse a value for an attribute.
7042 * NOTE: if no normalization is needed, the routine will return pointers
7043 * directly from the data buffer.
7044 *
7045 * 3.3.3 Attribute-Value Normalization:
7046 * Before the value of an attribute is passed to the application or
7047 * checked for validity, the XML processor must normalize it as follows:
7048 * - a character reference is processed by appending the referenced
7049 * character to the attribute value
7050 * - an entity reference is processed by recursively processing the
7051 * replacement text of the entity
7052 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7053 * appending #x20 to the normalized value, except that only a single
7054 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7055 * parsed entity or the literal entity value of an internal parsed entity
7056 * - other characters are processed by appending them to the normalized value
7057 * If the declared value is not CDATA, then the XML processor must further
7058 * process the normalized attribute value by discarding any leading and
7059 * trailing space (#x20) characters, and by replacing sequences of space
7060 * (#x20) characters by a single space (#x20) character.
7061 * All attributes for which no declaration has been read should be treated
7062 * by a non-validating parser as if declared CDATA.
7063 *
7064 * Returns the AttValue parsed or NULL. The value has to be freed by the
7065 * caller if it was copied, this can be detected by val[*len] == 0.
7066 */
7067
7068static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007069xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7070 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007071{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007072 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007073 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007074 xmlChar *ret = NULL;
7075
7076 GROW;
7077 in = (xmlChar *) CUR_PTR;
7078 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007079 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007080 return (NULL);
7081 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007082 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007083
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007084 /*
7085 * try to handle in this routine the most common case where no
7086 * allocation of a new string is required and where content is
7087 * pure ASCII.
7088 */
7089 limit = *in++;
7090 end = ctxt->input->end;
7091 start = in;
7092 if (in >= end) {
7093 const xmlChar *oldbase = ctxt->input->base;
7094 GROW;
7095 if (oldbase != ctxt->input->base) {
7096 long delta = ctxt->input->base - oldbase;
7097 start = start + delta;
7098 in = in + delta;
7099 }
7100 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007101 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007102 if (normalize) {
7103 /*
7104 * Skip any leading spaces
7105 */
7106 while ((in < end) && (*in != limit) &&
7107 ((*in == 0x20) || (*in == 0x9) ||
7108 (*in == 0xA) || (*in == 0xD))) {
7109 in++;
7110 start = in;
7111 if (in >= end) {
7112 const xmlChar *oldbase = ctxt->input->base;
7113 GROW;
7114 if (oldbase != ctxt->input->base) {
7115 long delta = ctxt->input->base - oldbase;
7116 start = start + delta;
7117 in = in + delta;
7118 }
7119 end = ctxt->input->end;
7120 }
7121 }
7122 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7123 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7124 if ((*in++ == 0x20) && (*in == 0x20)) break;
7125 if (in >= end) {
7126 const xmlChar *oldbase = ctxt->input->base;
7127 GROW;
7128 if (oldbase != ctxt->input->base) {
7129 long delta = ctxt->input->base - oldbase;
7130 start = start + delta;
7131 in = in + delta;
7132 }
7133 end = ctxt->input->end;
7134 }
7135 }
7136 last = in;
7137 /*
7138 * skip the trailing blanks
7139 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007140 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007141 while ((in < end) && (*in != limit) &&
7142 ((*in == 0x20) || (*in == 0x9) ||
7143 (*in == 0xA) || (*in == 0xD))) {
7144 in++;
7145 if (in >= end) {
7146 const xmlChar *oldbase = ctxt->input->base;
7147 GROW;
7148 if (oldbase != ctxt->input->base) {
7149 long delta = ctxt->input->base - oldbase;
7150 start = start + delta;
7151 in = in + delta;
7152 last = last + delta;
7153 }
7154 end = ctxt->input->end;
7155 }
7156 }
7157 if (*in != limit) goto need_complex;
7158 } else {
7159 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7160 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7161 in++;
7162 if (in >= end) {
7163 const xmlChar *oldbase = ctxt->input->base;
7164 GROW;
7165 if (oldbase != ctxt->input->base) {
7166 long delta = ctxt->input->base - oldbase;
7167 start = start + delta;
7168 in = in + delta;
7169 }
7170 end = ctxt->input->end;
7171 }
7172 }
7173 last = in;
7174 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007175 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007176 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007177 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007178 *len = last - start;
7179 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007180 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007181 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007182 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007183 }
7184 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007185 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007186 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007187need_complex:
7188 if (alloc) *alloc = 1;
7189 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007190}
7191
7192/**
7193 * xmlParseAttribute2:
7194 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007195 * @pref: the element prefix
7196 * @elem: the element name
7197 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007198 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007199 * @len: an int * to save the length of the attribute
7200 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007201 *
7202 * parse an attribute in the new SAX2 framework.
7203 *
7204 * Returns the attribute name, and the value in *value, .
7205 */
7206
7207static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007208xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7209 const xmlChar *pref, const xmlChar *elem,
7210 const xmlChar **prefix, xmlChar **value,
7211 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007212 const xmlChar *name;
7213 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007214 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007215
7216 *value = NULL;
7217 GROW;
7218 name = xmlParseQName(ctxt, prefix);
7219 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7221 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007222 return(NULL);
7223 }
7224
7225 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007226 * get the type if needed
7227 */
7228 if (ctxt->attsSpecial != NULL) {
7229 int type;
7230
7231 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7232 pref, elem, *prefix, name);
7233 if (type != 0) normalize = 1;
7234 }
7235
7236 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007237 * read the value
7238 */
7239 SKIP_BLANKS;
7240 if (RAW == '=') {
7241 NEXT;
7242 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007243 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007244 ctxt->instate = XML_PARSER_CONTENT;
7245 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007246 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007247 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007248 return(NULL);
7249 }
7250
7251 /*
7252 * Check that xml:lang conforms to the specification
7253 * No more registered as an error, just generate a warning now
7254 * since this was deprecated in XML second edition
7255 */
7256 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7257 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007258 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7259 "Malformed value for xml:lang : %s\n",
7260 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007261 }
7262 }
7263
7264 /*
7265 * Check that xml:space conforms to the specification
7266 */
7267 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7268 if (xmlStrEqual(val, BAD_CAST "default"))
7269 *(ctxt->space) = 0;
7270 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7271 *(ctxt->space) = 1;
7272 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007273 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007274"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7275 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007276 }
7277 }
7278
7279 *value = val;
7280 return(name);
7281}
7282
7283/**
7284 * xmlParseStartTag2:
7285 * @ctxt: an XML parser context
7286 *
7287 * parse a start of tag either for rule element or
7288 * EmptyElement. In both case we don't parse the tag closing chars.
7289 * This routine is called when running SAX2 parsing
7290 *
7291 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7292 *
7293 * [ WFC: Unique Att Spec ]
7294 * No attribute name may appear more than once in the same start-tag or
7295 * empty-element tag.
7296 *
7297 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7298 *
7299 * [ WFC: Unique Att Spec ]
7300 * No attribute name may appear more than once in the same start-tag or
7301 * empty-element tag.
7302 *
7303 * With namespace:
7304 *
7305 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7306 *
7307 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7308 *
7309 * Returns the element name parsed
7310 */
7311
7312static const xmlChar *
7313xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007314 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007315 const xmlChar *localname;
7316 const xmlChar *prefix;
7317 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007318 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007319 const xmlChar *nsname;
7320 xmlChar *attvalue;
7321 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007322 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007323 int nratts, nbatts, nbdef;
7324 int i, j, nbNs, attval;
7325 const xmlChar *base;
7326 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007327
7328 if (RAW != '<') return(NULL);
7329 NEXT1;
7330
7331 /*
7332 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7333 * point since the attribute values may be stored as pointers to
7334 * the buffer and calling SHRINK would destroy them !
7335 * The Shrinking is only possible once the full set of attribute
7336 * callbacks have been done.
7337 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007338reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007339 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007340 base = ctxt->input->base;
7341 cur = ctxt->input->cur - ctxt->input->base;
7342 nbatts = 0;
7343 nratts = 0;
7344 nbdef = 0;
7345 nbNs = 0;
7346 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007347
7348 localname = xmlParseQName(ctxt, &prefix);
7349 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007350 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7351 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007352 return(NULL);
7353 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007354 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007355
7356 /*
7357 * Now parse the attributes, it ends up with the ending
7358 *
7359 * (S Attribute)* S?
7360 */
7361 SKIP_BLANKS;
7362 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007363 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007364
7365 while ((RAW != '>') &&
7366 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007367 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007368 const xmlChar *q = CUR_PTR;
7369 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007370 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007371
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007372 attname = xmlParseAttribute2(ctxt, prefix, localname,
7373 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007374 if ((attname != NULL) && (attvalue != NULL)) {
7375 if (len < 0) len = xmlStrlen(attvalue);
7376 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007377 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7378 xmlURIPtr uri;
7379
7380 if (*URL != 0) {
7381 uri = xmlParseURI((const char *) URL);
7382 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007383 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7384 "xmlns: %s not a valid URI\n",
7385 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007386 } else {
7387 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007388 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7389 "xmlns: URI %s is not absolute\n",
7390 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007391 }
7392 xmlFreeURI(uri);
7393 }
7394 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007395 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007396 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007397 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007398 for (j = 1;j <= nbNs;j++)
7399 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7400 break;
7401 if (j <= nbNs)
7402 xmlErrAttributeDup(ctxt, NULL, attname);
7403 else
7404 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007405 if (alloc != 0) xmlFree(attvalue);
7406 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007407 continue;
7408 }
7409 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007410 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7411 xmlURIPtr uri;
7412
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007413 if (attname == ctxt->str_xml) {
7414 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007415 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7416 "xml namespace prefix mapped to wrong URI\n",
7417 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007418 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007419 /*
7420 * Do not keep a namespace definition node
7421 */
7422 if (alloc != 0) xmlFree(attvalue);
7423 SKIP_BLANKS;
7424 continue;
7425 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007426 uri = xmlParseURI((const char *) URL);
7427 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007428 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7429 "xmlns:%s: '%s' is not a valid URI\n",
7430 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007431 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007432 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007433 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7434 "xmlns:%s: URI %s is not absolute\n",
7435 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007436 }
7437 xmlFreeURI(uri);
7438 }
7439
Daniel Veillard0fb18932003-09-07 09:14:37 +00007440 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007441 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007442 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007443 for (j = 1;j <= nbNs;j++)
7444 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7445 break;
7446 if (j <= nbNs)
7447 xmlErrAttributeDup(ctxt, aprefix, attname);
7448 else
7449 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007450 if (alloc != 0) xmlFree(attvalue);
7451 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007452 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007453 continue;
7454 }
7455
7456 /*
7457 * Add the pair to atts
7458 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007459 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7460 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 if (attvalue[len] == 0)
7462 xmlFree(attvalue);
7463 goto failed;
7464 }
7465 maxatts = ctxt->maxatts;
7466 atts = ctxt->atts;
7467 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007468 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007469 atts[nbatts++] = attname;
7470 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007471 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007472 atts[nbatts++] = attvalue;
7473 attvalue += len;
7474 atts[nbatts++] = attvalue;
7475 /*
7476 * tag if some deallocation is needed
7477 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007478 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007479 } else {
7480 if ((attvalue != NULL) && (attvalue[len] == 0))
7481 xmlFree(attvalue);
7482 }
7483
7484failed:
7485
7486 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007487 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007488 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7489 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007490 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007491 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7492 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007493 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007494 }
7495 SKIP_BLANKS;
7496 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7497 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007498 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007499 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007500 break;
7501 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007502 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007503 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007504 }
7505
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007507 * The attributes defaulting
7508 */
7509 if (ctxt->attsDefault != NULL) {
7510 xmlDefAttrsPtr defaults;
7511
7512 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7513 if (defaults != NULL) {
7514 for (i = 0;i < defaults->nbAttrs;i++) {
7515 attname = defaults->values[4 * i];
7516 aprefix = defaults->values[4 * i + 1];
7517
7518 /*
7519 * special work for namespaces defaulted defs
7520 */
7521 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7522 /*
7523 * check that it's not a defined namespace
7524 */
7525 for (j = 1;j <= nbNs;j++)
7526 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7527 break;
7528 if (j <= nbNs) continue;
7529
7530 nsname = xmlGetNamespace(ctxt, NULL);
7531 if (nsname != defaults->values[4 * i + 2]) {
7532 if (nsPush(ctxt, NULL,
7533 defaults->values[4 * i + 2]) > 0)
7534 nbNs++;
7535 }
7536 } else if (aprefix == ctxt->str_xmlns) {
7537 /*
7538 * check that it's not a defined namespace
7539 */
7540 for (j = 1;j <= nbNs;j++)
7541 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7542 break;
7543 if (j <= nbNs) continue;
7544
7545 nsname = xmlGetNamespace(ctxt, attname);
7546 if (nsname != defaults->values[2]) {
7547 if (nsPush(ctxt, attname,
7548 defaults->values[4 * i + 2]) > 0)
7549 nbNs++;
7550 }
7551 } else {
7552 /*
7553 * check that it's not a defined attribute
7554 */
7555 for (j = 0;j < nbatts;j+=5) {
7556 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7557 break;
7558 }
7559 if (j < nbatts) continue;
7560
7561 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7562 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007563 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007564 }
7565 maxatts = ctxt->maxatts;
7566 atts = ctxt->atts;
7567 }
7568 atts[nbatts++] = attname;
7569 atts[nbatts++] = aprefix;
7570 if (aprefix == NULL)
7571 atts[nbatts++] = NULL;
7572 else
7573 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7574 atts[nbatts++] = defaults->values[4 * i + 2];
7575 atts[nbatts++] = defaults->values[4 * i + 3];
7576 nbdef++;
7577 }
7578 }
7579 }
7580 }
7581
Daniel Veillarde70c8772003-11-25 07:21:18 +00007582 /*
7583 * The attributes checkings
7584 */
7585 for (i = 0; i < nbatts;i += 5) {
7586 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7587 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7588 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7589 "Namespace prefix %s for %s on %s is not defined\n",
7590 atts[i + 1], atts[i], localname);
7591 }
7592 atts[i + 2] = nsname;
7593 /*
7594 * [ WFC: Unique Att Spec ]
7595 * No attribute name may appear more than once in the same
7596 * start-tag or empty-element tag.
7597 * As extended by the Namespace in XML REC.
7598 */
7599 for (j = 0; j < i;j += 5) {
7600 if (atts[i] == atts[j]) {
7601 if (atts[i+1] == atts[j+1]) {
7602 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7603 break;
7604 }
7605 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7606 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7607 "Namespaced Attribute %s in '%s' redefined\n",
7608 atts[i], nsname, NULL);
7609 break;
7610 }
7611 }
7612 }
7613 }
7614
Daniel Veillarde57ec792003-09-10 10:50:59 +00007615 nsname = xmlGetNamespace(ctxt, prefix);
7616 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007617 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7618 "Namespace prefix %s on %s is not defined\n",
7619 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007620 }
7621 *pref = prefix;
7622 *URI = nsname;
7623
7624 /*
7625 * SAX: Start of Element !
7626 */
7627 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7628 (!ctxt->disableSAX)) {
7629 if (nbNs > 0)
7630 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7631 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7632 nbatts / 5, nbdef, atts);
7633 else
7634 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7635 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7636 }
7637
7638 /*
7639 * Free up attribute allocated strings if needed
7640 */
7641 if (attval != 0) {
7642 for (i = 3,j = 0; j < nratts;i += 5,j++)
7643 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7644 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007645 }
7646
7647 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007648
7649base_changed:
7650 /*
7651 * the attribute strings are valid iif the base didn't changed
7652 */
7653 if (attval != 0) {
7654 for (i = 3,j = 0; j < nratts;i += 5,j++)
7655 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7656 xmlFree((xmlChar *) atts[i]);
7657 }
7658 ctxt->input->cur = ctxt->input->base + cur;
7659 if (ctxt->wellFormed == 1) {
7660 goto reparse;
7661 }
7662 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663}
7664
7665/**
7666 * xmlParseEndTag2:
7667 * @ctxt: an XML parser context
7668 * @line: line of the start tag
7669 * @nsNr: number of namespaces on the start tag
7670 *
7671 * parse an end of tag
7672 *
7673 * [42] ETag ::= '</' Name S? '>'
7674 *
7675 * With namespace
7676 *
7677 * [NS 9] ETag ::= '</' QName S? '>'
7678 */
7679
7680static void
7681xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007682 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007683 const xmlChar *name;
7684
7685 GROW;
7686 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007687 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007688 return;
7689 }
7690 SKIP(2);
7691
William M. Brack13dfa872004-09-18 04:52:08 +00007692 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007693 if (ctxt->input->cur[tlen] == '>') {
7694 ctxt->input->cur += tlen + 1;
7695 goto done;
7696 }
7697 ctxt->input->cur += tlen;
7698 name = (xmlChar*)1;
7699 } else {
7700 if (prefix == NULL)
7701 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7702 else
7703 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7704 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007705
7706 /*
7707 * We should definitely be at the ending "S? '>'" part
7708 */
7709 GROW;
7710 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007711 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007712 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007713 } else
7714 NEXT1;
7715
7716 /*
7717 * [ WFC: Element Type Match ]
7718 * The Name in an element's end-tag must match the element type in the
7719 * start-tag.
7720 *
7721 */
7722 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007723 if (name == NULL) name = BAD_CAST "unparseable";
7724 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007726 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727 }
7728
7729 /*
7730 * SAX: End of Tag
7731 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007732done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007733 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7734 (!ctxt->disableSAX))
7735 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7736
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 spacePop(ctxt);
7738 if (nsNr != 0)
7739 nsPop(ctxt, nsNr);
7740 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007741}
7742
7743/**
Owen Taylor3473f882001-02-23 17:55:21 +00007744 * xmlParseCDSect:
7745 * @ctxt: an XML parser context
7746 *
7747 * Parse escaped pure raw content.
7748 *
7749 * [18] CDSect ::= CDStart CData CDEnd
7750 *
7751 * [19] CDStart ::= '<![CDATA['
7752 *
7753 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7754 *
7755 * [21] CDEnd ::= ']]>'
7756 */
7757void
7758xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7759 xmlChar *buf = NULL;
7760 int len = 0;
7761 int size = XML_PARSER_BUFFER_SIZE;
7762 int r, rl;
7763 int s, sl;
7764 int cur, l;
7765 int count = 0;
7766
Daniel Veillard8f597c32003-10-06 08:19:27 +00007767 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007768 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007769 SKIP(9);
7770 } else
7771 return;
7772
7773 ctxt->instate = XML_PARSER_CDATA_SECTION;
7774 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007775 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007776 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007777 ctxt->instate = XML_PARSER_CONTENT;
7778 return;
7779 }
7780 NEXTL(rl);
7781 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007782 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007783 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007784 ctxt->instate = XML_PARSER_CONTENT;
7785 return;
7786 }
7787 NEXTL(sl);
7788 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007789 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007790 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007791 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007792 return;
7793 }
William M. Brack871611b2003-10-18 04:53:14 +00007794 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007795 ((r != ']') || (s != ']') || (cur != '>'))) {
7796 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00007797 xmlChar *tmp;
7798
Owen Taylor3473f882001-02-23 17:55:21 +00007799 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00007800 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7801 if (tmp == NULL) {
7802 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007803 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007804 return;
7805 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00007806 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00007807 }
7808 COPY_BUF(rl,buf,len,r);
7809 r = s;
7810 rl = sl;
7811 s = cur;
7812 sl = l;
7813 count++;
7814 if (count > 50) {
7815 GROW;
7816 count = 0;
7817 }
7818 NEXTL(l);
7819 cur = CUR_CHAR(l);
7820 }
7821 buf[len] = 0;
7822 ctxt->instate = XML_PARSER_CONTENT;
7823 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007824 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007825 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007826 xmlFree(buf);
7827 return;
7828 }
7829 NEXTL(l);
7830
7831 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007832 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007833 */
7834 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7835 if (ctxt->sax->cdataBlock != NULL)
7836 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007837 else if (ctxt->sax->characters != NULL)
7838 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007839 }
7840 xmlFree(buf);
7841}
7842
7843/**
7844 * xmlParseContent:
7845 * @ctxt: an XML parser context
7846 *
7847 * Parse a content:
7848 *
7849 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7850 */
7851
7852void
7853xmlParseContent(xmlParserCtxtPtr ctxt) {
7854 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007855 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007856 ((RAW != '<') || (NXT(1) != '/'))) {
7857 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007858 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007859 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007860
7861 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007862 * First case : a Processing Instruction.
7863 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007864 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007865 xmlParsePI(ctxt);
7866 }
7867
7868 /*
7869 * Second case : a CDSection
7870 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007871 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007872 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007873 xmlParseCDSect(ctxt);
7874 }
7875
7876 /*
7877 * Third case : a comment
7878 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007879 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007880 (NXT(2) == '-') && (NXT(3) == '-')) {
7881 xmlParseComment(ctxt);
7882 ctxt->instate = XML_PARSER_CONTENT;
7883 }
7884
7885 /*
7886 * Fourth case : a sub-element.
7887 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007888 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007889 xmlParseElement(ctxt);
7890 }
7891
7892 /*
7893 * Fifth case : a reference. If if has not been resolved,
7894 * parsing returns it's Name, create the node
7895 */
7896
Daniel Veillard21a0f912001-02-25 19:54:14 +00007897 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007898 xmlParseReference(ctxt);
7899 }
7900
7901 /*
7902 * Last case, text. Note that References are handled directly.
7903 */
7904 else {
7905 xmlParseCharData(ctxt, 0);
7906 }
7907
7908 GROW;
7909 /*
7910 * Pop-up of finished entities.
7911 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007912 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007913 xmlPopInput(ctxt);
7914 SHRINK;
7915
Daniel Veillardfdc91562002-07-01 21:52:03 +00007916 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007917 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7918 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007919 ctxt->instate = XML_PARSER_EOF;
7920 break;
7921 }
7922 }
7923}
7924
7925/**
7926 * xmlParseElement:
7927 * @ctxt: an XML parser context
7928 *
7929 * parse an XML element, this is highly recursive
7930 *
7931 * [39] element ::= EmptyElemTag | STag content ETag
7932 *
7933 * [ WFC: Element Type Match ]
7934 * The Name in an element's end-tag must match the element type in the
7935 * start-tag.
7936 *
Owen Taylor3473f882001-02-23 17:55:21 +00007937 */
7938
7939void
7940xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007941 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942 const xmlChar *prefix;
7943 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007944 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007945 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007946 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007947 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007948
7949 /* Capture start position */
7950 if (ctxt->record_info) {
7951 node_info.begin_pos = ctxt->input->consumed +
7952 (CUR_PTR - ctxt->input->base);
7953 node_info.begin_line = ctxt->input->line;
7954 }
7955
7956 if (ctxt->spaceNr == 0)
7957 spacePush(ctxt, -1);
7958 else
7959 spacePush(ctxt, *ctxt->space);
7960
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007961 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007962#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007963 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007964#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007965 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007966#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007967 else
7968 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007969#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007970 if (name == NULL) {
7971 spacePop(ctxt);
7972 return;
7973 }
7974 namePush(ctxt, name);
7975 ret = ctxt->node;
7976
Daniel Veillard4432df22003-09-28 18:58:27 +00007977#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007978 /*
7979 * [ VC: Root Element Type ]
7980 * The Name in the document type declaration must match the element
7981 * type of the root element.
7982 */
7983 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7984 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7985 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007986#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007987
7988 /*
7989 * Check for an Empty Element.
7990 */
7991 if ((RAW == '/') && (NXT(1) == '>')) {
7992 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007993 if (ctxt->sax2) {
7994 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7995 (!ctxt->disableSAX))
7996 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007997#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007998 } else {
7999 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8000 (!ctxt->disableSAX))
8001 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008002#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008003 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008004 namePop(ctxt);
8005 spacePop(ctxt);
8006 if (nsNr != ctxt->nsNr)
8007 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008008 if ( ret != NULL && ctxt->record_info ) {
8009 node_info.end_pos = ctxt->input->consumed +
8010 (CUR_PTR - ctxt->input->base);
8011 node_info.end_line = ctxt->input->line;
8012 node_info.node = ret;
8013 xmlParserAddNodeInfo(ctxt, &node_info);
8014 }
8015 return;
8016 }
8017 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008018 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008019 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008020 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8021 "Couldn't find end of Start Tag %s line %d\n",
8022 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008023
8024 /*
8025 * end of parsing of this node.
8026 */
8027 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008028 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008029 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008030 if (nsNr != ctxt->nsNr)
8031 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008032
8033 /*
8034 * Capture end position and add node
8035 */
8036 if ( ret != NULL && ctxt->record_info ) {
8037 node_info.end_pos = ctxt->input->consumed +
8038 (CUR_PTR - ctxt->input->base);
8039 node_info.end_line = ctxt->input->line;
8040 node_info.node = ret;
8041 xmlParserAddNodeInfo(ctxt, &node_info);
8042 }
8043 return;
8044 }
8045
8046 /*
8047 * Parse the content of the element:
8048 */
8049 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008050 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008051 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008052 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008053 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008054
8055 /*
8056 * end of parsing of this node.
8057 */
8058 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008059 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008060 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008061 if (nsNr != ctxt->nsNr)
8062 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008063 return;
8064 }
8065
8066 /*
8067 * parse the end of tag: '</' should be here.
8068 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008069 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008070 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008071 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008072 }
8073#ifdef LIBXML_SAX1_ENABLED
8074 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008075 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008076#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008077
8078 /*
8079 * Capture end position and add node
8080 */
8081 if ( ret != NULL && ctxt->record_info ) {
8082 node_info.end_pos = ctxt->input->consumed +
8083 (CUR_PTR - ctxt->input->base);
8084 node_info.end_line = ctxt->input->line;
8085 node_info.node = ret;
8086 xmlParserAddNodeInfo(ctxt, &node_info);
8087 }
8088}
8089
8090/**
8091 * xmlParseVersionNum:
8092 * @ctxt: an XML parser context
8093 *
8094 * parse the XML version value.
8095 *
8096 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8097 *
8098 * Returns the string giving the XML version number, or NULL
8099 */
8100xmlChar *
8101xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8102 xmlChar *buf = NULL;
8103 int len = 0;
8104 int size = 10;
8105 xmlChar cur;
8106
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008107 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008108 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008109 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008110 return(NULL);
8111 }
8112 cur = CUR;
8113 while (((cur >= 'a') && (cur <= 'z')) ||
8114 ((cur >= 'A') && (cur <= 'Z')) ||
8115 ((cur >= '0') && (cur <= '9')) ||
8116 (cur == '_') || (cur == '.') ||
8117 (cur == ':') || (cur == '-')) {
8118 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008119 xmlChar *tmp;
8120
Owen Taylor3473f882001-02-23 17:55:21 +00008121 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008122 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8123 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008124 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008125 return(NULL);
8126 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008127 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008128 }
8129 buf[len++] = cur;
8130 NEXT;
8131 cur=CUR;
8132 }
8133 buf[len] = 0;
8134 return(buf);
8135}
8136
8137/**
8138 * xmlParseVersionInfo:
8139 * @ctxt: an XML parser context
8140 *
8141 * parse the XML version.
8142 *
8143 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8144 *
8145 * [25] Eq ::= S? '=' S?
8146 *
8147 * Returns the version string, e.g. "1.0"
8148 */
8149
8150xmlChar *
8151xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8152 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008153
Daniel Veillarda07050d2003-10-19 14:46:32 +00008154 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008155 SKIP(7);
8156 SKIP_BLANKS;
8157 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008158 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008159 return(NULL);
8160 }
8161 NEXT;
8162 SKIP_BLANKS;
8163 if (RAW == '"') {
8164 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008165 version = xmlParseVersionNum(ctxt);
8166 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008167 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008168 } else
8169 NEXT;
8170 } else if (RAW == '\''){
8171 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008172 version = xmlParseVersionNum(ctxt);
8173 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008174 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008175 } else
8176 NEXT;
8177 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008178 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008179 }
8180 }
8181 return(version);
8182}
8183
8184/**
8185 * xmlParseEncName:
8186 * @ctxt: an XML parser context
8187 *
8188 * parse the XML encoding name
8189 *
8190 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8191 *
8192 * Returns the encoding name value or NULL
8193 */
8194xmlChar *
8195xmlParseEncName(xmlParserCtxtPtr ctxt) {
8196 xmlChar *buf = NULL;
8197 int len = 0;
8198 int size = 10;
8199 xmlChar cur;
8200
8201 cur = CUR;
8202 if (((cur >= 'a') && (cur <= 'z')) ||
8203 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008204 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008205 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008206 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008207 return(NULL);
8208 }
8209
8210 buf[len++] = cur;
8211 NEXT;
8212 cur = CUR;
8213 while (((cur >= 'a') && (cur <= 'z')) ||
8214 ((cur >= 'A') && (cur <= 'Z')) ||
8215 ((cur >= '0') && (cur <= '9')) ||
8216 (cur == '.') || (cur == '_') ||
8217 (cur == '-')) {
8218 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008219 xmlChar *tmp;
8220
Owen Taylor3473f882001-02-23 17:55:21 +00008221 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008222 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8223 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008224 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008225 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008226 return(NULL);
8227 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008228 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008229 }
8230 buf[len++] = cur;
8231 NEXT;
8232 cur = CUR;
8233 if (cur == 0) {
8234 SHRINK;
8235 GROW;
8236 cur = CUR;
8237 }
8238 }
8239 buf[len] = 0;
8240 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008241 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008242 }
8243 return(buf);
8244}
8245
8246/**
8247 * xmlParseEncodingDecl:
8248 * @ctxt: an XML parser context
8249 *
8250 * parse the XML encoding declaration
8251 *
8252 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8253 *
8254 * this setups the conversion filters.
8255 *
8256 * Returns the encoding value or NULL
8257 */
8258
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008259const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008260xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8261 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008262
8263 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008264 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008265 SKIP(8);
8266 SKIP_BLANKS;
8267 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008268 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008269 return(NULL);
8270 }
8271 NEXT;
8272 SKIP_BLANKS;
8273 if (RAW == '"') {
8274 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008275 encoding = xmlParseEncName(ctxt);
8276 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008277 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008278 } else
8279 NEXT;
8280 } else if (RAW == '\''){
8281 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008282 encoding = xmlParseEncName(ctxt);
8283 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008284 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008285 } else
8286 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008287 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008288 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008289 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008290 /*
8291 * UTF-16 encoding stwich has already taken place at this stage,
8292 * more over the little-endian/big-endian selection is already done
8293 */
8294 if ((encoding != NULL) &&
8295 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8296 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008297 if (ctxt->encoding != NULL)
8298 xmlFree((xmlChar *) ctxt->encoding);
8299 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008300 }
8301 /*
8302 * UTF-8 encoding is handled natively
8303 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008304 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008305 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8306 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008307 if (ctxt->encoding != NULL)
8308 xmlFree((xmlChar *) ctxt->encoding);
8309 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008310 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008311 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008312 xmlCharEncodingHandlerPtr handler;
8313
8314 if (ctxt->input->encoding != NULL)
8315 xmlFree((xmlChar *) ctxt->input->encoding);
8316 ctxt->input->encoding = encoding;
8317
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008318 handler = xmlFindCharEncodingHandler((const char *) encoding);
8319 if (handler != NULL) {
8320 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008321 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008322 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008323 "Unsupported encoding %s\n", encoding);
8324 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008325 }
8326 }
8327 }
8328 return(encoding);
8329}
8330
8331/**
8332 * xmlParseSDDecl:
8333 * @ctxt: an XML parser context
8334 *
8335 * parse the XML standalone declaration
8336 *
8337 * [32] SDDecl ::= S 'standalone' Eq
8338 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8339 *
8340 * [ VC: Standalone Document Declaration ]
8341 * TODO The standalone document declaration must have the value "no"
8342 * if any external markup declarations contain declarations of:
8343 * - attributes with default values, if elements to which these
8344 * attributes apply appear in the document without specifications
8345 * of values for these attributes, or
8346 * - entities (other than amp, lt, gt, apos, quot), if references
8347 * to those entities appear in the document, or
8348 * - attributes with values subject to normalization, where the
8349 * attribute appears in the document with a value which will change
8350 * as a result of normalization, or
8351 * - element types with element content, if white space occurs directly
8352 * within any instance of those types.
8353 *
8354 * Returns 1 if standalone, 0 otherwise
8355 */
8356
8357int
8358xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8359 int standalone = -1;
8360
8361 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008362 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008363 SKIP(10);
8364 SKIP_BLANKS;
8365 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008366 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008367 return(standalone);
8368 }
8369 NEXT;
8370 SKIP_BLANKS;
8371 if (RAW == '\''){
8372 NEXT;
8373 if ((RAW == 'n') && (NXT(1) == 'o')) {
8374 standalone = 0;
8375 SKIP(2);
8376 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8377 (NXT(2) == 's')) {
8378 standalone = 1;
8379 SKIP(3);
8380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008381 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008382 }
8383 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008384 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008385 } else
8386 NEXT;
8387 } else if (RAW == '"'){
8388 NEXT;
8389 if ((RAW == 'n') && (NXT(1) == 'o')) {
8390 standalone = 0;
8391 SKIP(2);
8392 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8393 (NXT(2) == 's')) {
8394 standalone = 1;
8395 SKIP(3);
8396 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008397 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008398 }
8399 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008401 } else
8402 NEXT;
8403 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008404 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008405 }
8406 }
8407 return(standalone);
8408}
8409
8410/**
8411 * xmlParseXMLDecl:
8412 * @ctxt: an XML parser context
8413 *
8414 * parse an XML declaration header
8415 *
8416 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8417 */
8418
8419void
8420xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8421 xmlChar *version;
8422
8423 /*
8424 * We know that '<?xml' is here.
8425 */
8426 SKIP(5);
8427
William M. Brack76e95df2003-10-18 16:20:14 +00008428 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008429 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8430 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008431 }
8432 SKIP_BLANKS;
8433
8434 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008435 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008436 */
8437 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008438 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008439 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008440 } else {
8441 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8442 /*
8443 * TODO: Blueberry should be detected here
8444 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008445 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8446 "Unsupported version '%s'\n",
8447 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008448 }
8449 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008450 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008451 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008452 }
Owen Taylor3473f882001-02-23 17:55:21 +00008453
8454 /*
8455 * We may have the encoding declaration
8456 */
William M. Brack76e95df2003-10-18 16:20:14 +00008457 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008458 if ((RAW == '?') && (NXT(1) == '>')) {
8459 SKIP(2);
8460 return;
8461 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008462 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008463 }
8464 xmlParseEncodingDecl(ctxt);
8465 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8466 /*
8467 * The XML REC instructs us to stop parsing right here
8468 */
8469 return;
8470 }
8471
8472 /*
8473 * We may have the standalone status.
8474 */
William M. Brack76e95df2003-10-18 16:20:14 +00008475 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008476 if ((RAW == '?') && (NXT(1) == '>')) {
8477 SKIP(2);
8478 return;
8479 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008480 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008481 }
8482 SKIP_BLANKS;
8483 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8484
8485 SKIP_BLANKS;
8486 if ((RAW == '?') && (NXT(1) == '>')) {
8487 SKIP(2);
8488 } else if (RAW == '>') {
8489 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008490 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008491 NEXT;
8492 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008493 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008494 MOVETO_ENDTAG(CUR_PTR);
8495 NEXT;
8496 }
8497}
8498
8499/**
8500 * xmlParseMisc:
8501 * @ctxt: an XML parser context
8502 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008503 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008504 *
8505 * [27] Misc ::= Comment | PI | S
8506 */
8507
8508void
8509xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008510 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008511 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008512 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008513 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008514 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008515 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008516 NEXT;
8517 } else
8518 xmlParseComment(ctxt);
8519 }
8520}
8521
8522/**
8523 * xmlParseDocument:
8524 * @ctxt: an XML parser context
8525 *
8526 * parse an XML document (and build a tree if using the standard SAX
8527 * interface).
8528 *
8529 * [1] document ::= prolog element Misc*
8530 *
8531 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8532 *
8533 * Returns 0, -1 in case of error. the parser context is augmented
8534 * as a result of the parsing.
8535 */
8536
8537int
8538xmlParseDocument(xmlParserCtxtPtr ctxt) {
8539 xmlChar start[4];
8540 xmlCharEncoding enc;
8541
8542 xmlInitParser();
8543
8544 GROW;
8545
8546 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008547 * SAX: detecting the level.
8548 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008549 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008550
8551 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008552 * SAX: beginning of the document processing.
8553 */
8554 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8555 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8556
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008557 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8558 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008559 /*
8560 * Get the 4 first bytes and decode the charset
8561 * if enc != XML_CHAR_ENCODING_NONE
8562 * plug some encoding conversion routines.
8563 */
8564 start[0] = RAW;
8565 start[1] = NXT(1);
8566 start[2] = NXT(2);
8567 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008568 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008569 if (enc != XML_CHAR_ENCODING_NONE) {
8570 xmlSwitchEncoding(ctxt, enc);
8571 }
Owen Taylor3473f882001-02-23 17:55:21 +00008572 }
8573
8574
8575 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008576 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008577 }
8578
8579 /*
8580 * Check for the XMLDecl in the Prolog.
8581 */
8582 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008583 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008584
8585 /*
8586 * Note that we will switch encoding on the fly.
8587 */
8588 xmlParseXMLDecl(ctxt);
8589 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8590 /*
8591 * The XML REC instructs us to stop parsing right here
8592 */
8593 return(-1);
8594 }
8595 ctxt->standalone = ctxt->input->standalone;
8596 SKIP_BLANKS;
8597 } else {
8598 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8599 }
8600 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8601 ctxt->sax->startDocument(ctxt->userData);
8602
8603 /*
8604 * The Misc part of the Prolog
8605 */
8606 GROW;
8607 xmlParseMisc(ctxt);
8608
8609 /*
8610 * Then possibly doc type declaration(s) and more Misc
8611 * (doctypedecl Misc*)?
8612 */
8613 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008614 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008615
8616 ctxt->inSubset = 1;
8617 xmlParseDocTypeDecl(ctxt);
8618 if (RAW == '[') {
8619 ctxt->instate = XML_PARSER_DTD;
8620 xmlParseInternalSubset(ctxt);
8621 }
8622
8623 /*
8624 * Create and update the external subset.
8625 */
8626 ctxt->inSubset = 2;
8627 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8628 (!ctxt->disableSAX))
8629 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8630 ctxt->extSubSystem, ctxt->extSubURI);
8631 ctxt->inSubset = 0;
8632
8633
8634 ctxt->instate = XML_PARSER_PROLOG;
8635 xmlParseMisc(ctxt);
8636 }
8637
8638 /*
8639 * Time to start parsing the tree itself
8640 */
8641 GROW;
8642 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008643 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8644 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008645 } else {
8646 ctxt->instate = XML_PARSER_CONTENT;
8647 xmlParseElement(ctxt);
8648 ctxt->instate = XML_PARSER_EPILOG;
8649
8650
8651 /*
8652 * The Misc part at the end
8653 */
8654 xmlParseMisc(ctxt);
8655
Daniel Veillard561b7f82002-03-20 21:55:57 +00008656 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008657 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008658 }
8659 ctxt->instate = XML_PARSER_EOF;
8660 }
8661
8662 /*
8663 * SAX: end of the document processing.
8664 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008665 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008666 ctxt->sax->endDocument(ctxt->userData);
8667
Daniel Veillard5997aca2002-03-18 18:36:20 +00008668 /*
8669 * Remove locally kept entity definitions if the tree was not built
8670 */
8671 if ((ctxt->myDoc != NULL) &&
8672 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8673 xmlFreeDoc(ctxt->myDoc);
8674 ctxt->myDoc = NULL;
8675 }
8676
Daniel Veillardc7612992002-02-17 22:47:37 +00008677 if (! ctxt->wellFormed) {
8678 ctxt->valid = 0;
8679 return(-1);
8680 }
Owen Taylor3473f882001-02-23 17:55:21 +00008681 return(0);
8682}
8683
8684/**
8685 * xmlParseExtParsedEnt:
8686 * @ctxt: an XML parser context
8687 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008688 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008689 * An external general parsed entity is well-formed if it matches the
8690 * production labeled extParsedEnt.
8691 *
8692 * [78] extParsedEnt ::= TextDecl? content
8693 *
8694 * Returns 0, -1 in case of error. the parser context is augmented
8695 * as a result of the parsing.
8696 */
8697
8698int
8699xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8700 xmlChar start[4];
8701 xmlCharEncoding enc;
8702
8703 xmlDefaultSAXHandlerInit();
8704
Daniel Veillard309f81d2003-09-23 09:02:53 +00008705 xmlDetectSAX2(ctxt);
8706
Owen Taylor3473f882001-02-23 17:55:21 +00008707 GROW;
8708
8709 /*
8710 * SAX: beginning of the document processing.
8711 */
8712 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8713 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8714
8715 /*
8716 * Get the 4 first bytes and decode the charset
8717 * if enc != XML_CHAR_ENCODING_NONE
8718 * plug some encoding conversion routines.
8719 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008720 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8721 start[0] = RAW;
8722 start[1] = NXT(1);
8723 start[2] = NXT(2);
8724 start[3] = NXT(3);
8725 enc = xmlDetectCharEncoding(start, 4);
8726 if (enc != XML_CHAR_ENCODING_NONE) {
8727 xmlSwitchEncoding(ctxt, enc);
8728 }
Owen Taylor3473f882001-02-23 17:55:21 +00008729 }
8730
8731
8732 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008733 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008734 }
8735
8736 /*
8737 * Check for the XMLDecl in the Prolog.
8738 */
8739 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008740 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008741
8742 /*
8743 * Note that we will switch encoding on the fly.
8744 */
8745 xmlParseXMLDecl(ctxt);
8746 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8747 /*
8748 * The XML REC instructs us to stop parsing right here
8749 */
8750 return(-1);
8751 }
8752 SKIP_BLANKS;
8753 } else {
8754 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8755 }
8756 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8757 ctxt->sax->startDocument(ctxt->userData);
8758
8759 /*
8760 * Doing validity checking on chunk doesn't make sense
8761 */
8762 ctxt->instate = XML_PARSER_CONTENT;
8763 ctxt->validate = 0;
8764 ctxt->loadsubset = 0;
8765 ctxt->depth = 0;
8766
8767 xmlParseContent(ctxt);
8768
8769 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008770 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008771 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008772 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008773 }
8774
8775 /*
8776 * SAX: end of the document processing.
8777 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008778 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008779 ctxt->sax->endDocument(ctxt->userData);
8780
8781 if (! ctxt->wellFormed) return(-1);
8782 return(0);
8783}
8784
Daniel Veillard73b013f2003-09-30 12:36:01 +00008785#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008786/************************************************************************
8787 * *
8788 * Progressive parsing interfaces *
8789 * *
8790 ************************************************************************/
8791
8792/**
8793 * xmlParseLookupSequence:
8794 * @ctxt: an XML parser context
8795 * @first: the first char to lookup
8796 * @next: the next char to lookup or zero
8797 * @third: the next char to lookup or zero
8798 *
8799 * Try to find if a sequence (first, next, third) or just (first next) or
8800 * (first) is available in the input stream.
8801 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8802 * to avoid rescanning sequences of bytes, it DOES change the state of the
8803 * parser, do not use liberally.
8804 *
8805 * Returns the index to the current parsing point if the full sequence
8806 * is available, -1 otherwise.
8807 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008808static int
Owen Taylor3473f882001-02-23 17:55:21 +00008809xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8810 xmlChar next, xmlChar third) {
8811 int base, len;
8812 xmlParserInputPtr in;
8813 const xmlChar *buf;
8814
8815 in = ctxt->input;
8816 if (in == NULL) return(-1);
8817 base = in->cur - in->base;
8818 if (base < 0) return(-1);
8819 if (ctxt->checkIndex > base)
8820 base = ctxt->checkIndex;
8821 if (in->buf == NULL) {
8822 buf = in->base;
8823 len = in->length;
8824 } else {
8825 buf = in->buf->buffer->content;
8826 len = in->buf->buffer->use;
8827 }
8828 /* take into account the sequence length */
8829 if (third) len -= 2;
8830 else if (next) len --;
8831 for (;base < len;base++) {
8832 if (buf[base] == first) {
8833 if (third != 0) {
8834 if ((buf[base + 1] != next) ||
8835 (buf[base + 2] != third)) continue;
8836 } else if (next != 0) {
8837 if (buf[base + 1] != next) continue;
8838 }
8839 ctxt->checkIndex = 0;
8840#ifdef DEBUG_PUSH
8841 if (next == 0)
8842 xmlGenericError(xmlGenericErrorContext,
8843 "PP: lookup '%c' found at %d\n",
8844 first, base);
8845 else if (third == 0)
8846 xmlGenericError(xmlGenericErrorContext,
8847 "PP: lookup '%c%c' found at %d\n",
8848 first, next, base);
8849 else
8850 xmlGenericError(xmlGenericErrorContext,
8851 "PP: lookup '%c%c%c' found at %d\n",
8852 first, next, third, base);
8853#endif
8854 return(base - (in->cur - in->base));
8855 }
8856 }
8857 ctxt->checkIndex = base;
8858#ifdef DEBUG_PUSH
8859 if (next == 0)
8860 xmlGenericError(xmlGenericErrorContext,
8861 "PP: lookup '%c' failed\n", first);
8862 else if (third == 0)
8863 xmlGenericError(xmlGenericErrorContext,
8864 "PP: lookup '%c%c' failed\n", first, next);
8865 else
8866 xmlGenericError(xmlGenericErrorContext,
8867 "PP: lookup '%c%c%c' failed\n", first, next, third);
8868#endif
8869 return(-1);
8870}
8871
8872/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008873 * xmlParseGetLasts:
8874 * @ctxt: an XML parser context
8875 * @lastlt: pointer to store the last '<' from the input
8876 * @lastgt: pointer to store the last '>' from the input
8877 *
8878 * Lookup the last < and > in the current chunk
8879 */
8880static void
8881xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8882 const xmlChar **lastgt) {
8883 const xmlChar *tmp;
8884
8885 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8886 xmlGenericError(xmlGenericErrorContext,
8887 "Internal error: xmlParseGetLasts\n");
8888 return;
8889 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008890 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008891 tmp = ctxt->input->end;
8892 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008893 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008894 if (tmp < ctxt->input->base) {
8895 *lastlt = NULL;
8896 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008897 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008898 *lastlt = tmp;
8899 tmp++;
8900 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8901 if (*tmp == '\'') {
8902 tmp++;
8903 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8904 if (tmp < ctxt->input->end) tmp++;
8905 } else if (*tmp == '"') {
8906 tmp++;
8907 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8908 if (tmp < ctxt->input->end) tmp++;
8909 } else
8910 tmp++;
8911 }
8912 if (tmp < ctxt->input->end)
8913 *lastgt = tmp;
8914 else {
8915 tmp = *lastlt;
8916 tmp--;
8917 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8918 if (tmp >= ctxt->input->base)
8919 *lastgt = tmp;
8920 else
8921 *lastgt = NULL;
8922 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008923 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008924 } else {
8925 *lastlt = NULL;
8926 *lastgt = NULL;
8927 }
8928}
8929/**
Owen Taylor3473f882001-02-23 17:55:21 +00008930 * xmlParseTryOrFinish:
8931 * @ctxt: an XML parser context
8932 * @terminate: last chunk indicator
8933 *
8934 * Try to progress on parsing
8935 *
8936 * Returns zero if no parsing was possible
8937 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008938static int
Owen Taylor3473f882001-02-23 17:55:21 +00008939xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8940 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008941 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008942 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008943 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008944
8945#ifdef DEBUG_PUSH
8946 switch (ctxt->instate) {
8947 case XML_PARSER_EOF:
8948 xmlGenericError(xmlGenericErrorContext,
8949 "PP: try EOF\n"); break;
8950 case XML_PARSER_START:
8951 xmlGenericError(xmlGenericErrorContext,
8952 "PP: try START\n"); break;
8953 case XML_PARSER_MISC:
8954 xmlGenericError(xmlGenericErrorContext,
8955 "PP: try MISC\n");break;
8956 case XML_PARSER_COMMENT:
8957 xmlGenericError(xmlGenericErrorContext,
8958 "PP: try COMMENT\n");break;
8959 case XML_PARSER_PROLOG:
8960 xmlGenericError(xmlGenericErrorContext,
8961 "PP: try PROLOG\n");break;
8962 case XML_PARSER_START_TAG:
8963 xmlGenericError(xmlGenericErrorContext,
8964 "PP: try START_TAG\n");break;
8965 case XML_PARSER_CONTENT:
8966 xmlGenericError(xmlGenericErrorContext,
8967 "PP: try CONTENT\n");break;
8968 case XML_PARSER_CDATA_SECTION:
8969 xmlGenericError(xmlGenericErrorContext,
8970 "PP: try CDATA_SECTION\n");break;
8971 case XML_PARSER_END_TAG:
8972 xmlGenericError(xmlGenericErrorContext,
8973 "PP: try END_TAG\n");break;
8974 case XML_PARSER_ENTITY_DECL:
8975 xmlGenericError(xmlGenericErrorContext,
8976 "PP: try ENTITY_DECL\n");break;
8977 case XML_PARSER_ENTITY_VALUE:
8978 xmlGenericError(xmlGenericErrorContext,
8979 "PP: try ENTITY_VALUE\n");break;
8980 case XML_PARSER_ATTRIBUTE_VALUE:
8981 xmlGenericError(xmlGenericErrorContext,
8982 "PP: try ATTRIBUTE_VALUE\n");break;
8983 case XML_PARSER_DTD:
8984 xmlGenericError(xmlGenericErrorContext,
8985 "PP: try DTD\n");break;
8986 case XML_PARSER_EPILOG:
8987 xmlGenericError(xmlGenericErrorContext,
8988 "PP: try EPILOG\n");break;
8989 case XML_PARSER_PI:
8990 xmlGenericError(xmlGenericErrorContext,
8991 "PP: try PI\n");break;
8992 case XML_PARSER_IGNORE:
8993 xmlGenericError(xmlGenericErrorContext,
8994 "PP: try IGNORE\n");break;
8995 }
8996#endif
8997
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008998 if ((ctxt->input != NULL) &&
8999 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009000 xmlSHRINK(ctxt);
9001 ctxt->checkIndex = 0;
9002 }
9003 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009004
Daniel Veillarda880b122003-04-21 21:36:41 +00009005 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009006 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9007 return(0);
9008
9009
Owen Taylor3473f882001-02-23 17:55:21 +00009010 /*
9011 * Pop-up of finished entities.
9012 */
9013 while ((RAW == 0) && (ctxt->inputNr > 1))
9014 xmlPopInput(ctxt);
9015
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009016 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009017 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009018 avail = ctxt->input->length -
9019 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009020 else {
9021 /*
9022 * If we are operating on converted input, try to flush
9023 * remainng chars to avoid them stalling in the non-converted
9024 * buffer.
9025 */
9026 if ((ctxt->input->buf->raw != NULL) &&
9027 (ctxt->input->buf->raw->use > 0)) {
9028 int base = ctxt->input->base -
9029 ctxt->input->buf->buffer->content;
9030 int current = ctxt->input->cur - ctxt->input->base;
9031
9032 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9033 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9034 ctxt->input->cur = ctxt->input->base + current;
9035 ctxt->input->end =
9036 &ctxt->input->buf->buffer->content[
9037 ctxt->input->buf->buffer->use];
9038 }
9039 avail = ctxt->input->buf->buffer->use -
9040 (ctxt->input->cur - ctxt->input->base);
9041 }
Owen Taylor3473f882001-02-23 17:55:21 +00009042 if (avail < 1)
9043 goto done;
9044 switch (ctxt->instate) {
9045 case XML_PARSER_EOF:
9046 /*
9047 * Document parsing is done !
9048 */
9049 goto done;
9050 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009051 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9052 xmlChar start[4];
9053 xmlCharEncoding enc;
9054
9055 /*
9056 * Very first chars read from the document flow.
9057 */
9058 if (avail < 4)
9059 goto done;
9060
9061 /*
9062 * Get the 4 first bytes and decode the charset
9063 * if enc != XML_CHAR_ENCODING_NONE
9064 * plug some encoding conversion routines.
9065 */
9066 start[0] = RAW;
9067 start[1] = NXT(1);
9068 start[2] = NXT(2);
9069 start[3] = NXT(3);
9070 enc = xmlDetectCharEncoding(start, 4);
9071 if (enc != XML_CHAR_ENCODING_NONE) {
9072 xmlSwitchEncoding(ctxt, enc);
9073 }
9074 break;
9075 }
Owen Taylor3473f882001-02-23 17:55:21 +00009076
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009077 if (avail < 2)
9078 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009079 cur = ctxt->input->cur[0];
9080 next = ctxt->input->cur[1];
9081 if (cur == 0) {
9082 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9083 ctxt->sax->setDocumentLocator(ctxt->userData,
9084 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009085 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009086 ctxt->instate = XML_PARSER_EOF;
9087#ifdef DEBUG_PUSH
9088 xmlGenericError(xmlGenericErrorContext,
9089 "PP: entering EOF\n");
9090#endif
9091 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9092 ctxt->sax->endDocument(ctxt->userData);
9093 goto done;
9094 }
9095 if ((cur == '<') && (next == '?')) {
9096 /* PI or XML decl */
9097 if (avail < 5) return(ret);
9098 if ((!terminate) &&
9099 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9100 return(ret);
9101 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9102 ctxt->sax->setDocumentLocator(ctxt->userData,
9103 &xmlDefaultSAXLocator);
9104 if ((ctxt->input->cur[2] == 'x') &&
9105 (ctxt->input->cur[3] == 'm') &&
9106 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009107 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009108 ret += 5;
9109#ifdef DEBUG_PUSH
9110 xmlGenericError(xmlGenericErrorContext,
9111 "PP: Parsing XML Decl\n");
9112#endif
9113 xmlParseXMLDecl(ctxt);
9114 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9115 /*
9116 * The XML REC instructs us to stop parsing right
9117 * here
9118 */
9119 ctxt->instate = XML_PARSER_EOF;
9120 return(0);
9121 }
9122 ctxt->standalone = ctxt->input->standalone;
9123 if ((ctxt->encoding == NULL) &&
9124 (ctxt->input->encoding != NULL))
9125 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9126 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9127 (!ctxt->disableSAX))
9128 ctxt->sax->startDocument(ctxt->userData);
9129 ctxt->instate = XML_PARSER_MISC;
9130#ifdef DEBUG_PUSH
9131 xmlGenericError(xmlGenericErrorContext,
9132 "PP: entering MISC\n");
9133#endif
9134 } else {
9135 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9136 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9137 (!ctxt->disableSAX))
9138 ctxt->sax->startDocument(ctxt->userData);
9139 ctxt->instate = XML_PARSER_MISC;
9140#ifdef DEBUG_PUSH
9141 xmlGenericError(xmlGenericErrorContext,
9142 "PP: entering MISC\n");
9143#endif
9144 }
9145 } else {
9146 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9147 ctxt->sax->setDocumentLocator(ctxt->userData,
9148 &xmlDefaultSAXLocator);
9149 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009150 if (ctxt->version == NULL) {
9151 xmlErrMemory(ctxt, NULL);
9152 break;
9153 }
Owen Taylor3473f882001-02-23 17:55:21 +00009154 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9155 (!ctxt->disableSAX))
9156 ctxt->sax->startDocument(ctxt->userData);
9157 ctxt->instate = XML_PARSER_MISC;
9158#ifdef DEBUG_PUSH
9159 xmlGenericError(xmlGenericErrorContext,
9160 "PP: entering MISC\n");
9161#endif
9162 }
9163 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009164 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009165 const xmlChar *name;
9166 const xmlChar *prefix;
9167 const xmlChar *URI;
9168 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009169
9170 if ((avail < 2) && (ctxt->inputNr == 1))
9171 goto done;
9172 cur = ctxt->input->cur[0];
9173 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009174 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009175 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009176 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9177 ctxt->sax->endDocument(ctxt->userData);
9178 goto done;
9179 }
9180 if (!terminate) {
9181 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009182 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009183 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009184 goto done;
9185 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9186 goto done;
9187 }
9188 }
9189 if (ctxt->spaceNr == 0)
9190 spacePush(ctxt, -1);
9191 else
9192 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009193#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009194 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009195#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009196 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009197#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009198 else
9199 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009200#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009201 if (name == NULL) {
9202 spacePop(ctxt);
9203 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009204 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9205 ctxt->sax->endDocument(ctxt->userData);
9206 goto done;
9207 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009208#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009209 /*
9210 * [ VC: Root Element Type ]
9211 * The Name in the document type declaration must match
9212 * the element type of the root element.
9213 */
9214 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9215 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9216 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009217#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009218
9219 /*
9220 * Check for an Empty Element.
9221 */
9222 if ((RAW == '/') && (NXT(1) == '>')) {
9223 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009224
9225 if (ctxt->sax2) {
9226 if ((ctxt->sax != NULL) &&
9227 (ctxt->sax->endElementNs != NULL) &&
9228 (!ctxt->disableSAX))
9229 ctxt->sax->endElementNs(ctxt->userData, name,
9230 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009231#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009232 } else {
9233 if ((ctxt->sax != NULL) &&
9234 (ctxt->sax->endElement != NULL) &&
9235 (!ctxt->disableSAX))
9236 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009237#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009238 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009239 spacePop(ctxt);
9240 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009241 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009242 } else {
9243 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009244 }
9245 break;
9246 }
9247 if (RAW == '>') {
9248 NEXT;
9249 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009250 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009251 "Couldn't find end of Start Tag %s\n",
9252 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009253 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009254 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009255 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009256 if (ctxt->sax2)
9257 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009258#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009259 else
9260 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009261#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009262
Daniel Veillarda880b122003-04-21 21:36:41 +00009263 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009264 break;
9265 }
9266 case XML_PARSER_CONTENT: {
9267 const xmlChar *test;
9268 unsigned int cons;
9269 if ((avail < 2) && (ctxt->inputNr == 1))
9270 goto done;
9271 cur = ctxt->input->cur[0];
9272 next = ctxt->input->cur[1];
9273
9274 test = CUR_PTR;
9275 cons = ctxt->input->consumed;
9276 if ((cur == '<') && (next == '/')) {
9277 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009278 break;
9279 } else if ((cur == '<') && (next == '?')) {
9280 if ((!terminate) &&
9281 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9282 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009283 xmlParsePI(ctxt);
9284 } else if ((cur == '<') && (next != '!')) {
9285 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009286 break;
9287 } else if ((cur == '<') && (next == '!') &&
9288 (ctxt->input->cur[2] == '-') &&
9289 (ctxt->input->cur[3] == '-')) {
9290 if ((!terminate) &&
9291 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9292 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009293 xmlParseComment(ctxt);
9294 ctxt->instate = XML_PARSER_CONTENT;
9295 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9296 (ctxt->input->cur[2] == '[') &&
9297 (ctxt->input->cur[3] == 'C') &&
9298 (ctxt->input->cur[4] == 'D') &&
9299 (ctxt->input->cur[5] == 'A') &&
9300 (ctxt->input->cur[6] == 'T') &&
9301 (ctxt->input->cur[7] == 'A') &&
9302 (ctxt->input->cur[8] == '[')) {
9303 SKIP(9);
9304 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009305 break;
9306 } else if ((cur == '<') && (next == '!') &&
9307 (avail < 9)) {
9308 goto done;
9309 } else if (cur == '&') {
9310 if ((!terminate) &&
9311 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9312 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009313 xmlParseReference(ctxt);
9314 } else {
9315 /* TODO Avoid the extra copy, handle directly !!! */
9316 /*
9317 * Goal of the following test is:
9318 * - minimize calls to the SAX 'character' callback
9319 * when they are mergeable
9320 * - handle an problem for isBlank when we only parse
9321 * a sequence of blank chars and the next one is
9322 * not available to check against '<' presence.
9323 * - tries to homogenize the differences in SAX
9324 * callbacks between the push and pull versions
9325 * of the parser.
9326 */
9327 if ((ctxt->inputNr == 1) &&
9328 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9329 if (!terminate) {
9330 if (ctxt->progressive) {
9331 if ((lastlt == NULL) ||
9332 (ctxt->input->cur > lastlt))
9333 goto done;
9334 } else if (xmlParseLookupSequence(ctxt,
9335 '<', 0, 0) < 0) {
9336 goto done;
9337 }
9338 }
9339 }
9340 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009341 xmlParseCharData(ctxt, 0);
9342 }
9343 /*
9344 * Pop-up of finished entities.
9345 */
9346 while ((RAW == 0) && (ctxt->inputNr > 1))
9347 xmlPopInput(ctxt);
9348 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009349 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9350 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009351 ctxt->instate = XML_PARSER_EOF;
9352 break;
9353 }
9354 break;
9355 }
9356 case XML_PARSER_END_TAG:
9357 if (avail < 2)
9358 goto done;
9359 if (!terminate) {
9360 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009361 /* > can be found unescaped in attribute values */
9362 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009363 goto done;
9364 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9365 goto done;
9366 }
9367 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009368 if (ctxt->sax2) {
9369 xmlParseEndTag2(ctxt,
9370 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9371 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009372 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009373 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009374 }
9375#ifdef LIBXML_SAX1_ENABLED
9376 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009377 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009378#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009379 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009380 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009381 } else {
9382 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009383 }
9384 break;
9385 case XML_PARSER_CDATA_SECTION: {
9386 /*
9387 * The Push mode need to have the SAX callback for
9388 * cdataBlock merge back contiguous callbacks.
9389 */
9390 int base;
9391
9392 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9393 if (base < 0) {
9394 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9395 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9396 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009397 ctxt->sax->cdataBlock(ctxt->userData,
9398 ctxt->input->cur,
9399 XML_PARSER_BIG_BUFFER_SIZE);
9400 else if (ctxt->sax->characters != NULL)
9401 ctxt->sax->characters(ctxt->userData,
9402 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009403 XML_PARSER_BIG_BUFFER_SIZE);
9404 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009405 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009406 ctxt->checkIndex = 0;
9407 }
9408 goto done;
9409 } else {
9410 if ((ctxt->sax != NULL) && (base > 0) &&
9411 (!ctxt->disableSAX)) {
9412 if (ctxt->sax->cdataBlock != NULL)
9413 ctxt->sax->cdataBlock(ctxt->userData,
9414 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009415 else if (ctxt->sax->characters != NULL)
9416 ctxt->sax->characters(ctxt->userData,
9417 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009418 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009419 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009420 ctxt->checkIndex = 0;
9421 ctxt->instate = XML_PARSER_CONTENT;
9422#ifdef DEBUG_PUSH
9423 xmlGenericError(xmlGenericErrorContext,
9424 "PP: entering CONTENT\n");
9425#endif
9426 }
9427 break;
9428 }
Owen Taylor3473f882001-02-23 17:55:21 +00009429 case XML_PARSER_MISC:
9430 SKIP_BLANKS;
9431 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009432 avail = ctxt->input->length -
9433 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009434 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009435 avail = ctxt->input->buf->buffer->use -
9436 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009437 if (avail < 2)
9438 goto done;
9439 cur = ctxt->input->cur[0];
9440 next = ctxt->input->cur[1];
9441 if ((cur == '<') && (next == '?')) {
9442 if ((!terminate) &&
9443 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9444 goto done;
9445#ifdef DEBUG_PUSH
9446 xmlGenericError(xmlGenericErrorContext,
9447 "PP: Parsing PI\n");
9448#endif
9449 xmlParsePI(ctxt);
9450 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009451 (ctxt->input->cur[2] == '-') &&
9452 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009453 if ((!terminate) &&
9454 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9455 goto done;
9456#ifdef DEBUG_PUSH
9457 xmlGenericError(xmlGenericErrorContext,
9458 "PP: Parsing Comment\n");
9459#endif
9460 xmlParseComment(ctxt);
9461 ctxt->instate = XML_PARSER_MISC;
9462 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009463 (ctxt->input->cur[2] == 'D') &&
9464 (ctxt->input->cur[3] == 'O') &&
9465 (ctxt->input->cur[4] == 'C') &&
9466 (ctxt->input->cur[5] == 'T') &&
9467 (ctxt->input->cur[6] == 'Y') &&
9468 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009469 (ctxt->input->cur[8] == 'E')) {
9470 if ((!terminate) &&
9471 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9472 goto done;
9473#ifdef DEBUG_PUSH
9474 xmlGenericError(xmlGenericErrorContext,
9475 "PP: Parsing internal subset\n");
9476#endif
9477 ctxt->inSubset = 1;
9478 xmlParseDocTypeDecl(ctxt);
9479 if (RAW == '[') {
9480 ctxt->instate = XML_PARSER_DTD;
9481#ifdef DEBUG_PUSH
9482 xmlGenericError(xmlGenericErrorContext,
9483 "PP: entering DTD\n");
9484#endif
9485 } else {
9486 /*
9487 * Create and update the external subset.
9488 */
9489 ctxt->inSubset = 2;
9490 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9491 (ctxt->sax->externalSubset != NULL))
9492 ctxt->sax->externalSubset(ctxt->userData,
9493 ctxt->intSubName, ctxt->extSubSystem,
9494 ctxt->extSubURI);
9495 ctxt->inSubset = 0;
9496 ctxt->instate = XML_PARSER_PROLOG;
9497#ifdef DEBUG_PUSH
9498 xmlGenericError(xmlGenericErrorContext,
9499 "PP: entering PROLOG\n");
9500#endif
9501 }
9502 } else if ((cur == '<') && (next == '!') &&
9503 (avail < 9)) {
9504 goto done;
9505 } else {
9506 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009507 ctxt->progressive = 1;
9508 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009509#ifdef DEBUG_PUSH
9510 xmlGenericError(xmlGenericErrorContext,
9511 "PP: entering START_TAG\n");
9512#endif
9513 }
9514 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009515 case XML_PARSER_PROLOG:
9516 SKIP_BLANKS;
9517 if (ctxt->input->buf == NULL)
9518 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9519 else
9520 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9521 if (avail < 2)
9522 goto done;
9523 cur = ctxt->input->cur[0];
9524 next = ctxt->input->cur[1];
9525 if ((cur == '<') && (next == '?')) {
9526 if ((!terminate) &&
9527 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9528 goto done;
9529#ifdef DEBUG_PUSH
9530 xmlGenericError(xmlGenericErrorContext,
9531 "PP: Parsing PI\n");
9532#endif
9533 xmlParsePI(ctxt);
9534 } else if ((cur == '<') && (next == '!') &&
9535 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9536 if ((!terminate) &&
9537 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9538 goto done;
9539#ifdef DEBUG_PUSH
9540 xmlGenericError(xmlGenericErrorContext,
9541 "PP: Parsing Comment\n");
9542#endif
9543 xmlParseComment(ctxt);
9544 ctxt->instate = XML_PARSER_PROLOG;
9545 } else if ((cur == '<') && (next == '!') &&
9546 (avail < 4)) {
9547 goto done;
9548 } else {
9549 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009550 if (ctxt->progressive == 0)
9551 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009552 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009553#ifdef DEBUG_PUSH
9554 xmlGenericError(xmlGenericErrorContext,
9555 "PP: entering START_TAG\n");
9556#endif
9557 }
9558 break;
9559 case XML_PARSER_EPILOG:
9560 SKIP_BLANKS;
9561 if (ctxt->input->buf == NULL)
9562 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9563 else
9564 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9565 if (avail < 2)
9566 goto done;
9567 cur = ctxt->input->cur[0];
9568 next = ctxt->input->cur[1];
9569 if ((cur == '<') && (next == '?')) {
9570 if ((!terminate) &&
9571 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9572 goto done;
9573#ifdef DEBUG_PUSH
9574 xmlGenericError(xmlGenericErrorContext,
9575 "PP: Parsing PI\n");
9576#endif
9577 xmlParsePI(ctxt);
9578 ctxt->instate = XML_PARSER_EPILOG;
9579 } else if ((cur == '<') && (next == '!') &&
9580 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9581 if ((!terminate) &&
9582 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9583 goto done;
9584#ifdef DEBUG_PUSH
9585 xmlGenericError(xmlGenericErrorContext,
9586 "PP: Parsing Comment\n");
9587#endif
9588 xmlParseComment(ctxt);
9589 ctxt->instate = XML_PARSER_EPILOG;
9590 } else if ((cur == '<') && (next == '!') &&
9591 (avail < 4)) {
9592 goto done;
9593 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009594 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009595 ctxt->instate = XML_PARSER_EOF;
9596#ifdef DEBUG_PUSH
9597 xmlGenericError(xmlGenericErrorContext,
9598 "PP: entering EOF\n");
9599#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009600 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009601 ctxt->sax->endDocument(ctxt->userData);
9602 goto done;
9603 }
9604 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009605 case XML_PARSER_DTD: {
9606 /*
9607 * Sorry but progressive parsing of the internal subset
9608 * is not expected to be supported. We first check that
9609 * the full content of the internal subset is available and
9610 * the parsing is launched only at that point.
9611 * Internal subset ends up with "']' S? '>'" in an unescaped
9612 * section and not in a ']]>' sequence which are conditional
9613 * sections (whoever argued to keep that crap in XML deserve
9614 * a place in hell !).
9615 */
9616 int base, i;
9617 xmlChar *buf;
9618 xmlChar quote = 0;
9619
9620 base = ctxt->input->cur - ctxt->input->base;
9621 if (base < 0) return(0);
9622 if (ctxt->checkIndex > base)
9623 base = ctxt->checkIndex;
9624 buf = ctxt->input->buf->buffer->content;
9625 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9626 base++) {
9627 if (quote != 0) {
9628 if (buf[base] == quote)
9629 quote = 0;
9630 continue;
9631 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009632 if ((quote == 0) && (buf[base] == '<')) {
9633 int found = 0;
9634 /* special handling of comments */
9635 if (((unsigned int) base + 4 <
9636 ctxt->input->buf->buffer->use) &&
9637 (buf[base + 1] == '!') &&
9638 (buf[base + 2] == '-') &&
9639 (buf[base + 3] == '-')) {
9640 for (;(unsigned int) base + 3 <
9641 ctxt->input->buf->buffer->use; base++) {
9642 if ((buf[base] == '-') &&
9643 (buf[base + 1] == '-') &&
9644 (buf[base + 2] == '>')) {
9645 found = 1;
9646 base += 2;
9647 break;
9648 }
9649 }
9650 if (!found)
9651 break;
9652 continue;
9653 }
9654 }
Owen Taylor3473f882001-02-23 17:55:21 +00009655 if (buf[base] == '"') {
9656 quote = '"';
9657 continue;
9658 }
9659 if (buf[base] == '\'') {
9660 quote = '\'';
9661 continue;
9662 }
9663 if (buf[base] == ']') {
9664 if ((unsigned int) base +1 >=
9665 ctxt->input->buf->buffer->use)
9666 break;
9667 if (buf[base + 1] == ']') {
9668 /* conditional crap, skip both ']' ! */
9669 base++;
9670 continue;
9671 }
9672 for (i = 0;
9673 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9674 i++) {
9675 if (buf[base + i] == '>')
9676 goto found_end_int_subset;
9677 }
9678 break;
9679 }
9680 }
9681 /*
9682 * We didn't found the end of the Internal subset
9683 */
9684 if (quote == 0)
9685 ctxt->checkIndex = base;
9686#ifdef DEBUG_PUSH
9687 if (next == 0)
9688 xmlGenericError(xmlGenericErrorContext,
9689 "PP: lookup of int subset end filed\n");
9690#endif
9691 goto done;
9692
9693found_end_int_subset:
9694 xmlParseInternalSubset(ctxt);
9695 ctxt->inSubset = 2;
9696 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9697 (ctxt->sax->externalSubset != NULL))
9698 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9699 ctxt->extSubSystem, ctxt->extSubURI);
9700 ctxt->inSubset = 0;
9701 ctxt->instate = XML_PARSER_PROLOG;
9702 ctxt->checkIndex = 0;
9703#ifdef DEBUG_PUSH
9704 xmlGenericError(xmlGenericErrorContext,
9705 "PP: entering PROLOG\n");
9706#endif
9707 break;
9708 }
9709 case XML_PARSER_COMMENT:
9710 xmlGenericError(xmlGenericErrorContext,
9711 "PP: internal error, state == COMMENT\n");
9712 ctxt->instate = XML_PARSER_CONTENT;
9713#ifdef DEBUG_PUSH
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: entering CONTENT\n");
9716#endif
9717 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009718 case XML_PARSER_IGNORE:
9719 xmlGenericError(xmlGenericErrorContext,
9720 "PP: internal error, state == IGNORE");
9721 ctxt->instate = XML_PARSER_DTD;
9722#ifdef DEBUG_PUSH
9723 xmlGenericError(xmlGenericErrorContext,
9724 "PP: entering DTD\n");
9725#endif
9726 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009727 case XML_PARSER_PI:
9728 xmlGenericError(xmlGenericErrorContext,
9729 "PP: internal error, state == PI\n");
9730 ctxt->instate = XML_PARSER_CONTENT;
9731#ifdef DEBUG_PUSH
9732 xmlGenericError(xmlGenericErrorContext,
9733 "PP: entering CONTENT\n");
9734#endif
9735 break;
9736 case XML_PARSER_ENTITY_DECL:
9737 xmlGenericError(xmlGenericErrorContext,
9738 "PP: internal error, state == ENTITY_DECL\n");
9739 ctxt->instate = XML_PARSER_DTD;
9740#ifdef DEBUG_PUSH
9741 xmlGenericError(xmlGenericErrorContext,
9742 "PP: entering DTD\n");
9743#endif
9744 break;
9745 case XML_PARSER_ENTITY_VALUE:
9746 xmlGenericError(xmlGenericErrorContext,
9747 "PP: internal error, state == ENTITY_VALUE\n");
9748 ctxt->instate = XML_PARSER_CONTENT;
9749#ifdef DEBUG_PUSH
9750 xmlGenericError(xmlGenericErrorContext,
9751 "PP: entering DTD\n");
9752#endif
9753 break;
9754 case XML_PARSER_ATTRIBUTE_VALUE:
9755 xmlGenericError(xmlGenericErrorContext,
9756 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9757 ctxt->instate = XML_PARSER_START_TAG;
9758#ifdef DEBUG_PUSH
9759 xmlGenericError(xmlGenericErrorContext,
9760 "PP: entering START_TAG\n");
9761#endif
9762 break;
9763 case XML_PARSER_SYSTEM_LITERAL:
9764 xmlGenericError(xmlGenericErrorContext,
9765 "PP: internal error, state == SYSTEM_LITERAL\n");
9766 ctxt->instate = XML_PARSER_START_TAG;
9767#ifdef DEBUG_PUSH
9768 xmlGenericError(xmlGenericErrorContext,
9769 "PP: entering START_TAG\n");
9770#endif
9771 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009772 case XML_PARSER_PUBLIC_LITERAL:
9773 xmlGenericError(xmlGenericErrorContext,
9774 "PP: internal error, state == PUBLIC_LITERAL\n");
9775 ctxt->instate = XML_PARSER_START_TAG;
9776#ifdef DEBUG_PUSH
9777 xmlGenericError(xmlGenericErrorContext,
9778 "PP: entering START_TAG\n");
9779#endif
9780 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009781 }
9782 }
9783done:
9784#ifdef DEBUG_PUSH
9785 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9786#endif
9787 return(ret);
9788}
9789
9790/**
Owen Taylor3473f882001-02-23 17:55:21 +00009791 * xmlParseChunk:
9792 * @ctxt: an XML parser context
9793 * @chunk: an char array
9794 * @size: the size in byte of the chunk
9795 * @terminate: last chunk indicator
9796 *
9797 * Parse a Chunk of memory
9798 *
9799 * Returns zero if no error, the xmlParserErrors otherwise.
9800 */
9801int
9802xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9803 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009804 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9805 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009806 if (ctxt->instate == XML_PARSER_START)
9807 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009808 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9809 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9810 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9811 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +00009812 int res;
Owen Taylor3473f882001-02-23 17:55:21 +00009813
William M. Bracka3215c72004-07-31 16:24:01 +00009814 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9815 if (res < 0) {
9816 ctxt->errNo = XML_PARSER_EOF;
9817 ctxt->disableSAX = 1;
9818 return (XML_PARSER_EOF);
9819 }
Owen Taylor3473f882001-02-23 17:55:21 +00009820 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9821 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009822 ctxt->input->end =
9823 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009824#ifdef DEBUG_PUSH
9825 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9826#endif
9827
Owen Taylor3473f882001-02-23 17:55:21 +00009828 } else if (ctxt->instate != XML_PARSER_EOF) {
9829 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9830 xmlParserInputBufferPtr in = ctxt->input->buf;
9831 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9832 (in->raw != NULL)) {
9833 int nbchars;
9834
9835 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9836 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009837 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009838 xmlGenericError(xmlGenericErrorContext,
9839 "xmlParseChunk: encoder error\n");
9840 return(XML_ERR_INVALID_ENCODING);
9841 }
9842 }
9843 }
9844 }
9845 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009846 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9847 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009848 if (terminate) {
9849 /*
9850 * Check for termination
9851 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009852 int avail = 0;
9853 if (ctxt->input->buf == NULL)
9854 avail = ctxt->input->length -
9855 (ctxt->input->cur - ctxt->input->base);
9856 else
9857 avail = ctxt->input->buf->buffer->use -
9858 (ctxt->input->cur - ctxt->input->base);
9859
Owen Taylor3473f882001-02-23 17:55:21 +00009860 if ((ctxt->instate != XML_PARSER_EOF) &&
9861 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009862 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009863 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009864 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009865 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009866 }
Owen Taylor3473f882001-02-23 17:55:21 +00009867 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009868 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009869 ctxt->sax->endDocument(ctxt->userData);
9870 }
9871 ctxt->instate = XML_PARSER_EOF;
9872 }
9873 return((xmlParserErrors) ctxt->errNo);
9874}
9875
9876/************************************************************************
9877 * *
9878 * I/O front end functions to the parser *
9879 * *
9880 ************************************************************************/
9881
9882/**
9883 * xmlStopParser:
9884 * @ctxt: an XML parser context
9885 *
9886 * Blocks further parser processing
9887 */
9888void
9889xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009890 if (ctxt == NULL)
9891 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009892 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009893 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009894 if (ctxt->input != NULL)
9895 ctxt->input->cur = BAD_CAST"";
9896}
9897
9898/**
9899 * xmlCreatePushParserCtxt:
9900 * @sax: a SAX handler
9901 * @user_data: The user data returned on SAX callbacks
9902 * @chunk: a pointer to an array of chars
9903 * @size: number of chars in the array
9904 * @filename: an optional file name or URI
9905 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009906 * Create a parser context for using the XML parser in push mode.
9907 * If @buffer and @size are non-NULL, the data is used to detect
9908 * the encoding. The remaining characters will be parsed so they
9909 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009910 * To allow content encoding detection, @size should be >= 4
9911 * The value of @filename is used for fetching external entities
9912 * and error/warning reports.
9913 *
9914 * Returns the new parser context or NULL
9915 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009916
Owen Taylor3473f882001-02-23 17:55:21 +00009917xmlParserCtxtPtr
9918xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9919 const char *chunk, int size, const char *filename) {
9920 xmlParserCtxtPtr ctxt;
9921 xmlParserInputPtr inputStream;
9922 xmlParserInputBufferPtr buf;
9923 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9924
9925 /*
9926 * plug some encoding conversion routines
9927 */
9928 if ((chunk != NULL) && (size >= 4))
9929 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9930
9931 buf = xmlAllocParserInputBuffer(enc);
9932 if (buf == NULL) return(NULL);
9933
9934 ctxt = xmlNewParserCtxt();
9935 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009936 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009937 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009938 return(NULL);
9939 }
Daniel Veillard03a53c32004-10-26 16:06:51 +00009940 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009941 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9942 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009943 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009944 xmlFreeParserInputBuffer(buf);
9945 xmlFreeParserCtxt(ctxt);
9946 return(NULL);
9947 }
Owen Taylor3473f882001-02-23 17:55:21 +00009948 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009949#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009950 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009951#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009952 xmlFree(ctxt->sax);
9953 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9954 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009955 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009956 xmlFreeParserInputBuffer(buf);
9957 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009958 return(NULL);
9959 }
9960 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9961 if (user_data != NULL)
9962 ctxt->userData = user_data;
9963 }
9964 if (filename == NULL) {
9965 ctxt->directory = NULL;
9966 } else {
9967 ctxt->directory = xmlParserGetDirectory(filename);
9968 }
9969
9970 inputStream = xmlNewInputStream(ctxt);
9971 if (inputStream == NULL) {
9972 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009973 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009974 return(NULL);
9975 }
9976
9977 if (filename == NULL)
9978 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +00009979 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +00009980 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009981 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +00009982 if (inputStream->filename == NULL) {
9983 xmlFreeParserCtxt(ctxt);
9984 xmlFreeParserInputBuffer(buf);
9985 return(NULL);
9986 }
9987 }
Owen Taylor3473f882001-02-23 17:55:21 +00009988 inputStream->buf = buf;
9989 inputStream->base = inputStream->buf->buffer->content;
9990 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009991 inputStream->end =
9992 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009993
9994 inputPush(ctxt, inputStream);
9995
9996 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9997 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009998 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9999 int cur = ctxt->input->cur - ctxt->input->base;
10000
Owen Taylor3473f882001-02-23 17:55:21 +000010001 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010002
10003 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10004 ctxt->input->cur = ctxt->input->base + cur;
10005 ctxt->input->end =
10006 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010007#ifdef DEBUG_PUSH
10008 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10009#endif
10010 }
10011
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010012 if (enc != XML_CHAR_ENCODING_NONE) {
10013 xmlSwitchEncoding(ctxt, enc);
10014 }
10015
Owen Taylor3473f882001-02-23 17:55:21 +000010016 return(ctxt);
10017}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010018#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010019
10020/**
10021 * xmlCreateIOParserCtxt:
10022 * @sax: a SAX handler
10023 * @user_data: The user data returned on SAX callbacks
10024 * @ioread: an I/O read function
10025 * @ioclose: an I/O close function
10026 * @ioctx: an I/O handler
10027 * @enc: the charset encoding if known
10028 *
10029 * Create a parser context for using the XML parser with an existing
10030 * I/O stream
10031 *
10032 * Returns the new parser context or NULL
10033 */
10034xmlParserCtxtPtr
10035xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10036 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10037 void *ioctx, xmlCharEncoding enc) {
10038 xmlParserCtxtPtr ctxt;
10039 xmlParserInputPtr inputStream;
10040 xmlParserInputBufferPtr buf;
10041
10042 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10043 if (buf == NULL) return(NULL);
10044
10045 ctxt = xmlNewParserCtxt();
10046 if (ctxt == NULL) {
10047 xmlFree(buf);
10048 return(NULL);
10049 }
10050 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010051#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010052 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010053#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010054 xmlFree(ctxt->sax);
10055 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10056 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010057 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010058 xmlFree(ctxt);
10059 return(NULL);
10060 }
10061 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10062 if (user_data != NULL)
10063 ctxt->userData = user_data;
10064 }
10065
10066 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10067 if (inputStream == NULL) {
10068 xmlFreeParserCtxt(ctxt);
10069 return(NULL);
10070 }
10071 inputPush(ctxt, inputStream);
10072
10073 return(ctxt);
10074}
10075
Daniel Veillard4432df22003-09-28 18:58:27 +000010076#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010077/************************************************************************
10078 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010079 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010080 * *
10081 ************************************************************************/
10082
10083/**
10084 * xmlIOParseDTD:
10085 * @sax: the SAX handler block or NULL
10086 * @input: an Input Buffer
10087 * @enc: the charset encoding if known
10088 *
10089 * Load and parse a DTD
10090 *
10091 * Returns the resulting xmlDtdPtr or NULL in case of error.
10092 * @input will be freed at parsing end.
10093 */
10094
10095xmlDtdPtr
10096xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10097 xmlCharEncoding enc) {
10098 xmlDtdPtr ret = NULL;
10099 xmlParserCtxtPtr ctxt;
10100 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010101 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010102
10103 if (input == NULL)
10104 return(NULL);
10105
10106 ctxt = xmlNewParserCtxt();
10107 if (ctxt == NULL) {
10108 return(NULL);
10109 }
10110
10111 /*
10112 * Set-up the SAX context
10113 */
10114 if (sax != NULL) {
10115 if (ctxt->sax != NULL)
10116 xmlFree(ctxt->sax);
10117 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010118 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010119 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010120 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010121
10122 /*
10123 * generate a parser input from the I/O handler
10124 */
10125
Daniel Veillard43caefb2003-12-07 19:32:22 +000010126 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010127 if (pinput == NULL) {
10128 if (sax != NULL) ctxt->sax = NULL;
10129 xmlFreeParserCtxt(ctxt);
10130 return(NULL);
10131 }
10132
10133 /*
10134 * plug some encoding conversion routines here.
10135 */
10136 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010137 if (enc != XML_CHAR_ENCODING_NONE) {
10138 xmlSwitchEncoding(ctxt, enc);
10139 }
Owen Taylor3473f882001-02-23 17:55:21 +000010140
10141 pinput->filename = NULL;
10142 pinput->line = 1;
10143 pinput->col = 1;
10144 pinput->base = ctxt->input->cur;
10145 pinput->cur = ctxt->input->cur;
10146 pinput->free = NULL;
10147
10148 /*
10149 * let's parse that entity knowing it's an external subset.
10150 */
10151 ctxt->inSubset = 2;
10152 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10153 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10154 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010155
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010156 if ((enc == XML_CHAR_ENCODING_NONE) &&
10157 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010158 /*
10159 * Get the 4 first bytes and decode the charset
10160 * if enc != XML_CHAR_ENCODING_NONE
10161 * plug some encoding conversion routines.
10162 */
10163 start[0] = RAW;
10164 start[1] = NXT(1);
10165 start[2] = NXT(2);
10166 start[3] = NXT(3);
10167 enc = xmlDetectCharEncoding(start, 4);
10168 if (enc != XML_CHAR_ENCODING_NONE) {
10169 xmlSwitchEncoding(ctxt, enc);
10170 }
10171 }
10172
Owen Taylor3473f882001-02-23 17:55:21 +000010173 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10174
10175 if (ctxt->myDoc != NULL) {
10176 if (ctxt->wellFormed) {
10177 ret = ctxt->myDoc->extSubset;
10178 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010179 if (ret != NULL) {
10180 xmlNodePtr tmp;
10181
10182 ret->doc = NULL;
10183 tmp = ret->children;
10184 while (tmp != NULL) {
10185 tmp->doc = NULL;
10186 tmp = tmp->next;
10187 }
10188 }
Owen Taylor3473f882001-02-23 17:55:21 +000010189 } else {
10190 ret = NULL;
10191 }
10192 xmlFreeDoc(ctxt->myDoc);
10193 ctxt->myDoc = NULL;
10194 }
10195 if (sax != NULL) ctxt->sax = NULL;
10196 xmlFreeParserCtxt(ctxt);
10197
10198 return(ret);
10199}
10200
10201/**
10202 * xmlSAXParseDTD:
10203 * @sax: the SAX handler block
10204 * @ExternalID: a NAME* containing the External ID of the DTD
10205 * @SystemID: a NAME* containing the URL to the DTD
10206 *
10207 * Load and parse an external subset.
10208 *
10209 * Returns the resulting xmlDtdPtr or NULL in case of error.
10210 */
10211
10212xmlDtdPtr
10213xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10214 const xmlChar *SystemID) {
10215 xmlDtdPtr ret = NULL;
10216 xmlParserCtxtPtr ctxt;
10217 xmlParserInputPtr input = NULL;
10218 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010219 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010220
10221 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10222
10223 ctxt = xmlNewParserCtxt();
10224 if (ctxt == NULL) {
10225 return(NULL);
10226 }
10227
10228 /*
10229 * Set-up the SAX context
10230 */
10231 if (sax != NULL) {
10232 if (ctxt->sax != NULL)
10233 xmlFree(ctxt->sax);
10234 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010235 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010236 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010237
10238 /*
10239 * Canonicalise the system ID
10240 */
10241 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010242 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010243 xmlFreeParserCtxt(ctxt);
10244 return(NULL);
10245 }
Owen Taylor3473f882001-02-23 17:55:21 +000010246
10247 /*
10248 * Ask the Entity resolver to load the damn thing
10249 */
10250
10251 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010252 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010253 if (input == NULL) {
10254 if (sax != NULL) ctxt->sax = NULL;
10255 xmlFreeParserCtxt(ctxt);
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010256 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010257 return(NULL);
10258 }
10259
10260 /*
10261 * plug some encoding conversion routines here.
10262 */
10263 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010264 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10265 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10266 xmlSwitchEncoding(ctxt, enc);
10267 }
Owen Taylor3473f882001-02-23 17:55:21 +000010268
10269 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010270 input->filename = (char *) systemIdCanonic;
10271 else
10272 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010273 input->line = 1;
10274 input->col = 1;
10275 input->base = ctxt->input->cur;
10276 input->cur = ctxt->input->cur;
10277 input->free = NULL;
10278
10279 /*
10280 * let's parse that entity knowing it's an external subset.
10281 */
10282 ctxt->inSubset = 2;
10283 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10284 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10285 ExternalID, SystemID);
10286 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10287
10288 if (ctxt->myDoc != NULL) {
10289 if (ctxt->wellFormed) {
10290 ret = ctxt->myDoc->extSubset;
10291 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010292 if (ret != NULL) {
10293 xmlNodePtr tmp;
10294
10295 ret->doc = NULL;
10296 tmp = ret->children;
10297 while (tmp != NULL) {
10298 tmp->doc = NULL;
10299 tmp = tmp->next;
10300 }
10301 }
Owen Taylor3473f882001-02-23 17:55:21 +000010302 } else {
10303 ret = NULL;
10304 }
10305 xmlFreeDoc(ctxt->myDoc);
10306 ctxt->myDoc = NULL;
10307 }
10308 if (sax != NULL) ctxt->sax = NULL;
10309 xmlFreeParserCtxt(ctxt);
10310
10311 return(ret);
10312}
10313
Daniel Veillard4432df22003-09-28 18:58:27 +000010314
Owen Taylor3473f882001-02-23 17:55:21 +000010315/**
10316 * xmlParseDTD:
10317 * @ExternalID: a NAME* containing the External ID of the DTD
10318 * @SystemID: a NAME* containing the URL to the DTD
10319 *
10320 * Load and parse an external subset.
10321 *
10322 * Returns the resulting xmlDtdPtr or NULL in case of error.
10323 */
10324
10325xmlDtdPtr
10326xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10327 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10328}
Daniel Veillard4432df22003-09-28 18:58:27 +000010329#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010330
10331/************************************************************************
10332 * *
10333 * Front ends when parsing an Entity *
10334 * *
10335 ************************************************************************/
10336
10337/**
Owen Taylor3473f882001-02-23 17:55:21 +000010338 * xmlParseCtxtExternalEntity:
10339 * @ctx: the existing parsing context
10340 * @URL: the URL for the entity to load
10341 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010342 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010343 *
10344 * Parse an external general entity within an existing parsing context
10345 * An external general parsed entity is well-formed if it matches the
10346 * production labeled extParsedEnt.
10347 *
10348 * [78] extParsedEnt ::= TextDecl? content
10349 *
10350 * Returns 0 if the entity is well formed, -1 in case of args problem and
10351 * the parser error code otherwise
10352 */
10353
10354int
10355xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010356 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010357 xmlParserCtxtPtr ctxt;
10358 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010359 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010360 xmlSAXHandlerPtr oldsax = NULL;
10361 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010362 xmlChar start[4];
10363 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010364
10365 if (ctx->depth > 40) {
10366 return(XML_ERR_ENTITY_LOOP);
10367 }
10368
Daniel Veillardcda96922001-08-21 10:56:31 +000010369 if (lst != NULL)
10370 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010371 if ((URL == NULL) && (ID == NULL))
10372 return(-1);
10373 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10374 return(-1);
10375
10376
10377 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10378 if (ctxt == NULL) return(-1);
10379 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010380 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010381 oldsax = ctxt->sax;
10382 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010383 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010384 newDoc = xmlNewDoc(BAD_CAST "1.0");
10385 if (newDoc == NULL) {
10386 xmlFreeParserCtxt(ctxt);
10387 return(-1);
10388 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010389 if (ctx->myDoc->dict) {
10390 newDoc->dict = ctx->myDoc->dict;
10391 xmlDictReference(newDoc->dict);
10392 }
Owen Taylor3473f882001-02-23 17:55:21 +000010393 if (ctx->myDoc != NULL) {
10394 newDoc->intSubset = ctx->myDoc->intSubset;
10395 newDoc->extSubset = ctx->myDoc->extSubset;
10396 }
10397 if (ctx->myDoc->URL != NULL) {
10398 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10399 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010400 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10401 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010402 ctxt->sax = oldsax;
10403 xmlFreeParserCtxt(ctxt);
10404 newDoc->intSubset = NULL;
10405 newDoc->extSubset = NULL;
10406 xmlFreeDoc(newDoc);
10407 return(-1);
10408 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010409 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010410 nodePush(ctxt, newDoc->children);
10411 if (ctx->myDoc == NULL) {
10412 ctxt->myDoc = newDoc;
10413 } else {
10414 ctxt->myDoc = ctx->myDoc;
10415 newDoc->children->doc = ctx->myDoc;
10416 }
10417
Daniel Veillard87a764e2001-06-20 17:41:10 +000010418 /*
10419 * Get the 4 first bytes and decode the charset
10420 * if enc != XML_CHAR_ENCODING_NONE
10421 * plug some encoding conversion routines.
10422 */
10423 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010424 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10425 start[0] = RAW;
10426 start[1] = NXT(1);
10427 start[2] = NXT(2);
10428 start[3] = NXT(3);
10429 enc = xmlDetectCharEncoding(start, 4);
10430 if (enc != XML_CHAR_ENCODING_NONE) {
10431 xmlSwitchEncoding(ctxt, enc);
10432 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010433 }
10434
Owen Taylor3473f882001-02-23 17:55:21 +000010435 /*
10436 * Parse a possible text declaration first
10437 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010438 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010439 xmlParseTextDecl(ctxt);
10440 }
10441
10442 /*
10443 * Doing validity checking on chunk doesn't make sense
10444 */
10445 ctxt->instate = XML_PARSER_CONTENT;
10446 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010447 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010448 ctxt->loadsubset = ctx->loadsubset;
10449 ctxt->depth = ctx->depth + 1;
10450 ctxt->replaceEntities = ctx->replaceEntities;
10451 if (ctxt->validate) {
10452 ctxt->vctxt.error = ctx->vctxt.error;
10453 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010454 } else {
10455 ctxt->vctxt.error = NULL;
10456 ctxt->vctxt.warning = NULL;
10457 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010458 ctxt->vctxt.nodeTab = NULL;
10459 ctxt->vctxt.nodeNr = 0;
10460 ctxt->vctxt.nodeMax = 0;
10461 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010462 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10463 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010464 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10465 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10466 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010467 ctxt->dictNames = ctx->dictNames;
10468 ctxt->attsDefault = ctx->attsDefault;
10469 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010470 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010471
10472 xmlParseContent(ctxt);
10473
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010474 ctx->validate = ctxt->validate;
10475 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010476 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010477 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010478 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010479 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010480 }
10481 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010482 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010483 }
10484
10485 if (!ctxt->wellFormed) {
10486 if (ctxt->errNo == 0)
10487 ret = 1;
10488 else
10489 ret = ctxt->errNo;
10490 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010491 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010492 xmlNodePtr cur;
10493
10494 /*
10495 * Return the newly created nodeset after unlinking it from
10496 * they pseudo parent.
10497 */
10498 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010499 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010500 while (cur != NULL) {
10501 cur->parent = NULL;
10502 cur = cur->next;
10503 }
10504 newDoc->children->children = NULL;
10505 }
10506 ret = 0;
10507 }
10508 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010509 ctxt->dict = NULL;
10510 ctxt->attsDefault = NULL;
10511 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010512 xmlFreeParserCtxt(ctxt);
10513 newDoc->intSubset = NULL;
10514 newDoc->extSubset = NULL;
10515 xmlFreeDoc(newDoc);
10516
10517 return(ret);
10518}
10519
10520/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010521 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010522 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010523 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010524 * @sax: the SAX handler bloc (possibly NULL)
10525 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10526 * @depth: Used for loop detection, use 0
10527 * @URL: the URL for the entity to load
10528 * @ID: the System ID for the entity to load
10529 * @list: the return value for the set of parsed nodes
10530 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010531 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010532 *
10533 * Returns 0 if the entity is well formed, -1 in case of args problem and
10534 * the parser error code otherwise
10535 */
10536
Daniel Veillard7d515752003-09-26 19:12:37 +000010537static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010538xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10539 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010540 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010541 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010542 xmlParserCtxtPtr ctxt;
10543 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010544 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010545 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010546 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010547 xmlChar start[4];
10548 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010549
10550 if (depth > 40) {
10551 return(XML_ERR_ENTITY_LOOP);
10552 }
10553
10554
10555
10556 if (list != NULL)
10557 *list = NULL;
10558 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010559 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010560 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010561 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010562
10563
10564 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010565 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010566 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010567 if (oldctxt != NULL) {
10568 ctxt->_private = oldctxt->_private;
10569 ctxt->loadsubset = oldctxt->loadsubset;
10570 ctxt->validate = oldctxt->validate;
10571 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010572 ctxt->record_info = oldctxt->record_info;
10573 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10574 ctxt->node_seq.length = oldctxt->node_seq.length;
10575 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010576 } else {
10577 /*
10578 * Doing validity checking on chunk without context
10579 * doesn't make sense
10580 */
10581 ctxt->_private = NULL;
10582 ctxt->validate = 0;
10583 ctxt->external = 2;
10584 ctxt->loadsubset = 0;
10585 }
Owen Taylor3473f882001-02-23 17:55:21 +000010586 if (sax != NULL) {
10587 oldsax = ctxt->sax;
10588 ctxt->sax = sax;
10589 if (user_data != NULL)
10590 ctxt->userData = user_data;
10591 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010592 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010593 newDoc = xmlNewDoc(BAD_CAST "1.0");
10594 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010595 ctxt->node_seq.maximum = 0;
10596 ctxt->node_seq.length = 0;
10597 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010598 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010599 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010600 }
10601 if (doc != NULL) {
10602 newDoc->intSubset = doc->intSubset;
10603 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010604 newDoc->dict = doc->dict;
10605 } else if (oldctxt != NULL) {
10606 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010607 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010608 xmlDictReference(newDoc->dict);
10609
Owen Taylor3473f882001-02-23 17:55:21 +000010610 if (doc->URL != NULL) {
10611 newDoc->URL = xmlStrdup(doc->URL);
10612 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010613 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10614 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010615 if (sax != NULL)
10616 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010617 ctxt->node_seq.maximum = 0;
10618 ctxt->node_seq.length = 0;
10619 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010620 xmlFreeParserCtxt(ctxt);
10621 newDoc->intSubset = NULL;
10622 newDoc->extSubset = NULL;
10623 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010624 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010625 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010626 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010627 nodePush(ctxt, newDoc->children);
10628 if (doc == NULL) {
10629 ctxt->myDoc = newDoc;
10630 } else {
10631 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010632 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010633 }
10634
Daniel Veillard87a764e2001-06-20 17:41:10 +000010635 /*
10636 * Get the 4 first bytes and decode the charset
10637 * if enc != XML_CHAR_ENCODING_NONE
10638 * plug some encoding conversion routines.
10639 */
10640 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010641 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10642 start[0] = RAW;
10643 start[1] = NXT(1);
10644 start[2] = NXT(2);
10645 start[3] = NXT(3);
10646 enc = xmlDetectCharEncoding(start, 4);
10647 if (enc != XML_CHAR_ENCODING_NONE) {
10648 xmlSwitchEncoding(ctxt, enc);
10649 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010650 }
10651
Owen Taylor3473f882001-02-23 17:55:21 +000010652 /*
10653 * Parse a possible text declaration first
10654 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010655 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010656 xmlParseTextDecl(ctxt);
10657 }
10658
Owen Taylor3473f882001-02-23 17:55:21 +000010659 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010660 ctxt->depth = depth;
10661
10662 xmlParseContent(ctxt);
10663
Daniel Veillard561b7f82002-03-20 21:55:57 +000010664 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010665 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010666 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010667 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010668 }
10669 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010670 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010671 }
10672
10673 if (!ctxt->wellFormed) {
10674 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010675 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010676 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010677 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010678 } else {
10679 if (list != NULL) {
10680 xmlNodePtr cur;
10681
10682 /*
10683 * Return the newly created nodeset after unlinking it from
10684 * they pseudo parent.
10685 */
10686 cur = newDoc->children->children;
10687 *list = cur;
10688 while (cur != NULL) {
10689 cur->parent = NULL;
10690 cur = cur->next;
10691 }
10692 newDoc->children->children = NULL;
10693 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010694 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010695 }
10696 if (sax != NULL)
10697 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010698 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10699 oldctxt->node_seq.length = ctxt->node_seq.length;
10700 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010701 ctxt->node_seq.maximum = 0;
10702 ctxt->node_seq.length = 0;
10703 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010704 xmlFreeParserCtxt(ctxt);
10705 newDoc->intSubset = NULL;
10706 newDoc->extSubset = NULL;
10707 xmlFreeDoc(newDoc);
10708
10709 return(ret);
10710}
10711
Daniel Veillard81273902003-09-30 00:43:48 +000010712#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010713/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010714 * xmlParseExternalEntity:
10715 * @doc: the document the chunk pertains to
10716 * @sax: the SAX handler bloc (possibly NULL)
10717 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10718 * @depth: Used for loop detection, use 0
10719 * @URL: the URL for the entity to load
10720 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010721 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010722 *
10723 * Parse an external general entity
10724 * An external general parsed entity is well-formed if it matches the
10725 * production labeled extParsedEnt.
10726 *
10727 * [78] extParsedEnt ::= TextDecl? content
10728 *
10729 * Returns 0 if the entity is well formed, -1 in case of args problem and
10730 * the parser error code otherwise
10731 */
10732
10733int
10734xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010735 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010736 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010737 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010738}
10739
10740/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010741 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010742 * @doc: the document the chunk pertains to
10743 * @sax: the SAX handler bloc (possibly NULL)
10744 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10745 * @depth: Used for loop detection, use 0
10746 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010747 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010748 *
10749 * Parse a well-balanced chunk of an XML document
10750 * called by the parser
10751 * The allowed sequence for the Well Balanced Chunk is the one defined by
10752 * the content production in the XML grammar:
10753 *
10754 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10755 *
10756 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10757 * the parser error code otherwise
10758 */
10759
10760int
10761xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010762 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010763 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10764 depth, string, lst, 0 );
10765}
Daniel Veillard81273902003-09-30 00:43:48 +000010766#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010767
10768/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010769 * xmlParseBalancedChunkMemoryInternal:
10770 * @oldctxt: the existing parsing context
10771 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10772 * @user_data: the user data field for the parser context
10773 * @lst: the return value for the set of parsed nodes
10774 *
10775 *
10776 * Parse a well-balanced chunk of an XML document
10777 * called by the parser
10778 * The allowed sequence for the Well Balanced Chunk is the one defined by
10779 * the content production in the XML grammar:
10780 *
10781 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10782 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010783 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10784 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010785 *
10786 * In case recover is set to 1, the nodelist will not be empty even if
10787 * the parsed chunk is not well balanced.
10788 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010789static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010790xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10791 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10792 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010793 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010794 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010795 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010796 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010797 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010798 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010799 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010800
10801 if (oldctxt->depth > 40) {
10802 return(XML_ERR_ENTITY_LOOP);
10803 }
10804
10805
10806 if (lst != NULL)
10807 *lst = NULL;
10808 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010809 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010810
10811 size = xmlStrlen(string);
10812
10813 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010814 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010815 if (user_data != NULL)
10816 ctxt->userData = user_data;
10817 else
10818 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010819 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10820 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010821 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10822 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10823 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010824
10825 oldsax = ctxt->sax;
10826 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010827 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010828 ctxt->replaceEntities = oldctxt->replaceEntities;
10829 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010830
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010831 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010832 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010833 newDoc = xmlNewDoc(BAD_CAST "1.0");
10834 if (newDoc == NULL) {
10835 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010836 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010837 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010838 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010839 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010840 newDoc->dict = ctxt->dict;
10841 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010842 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010843 } else {
10844 ctxt->myDoc = oldctxt->myDoc;
10845 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010846 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010847 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010848 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
10849 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010850 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010851 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010852 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010853 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010854 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010855 }
William M. Brack7b9154b2003-09-27 19:23:50 +000010856 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010857 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010858 ctxt->myDoc->children = NULL;
10859 ctxt->myDoc->last = NULL;
10860 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010861 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010862 ctxt->instate = XML_PARSER_CONTENT;
10863 ctxt->depth = oldctxt->depth + 1;
10864
Daniel Veillard328f48c2002-11-15 15:24:34 +000010865 ctxt->validate = 0;
10866 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010867 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10868 /*
10869 * ID/IDREF registration will be done in xmlValidateElement below
10870 */
10871 ctxt->loadsubset |= XML_SKIP_IDS;
10872 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010873 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010874 ctxt->attsDefault = oldctxt->attsDefault;
10875 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010876
Daniel Veillard68e9e742002-11-16 15:35:11 +000010877 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010878 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010879 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010880 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010881 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010882 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010883 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010884 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010885 }
10886
10887 if (!ctxt->wellFormed) {
10888 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010889 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010890 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010891 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010892 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010893 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010894 }
10895
William M. Brack7b9154b2003-09-27 19:23:50 +000010896 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010897 xmlNodePtr cur;
10898
10899 /*
10900 * Return the newly created nodeset after unlinking it from
10901 * they pseudo parent.
10902 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010903 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010904 *lst = cur;
10905 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010906#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010907 if (oldctxt->validate && oldctxt->wellFormed &&
10908 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10909 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10910 oldctxt->myDoc, cur);
10911 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010912#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010913 cur->parent = NULL;
10914 cur = cur->next;
10915 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010916 ctxt->myDoc->children->children = NULL;
10917 }
10918 if (ctxt->myDoc != NULL) {
10919 xmlFreeNode(ctxt->myDoc->children);
10920 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010921 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010922 }
10923
10924 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010925 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010926 ctxt->attsDefault = NULL;
10927 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010928 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010929 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010930 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010931 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010932
10933 return(ret);
10934}
10935
Daniel Veillard29b17482004-08-16 00:39:03 +000010936/**
10937 * xmlParseInNodeContext:
10938 * @node: the context node
10939 * @data: the input string
10940 * @datalen: the input string length in bytes
10941 * @options: a combination of xmlParserOption
10942 * @lst: the return value for the set of parsed nodes
10943 *
10944 * Parse a well-balanced chunk of an XML document
10945 * within the context (DTD, namespaces, etc ...) of the given node.
10946 *
10947 * The allowed sequence for the data is a Well Balanced Chunk defined by
10948 * the content production in the XML grammar:
10949 *
10950 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10951 *
10952 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10953 * error code otherwise
10954 */
10955xmlParserErrors
10956xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
10957 int options, xmlNodePtr *lst) {
10958#ifdef SAX2
10959 xmlParserCtxtPtr ctxt;
10960 xmlDocPtr doc = NULL;
10961 xmlNodePtr fake, cur;
10962 int nsnr = 0;
10963
10964 xmlParserErrors ret = XML_ERR_OK;
10965
10966 /*
10967 * check all input parameters, grab the document
10968 */
10969 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
10970 return(XML_ERR_INTERNAL_ERROR);
10971 switch (node->type) {
10972 case XML_ELEMENT_NODE:
10973 case XML_ATTRIBUTE_NODE:
10974 case XML_TEXT_NODE:
10975 case XML_CDATA_SECTION_NODE:
10976 case XML_ENTITY_REF_NODE:
10977 case XML_PI_NODE:
10978 case XML_COMMENT_NODE:
10979 case XML_DOCUMENT_NODE:
10980 case XML_HTML_DOCUMENT_NODE:
10981 break;
10982 default:
10983 return(XML_ERR_INTERNAL_ERROR);
10984
10985 }
10986 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
10987 (node->type != XML_DOCUMENT_NODE) &&
10988 (node->type != XML_HTML_DOCUMENT_NODE))
10989 node = node->parent;
10990 if (node == NULL)
10991 return(XML_ERR_INTERNAL_ERROR);
10992 if (node->type == XML_ELEMENT_NODE)
10993 doc = node->doc;
10994 else
10995 doc = (xmlDocPtr) node;
10996 if (doc == NULL)
10997 return(XML_ERR_INTERNAL_ERROR);
10998
10999 /*
11000 * allocate a context and set-up everything not related to the
11001 * node position in the tree
11002 */
11003 if (doc->type == XML_DOCUMENT_NODE)
11004 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11005#ifdef LIBXML_HTML_ENABLED
11006 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11007 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11008#endif
11009 else
11010 return(XML_ERR_INTERNAL_ERROR);
11011
11012 if (ctxt == NULL)
11013 return(XML_ERR_NO_MEMORY);
11014 fake = xmlNewComment(NULL);
11015 if (fake == NULL) {
11016 xmlFreeParserCtxt(ctxt);
11017 return(XML_ERR_NO_MEMORY);
11018 }
11019 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011020
11021 /*
11022 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11023 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11024 * we must wait until the last moment to free the original one.
11025 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011026 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011027 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011028 xmlDictFree(ctxt->dict);
11029 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011030 } else
11031 options |= XML_PARSE_NODICT;
11032
11033 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011034 xmlDetectSAX2(ctxt);
11035 ctxt->myDoc = doc;
11036
11037 if (node->type == XML_ELEMENT_NODE) {
11038 nodePush(ctxt, node);
11039 /*
11040 * initialize the SAX2 namespaces stack
11041 */
11042 cur = node;
11043 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11044 xmlNsPtr ns = cur->nsDef;
11045 const xmlChar *iprefix, *ihref;
11046
11047 while (ns != NULL) {
11048 if (ctxt->dict) {
11049 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11050 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11051 } else {
11052 iprefix = ns->prefix;
11053 ihref = ns->href;
11054 }
11055
11056 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11057 nsPush(ctxt, iprefix, ihref);
11058 nsnr++;
11059 }
11060 ns = ns->next;
11061 }
11062 cur = cur->parent;
11063 }
11064 ctxt->instate = XML_PARSER_CONTENT;
11065 }
11066
11067 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11068 /*
11069 * ID/IDREF registration will be done in xmlValidateElement below
11070 */
11071 ctxt->loadsubset |= XML_SKIP_IDS;
11072 }
11073
11074 xmlParseContent(ctxt);
11075 nsPop(ctxt, nsnr);
11076 if ((RAW == '<') && (NXT(1) == '/')) {
11077 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11078 } else if (RAW != 0) {
11079 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11080 }
11081 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11082 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11083 ctxt->wellFormed = 0;
11084 }
11085
11086 if (!ctxt->wellFormed) {
11087 if (ctxt->errNo == 0)
11088 ret = XML_ERR_INTERNAL_ERROR;
11089 else
11090 ret = (xmlParserErrors)ctxt->errNo;
11091 } else {
11092 ret = XML_ERR_OK;
11093 }
11094
11095 /*
11096 * Return the newly created nodeset after unlinking it from
11097 * the pseudo sibling.
11098 */
11099
11100 cur = fake->next;
11101 fake->next = NULL;
11102 node->last = fake;
11103
11104 if (cur != NULL) {
11105 cur->prev = NULL;
11106 }
11107
11108 *lst = cur;
11109
11110 while (cur != NULL) {
11111 cur->parent = NULL;
11112 cur = cur->next;
11113 }
11114
11115 xmlUnlinkNode(fake);
11116 xmlFreeNode(fake);
11117
11118
11119 if (ret != XML_ERR_OK) {
11120 xmlFreeNodeList(*lst);
11121 *lst = NULL;
11122 }
William M. Brackc3f81342004-10-03 01:22:44 +000011123
William M. Brackb7b54de2004-10-06 16:38:01 +000011124 if (doc->dict != NULL)
11125 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011126 xmlFreeParserCtxt(ctxt);
11127
11128 return(ret);
11129#else /* !SAX2 */
11130 return(XML_ERR_INTERNAL_ERROR);
11131#endif
11132}
11133
Daniel Veillard81273902003-09-30 00:43:48 +000011134#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011135/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011136 * xmlParseBalancedChunkMemoryRecover:
11137 * @doc: the document the chunk pertains to
11138 * @sax: the SAX handler bloc (possibly NULL)
11139 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11140 * @depth: Used for loop detection, use 0
11141 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11142 * @lst: the return value for the set of parsed nodes
11143 * @recover: return nodes even if the data is broken (use 0)
11144 *
11145 *
11146 * Parse a well-balanced chunk of an XML document
11147 * called by the parser
11148 * The allowed sequence for the Well Balanced Chunk is the one defined by
11149 * the content production in the XML grammar:
11150 *
11151 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11152 *
11153 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11154 * the parser error code otherwise
11155 *
11156 * In case recover is set to 1, the nodelist will not be empty even if
11157 * the parsed chunk is not well balanced.
11158 */
11159int
11160xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11161 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11162 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011163 xmlParserCtxtPtr ctxt;
11164 xmlDocPtr newDoc;
11165 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011166 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011167 int size;
11168 int ret = 0;
11169
11170 if (depth > 40) {
11171 return(XML_ERR_ENTITY_LOOP);
11172 }
11173
11174
Daniel Veillardcda96922001-08-21 10:56:31 +000011175 if (lst != NULL)
11176 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011177 if (string == NULL)
11178 return(-1);
11179
11180 size = xmlStrlen(string);
11181
11182 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11183 if (ctxt == NULL) return(-1);
11184 ctxt->userData = ctxt;
11185 if (sax != NULL) {
11186 oldsax = ctxt->sax;
11187 ctxt->sax = sax;
11188 if (user_data != NULL)
11189 ctxt->userData = user_data;
11190 }
11191 newDoc = xmlNewDoc(BAD_CAST "1.0");
11192 if (newDoc == NULL) {
11193 xmlFreeParserCtxt(ctxt);
11194 return(-1);
11195 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011196 newDoc->dict = ctxt->dict;
11197 xmlDictReference(newDoc->dict);
Owen Taylor3473f882001-02-23 17:55:21 +000011198 if (doc != NULL) {
11199 newDoc->intSubset = doc->intSubset;
11200 newDoc->extSubset = doc->extSubset;
11201 }
11202 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11203 if (newDoc->children == NULL) {
11204 if (sax != NULL)
11205 ctxt->sax = oldsax;
11206 xmlFreeParserCtxt(ctxt);
11207 newDoc->intSubset = NULL;
11208 newDoc->extSubset = NULL;
11209 xmlFreeDoc(newDoc);
11210 return(-1);
11211 }
11212 nodePush(ctxt, newDoc->children);
11213 if (doc == NULL) {
11214 ctxt->myDoc = newDoc;
11215 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011216 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011217 newDoc->children->doc = doc;
11218 }
11219 ctxt->instate = XML_PARSER_CONTENT;
11220 ctxt->depth = depth;
11221
11222 /*
11223 * Doing validity checking on chunk doesn't make sense
11224 */
11225 ctxt->validate = 0;
11226 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011227 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011228
Daniel Veillardb39bc392002-10-26 19:29:51 +000011229 if ( doc != NULL ){
11230 content = doc->children;
11231 doc->children = NULL;
11232 xmlParseContent(ctxt);
11233 doc->children = content;
11234 }
11235 else {
11236 xmlParseContent(ctxt);
11237 }
Owen Taylor3473f882001-02-23 17:55:21 +000011238 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011239 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011240 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011241 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011242 }
11243 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011244 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011245 }
11246
11247 if (!ctxt->wellFormed) {
11248 if (ctxt->errNo == 0)
11249 ret = 1;
11250 else
11251 ret = ctxt->errNo;
11252 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011253 ret = 0;
11254 }
11255
11256 if (lst != NULL && (ret == 0 || recover == 1)) {
11257 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011258
11259 /*
11260 * Return the newly created nodeset after unlinking it from
11261 * they pseudo parent.
11262 */
11263 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011264 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011265 while (cur != NULL) {
11266 cur->parent = NULL;
11267 cur = cur->next;
11268 }
11269 newDoc->children->children = NULL;
11270 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011271
Owen Taylor3473f882001-02-23 17:55:21 +000011272 if (sax != NULL)
11273 ctxt->sax = oldsax;
11274 xmlFreeParserCtxt(ctxt);
11275 newDoc->intSubset = NULL;
11276 newDoc->extSubset = NULL;
11277 xmlFreeDoc(newDoc);
11278
11279 return(ret);
11280}
11281
11282/**
11283 * xmlSAXParseEntity:
11284 * @sax: the SAX handler block
11285 * @filename: the filename
11286 *
11287 * parse an XML external entity out of context and build a tree.
11288 * It use the given SAX function block to handle the parsing callback.
11289 * If sax is NULL, fallback to the default DOM tree building routines.
11290 *
11291 * [78] extParsedEnt ::= TextDecl? content
11292 *
11293 * This correspond to a "Well Balanced" chunk
11294 *
11295 * Returns the resulting document tree
11296 */
11297
11298xmlDocPtr
11299xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11300 xmlDocPtr ret;
11301 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011302
11303 ctxt = xmlCreateFileParserCtxt(filename);
11304 if (ctxt == NULL) {
11305 return(NULL);
11306 }
11307 if (sax != NULL) {
11308 if (ctxt->sax != NULL)
11309 xmlFree(ctxt->sax);
11310 ctxt->sax = sax;
11311 ctxt->userData = NULL;
11312 }
11313
Owen Taylor3473f882001-02-23 17:55:21 +000011314 xmlParseExtParsedEnt(ctxt);
11315
11316 if (ctxt->wellFormed)
11317 ret = ctxt->myDoc;
11318 else {
11319 ret = NULL;
11320 xmlFreeDoc(ctxt->myDoc);
11321 ctxt->myDoc = NULL;
11322 }
11323 if (sax != NULL)
11324 ctxt->sax = NULL;
11325 xmlFreeParserCtxt(ctxt);
11326
11327 return(ret);
11328}
11329
11330/**
11331 * xmlParseEntity:
11332 * @filename: the filename
11333 *
11334 * parse an XML external entity out of context and build a tree.
11335 *
11336 * [78] extParsedEnt ::= TextDecl? content
11337 *
11338 * This correspond to a "Well Balanced" chunk
11339 *
11340 * Returns the resulting document tree
11341 */
11342
11343xmlDocPtr
11344xmlParseEntity(const char *filename) {
11345 return(xmlSAXParseEntity(NULL, filename));
11346}
Daniel Veillard81273902003-09-30 00:43:48 +000011347#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011348
11349/**
11350 * xmlCreateEntityParserCtxt:
11351 * @URL: the entity URL
11352 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011353 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011354 *
11355 * Create a parser context for an external entity
11356 * Automatic support for ZLIB/Compress compressed document is provided
11357 * by default if found at compile-time.
11358 *
11359 * Returns the new parser context or NULL
11360 */
11361xmlParserCtxtPtr
11362xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11363 const xmlChar *base) {
11364 xmlParserCtxtPtr ctxt;
11365 xmlParserInputPtr inputStream;
11366 char *directory = NULL;
11367 xmlChar *uri;
11368
11369 ctxt = xmlNewParserCtxt();
11370 if (ctxt == NULL) {
11371 return(NULL);
11372 }
11373
11374 uri = xmlBuildURI(URL, base);
11375
11376 if (uri == NULL) {
11377 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11378 if (inputStream == NULL) {
11379 xmlFreeParserCtxt(ctxt);
11380 return(NULL);
11381 }
11382
11383 inputPush(ctxt, inputStream);
11384
11385 if ((ctxt->directory == NULL) && (directory == NULL))
11386 directory = xmlParserGetDirectory((char *)URL);
11387 if ((ctxt->directory == NULL) && (directory != NULL))
11388 ctxt->directory = directory;
11389 } else {
11390 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11391 if (inputStream == NULL) {
11392 xmlFree(uri);
11393 xmlFreeParserCtxt(ctxt);
11394 return(NULL);
11395 }
11396
11397 inputPush(ctxt, inputStream);
11398
11399 if ((ctxt->directory == NULL) && (directory == NULL))
11400 directory = xmlParserGetDirectory((char *)uri);
11401 if ((ctxt->directory == NULL) && (directory != NULL))
11402 ctxt->directory = directory;
11403 xmlFree(uri);
11404 }
Owen Taylor3473f882001-02-23 17:55:21 +000011405 return(ctxt);
11406}
11407
11408/************************************************************************
11409 * *
11410 * Front ends when parsing from a file *
11411 * *
11412 ************************************************************************/
11413
11414/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011415 * xmlCreateURLParserCtxt:
11416 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011417 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011418 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011419 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011420 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011421 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011422 *
11423 * Returns the new parser context or NULL
11424 */
11425xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011426xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011427{
11428 xmlParserCtxtPtr ctxt;
11429 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011430 char *directory = NULL;
11431
Owen Taylor3473f882001-02-23 17:55:21 +000011432 ctxt = xmlNewParserCtxt();
11433 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011434 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011435 return(NULL);
11436 }
11437
Daniel Veillard61b93382003-11-03 14:28:31 +000011438 if (options != 0)
11439 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011440
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011441 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011442 if (inputStream == NULL) {
11443 xmlFreeParserCtxt(ctxt);
11444 return(NULL);
11445 }
11446
Owen Taylor3473f882001-02-23 17:55:21 +000011447 inputPush(ctxt, inputStream);
11448 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011449 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011450 if ((ctxt->directory == NULL) && (directory != NULL))
11451 ctxt->directory = directory;
11452
11453 return(ctxt);
11454}
11455
Daniel Veillard61b93382003-11-03 14:28:31 +000011456/**
11457 * xmlCreateFileParserCtxt:
11458 * @filename: the filename
11459 *
11460 * Create a parser context for a file content.
11461 * Automatic support for ZLIB/Compress compressed document is provided
11462 * by default if found at compile-time.
11463 *
11464 * Returns the new parser context or NULL
11465 */
11466xmlParserCtxtPtr
11467xmlCreateFileParserCtxt(const char *filename)
11468{
11469 return(xmlCreateURLParserCtxt(filename, 0));
11470}
11471
Daniel Veillard81273902003-09-30 00:43:48 +000011472#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011473/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011474 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011475 * @sax: the SAX handler block
11476 * @filename: the filename
11477 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11478 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011479 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011480 *
11481 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11482 * compressed document is provided by default if found at compile-time.
11483 * It use the given SAX function block to handle the parsing callback.
11484 * If sax is NULL, fallback to the default DOM tree building routines.
11485 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011486 * User data (void *) is stored within the parser context in the
11487 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011488 *
Owen Taylor3473f882001-02-23 17:55:21 +000011489 * Returns the resulting document tree
11490 */
11491
11492xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011493xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11494 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011495 xmlDocPtr ret;
11496 xmlParserCtxtPtr ctxt;
11497 char *directory = NULL;
11498
Daniel Veillard635ef722001-10-29 11:48:19 +000011499 xmlInitParser();
11500
Owen Taylor3473f882001-02-23 17:55:21 +000011501 ctxt = xmlCreateFileParserCtxt(filename);
11502 if (ctxt == NULL) {
11503 return(NULL);
11504 }
11505 if (sax != NULL) {
11506 if (ctxt->sax != NULL)
11507 xmlFree(ctxt->sax);
11508 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011509 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011510 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011511 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011512 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011513 }
Owen Taylor3473f882001-02-23 17:55:21 +000011514
11515 if ((ctxt->directory == NULL) && (directory == NULL))
11516 directory = xmlParserGetDirectory(filename);
11517 if ((ctxt->directory == NULL) && (directory != NULL))
11518 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11519
Daniel Veillarddad3f682002-11-17 16:47:27 +000011520 ctxt->recovery = recovery;
11521
Owen Taylor3473f882001-02-23 17:55:21 +000011522 xmlParseDocument(ctxt);
11523
William M. Brackc07329e2003-09-08 01:57:30 +000011524 if ((ctxt->wellFormed) || recovery) {
11525 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011526 if (ret != NULL) {
11527 if (ctxt->input->buf->compressed > 0)
11528 ret->compression = 9;
11529 else
11530 ret->compression = ctxt->input->buf->compressed;
11531 }
William M. Brackc07329e2003-09-08 01:57:30 +000011532 }
Owen Taylor3473f882001-02-23 17:55:21 +000011533 else {
11534 ret = NULL;
11535 xmlFreeDoc(ctxt->myDoc);
11536 ctxt->myDoc = NULL;
11537 }
11538 if (sax != NULL)
11539 ctxt->sax = NULL;
11540 xmlFreeParserCtxt(ctxt);
11541
11542 return(ret);
11543}
11544
11545/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011546 * xmlSAXParseFile:
11547 * @sax: the SAX handler block
11548 * @filename: the filename
11549 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11550 * documents
11551 *
11552 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11553 * compressed document is provided by default if found at compile-time.
11554 * It use the given SAX function block to handle the parsing callback.
11555 * If sax is NULL, fallback to the default DOM tree building routines.
11556 *
11557 * Returns the resulting document tree
11558 */
11559
11560xmlDocPtr
11561xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11562 int recovery) {
11563 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11564}
11565
11566/**
Owen Taylor3473f882001-02-23 17:55:21 +000011567 * xmlRecoverDoc:
11568 * @cur: a pointer to an array of xmlChar
11569 *
11570 * parse an XML in-memory document and build a tree.
11571 * In the case the document is not Well Formed, a tree is built anyway
11572 *
11573 * Returns the resulting document tree
11574 */
11575
11576xmlDocPtr
11577xmlRecoverDoc(xmlChar *cur) {
11578 return(xmlSAXParseDoc(NULL, cur, 1));
11579}
11580
11581/**
11582 * xmlParseFile:
11583 * @filename: the filename
11584 *
11585 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11586 * compressed document is provided by default if found at compile-time.
11587 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011588 * Returns the resulting document tree if the file was wellformed,
11589 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011590 */
11591
11592xmlDocPtr
11593xmlParseFile(const char *filename) {
11594 return(xmlSAXParseFile(NULL, filename, 0));
11595}
11596
11597/**
11598 * xmlRecoverFile:
11599 * @filename: the filename
11600 *
11601 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11602 * compressed document is provided by default if found at compile-time.
11603 * In the case the document is not Well Formed, a tree is built anyway
11604 *
11605 * Returns the resulting document tree
11606 */
11607
11608xmlDocPtr
11609xmlRecoverFile(const char *filename) {
11610 return(xmlSAXParseFile(NULL, filename, 1));
11611}
11612
11613
11614/**
11615 * xmlSetupParserForBuffer:
11616 * @ctxt: an XML parser context
11617 * @buffer: a xmlChar * buffer
11618 * @filename: a file name
11619 *
11620 * Setup the parser context to parse a new buffer; Clears any prior
11621 * contents from the parser context. The buffer parameter must not be
11622 * NULL, but the filename parameter can be
11623 */
11624void
11625xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11626 const char* filename)
11627{
11628 xmlParserInputPtr input;
11629
11630 input = xmlNewInputStream(ctxt);
11631 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011632 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011633 xmlFree(ctxt);
11634 return;
11635 }
11636
11637 xmlClearParserCtxt(ctxt);
11638 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011639 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011640 input->base = buffer;
11641 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011642 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011643 inputPush(ctxt, input);
11644}
11645
11646/**
11647 * xmlSAXUserParseFile:
11648 * @sax: a SAX handler
11649 * @user_data: The user data returned on SAX callbacks
11650 * @filename: a file name
11651 *
11652 * parse an XML file and call the given SAX handler routines.
11653 * Automatic support for ZLIB/Compress compressed document is provided
11654 *
11655 * Returns 0 in case of success or a error number otherwise
11656 */
11657int
11658xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11659 const char *filename) {
11660 int ret = 0;
11661 xmlParserCtxtPtr ctxt;
11662
11663 ctxt = xmlCreateFileParserCtxt(filename);
11664 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011665#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011666 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011667#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011668 xmlFree(ctxt->sax);
11669 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011670 xmlDetectSAX2(ctxt);
11671
Owen Taylor3473f882001-02-23 17:55:21 +000011672 if (user_data != NULL)
11673 ctxt->userData = user_data;
11674
11675 xmlParseDocument(ctxt);
11676
11677 if (ctxt->wellFormed)
11678 ret = 0;
11679 else {
11680 if (ctxt->errNo != 0)
11681 ret = ctxt->errNo;
11682 else
11683 ret = -1;
11684 }
11685 if (sax != NULL)
11686 ctxt->sax = NULL;
11687 xmlFreeParserCtxt(ctxt);
11688
11689 return ret;
11690}
Daniel Veillard81273902003-09-30 00:43:48 +000011691#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011692
11693/************************************************************************
11694 * *
11695 * Front ends when parsing from memory *
11696 * *
11697 ************************************************************************/
11698
11699/**
11700 * xmlCreateMemoryParserCtxt:
11701 * @buffer: a pointer to a char array
11702 * @size: the size of the array
11703 *
11704 * Create a parser context for an XML in-memory document.
11705 *
11706 * Returns the new parser context or NULL
11707 */
11708xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011709xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011710 xmlParserCtxtPtr ctxt;
11711 xmlParserInputPtr input;
11712 xmlParserInputBufferPtr buf;
11713
11714 if (buffer == NULL)
11715 return(NULL);
11716 if (size <= 0)
11717 return(NULL);
11718
11719 ctxt = xmlNewParserCtxt();
11720 if (ctxt == NULL)
11721 return(NULL);
11722
Daniel Veillard53350552003-09-18 13:35:51 +000011723 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011724 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011725 if (buf == NULL) {
11726 xmlFreeParserCtxt(ctxt);
11727 return(NULL);
11728 }
Owen Taylor3473f882001-02-23 17:55:21 +000011729
11730 input = xmlNewInputStream(ctxt);
11731 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011732 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011733 xmlFreeParserCtxt(ctxt);
11734 return(NULL);
11735 }
11736
11737 input->filename = NULL;
11738 input->buf = buf;
11739 input->base = input->buf->buffer->content;
11740 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011741 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011742
11743 inputPush(ctxt, input);
11744 return(ctxt);
11745}
11746
Daniel Veillard81273902003-09-30 00:43:48 +000011747#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011748/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011749 * xmlSAXParseMemoryWithData:
11750 * @sax: the SAX handler block
11751 * @buffer: an pointer to a char array
11752 * @size: the size of the array
11753 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11754 * documents
11755 * @data: the userdata
11756 *
11757 * parse an XML in-memory block and use the given SAX function block
11758 * to handle the parsing callback. If sax is NULL, fallback to the default
11759 * DOM tree building routines.
11760 *
11761 * User data (void *) is stored within the parser context in the
11762 * context's _private member, so it is available nearly everywhere in libxml
11763 *
11764 * Returns the resulting document tree
11765 */
11766
11767xmlDocPtr
11768xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11769 int size, int recovery, void *data) {
11770 xmlDocPtr ret;
11771 xmlParserCtxtPtr ctxt;
11772
11773 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11774 if (ctxt == NULL) return(NULL);
11775 if (sax != NULL) {
11776 if (ctxt->sax != NULL)
11777 xmlFree(ctxt->sax);
11778 ctxt->sax = sax;
11779 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011780 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011781 if (data!=NULL) {
11782 ctxt->_private=data;
11783 }
11784
Daniel Veillardadba5f12003-04-04 16:09:01 +000011785 ctxt->recovery = recovery;
11786
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011787 xmlParseDocument(ctxt);
11788
11789 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11790 else {
11791 ret = NULL;
11792 xmlFreeDoc(ctxt->myDoc);
11793 ctxt->myDoc = NULL;
11794 }
11795 if (sax != NULL)
11796 ctxt->sax = NULL;
11797 xmlFreeParserCtxt(ctxt);
11798
11799 return(ret);
11800}
11801
11802/**
Owen Taylor3473f882001-02-23 17:55:21 +000011803 * xmlSAXParseMemory:
11804 * @sax: the SAX handler block
11805 * @buffer: an pointer to a char array
11806 * @size: the size of the array
11807 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11808 * documents
11809 *
11810 * parse an XML in-memory block and use the given SAX function block
11811 * to handle the parsing callback. If sax is NULL, fallback to the default
11812 * DOM tree building routines.
11813 *
11814 * Returns the resulting document tree
11815 */
11816xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011817xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11818 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011819 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011820}
11821
11822/**
11823 * xmlParseMemory:
11824 * @buffer: an pointer to a char array
11825 * @size: the size of the array
11826 *
11827 * parse an XML in-memory block and build a tree.
11828 *
11829 * Returns the resulting document tree
11830 */
11831
Daniel Veillard50822cb2001-07-26 20:05:51 +000011832xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011833 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11834}
11835
11836/**
11837 * xmlRecoverMemory:
11838 * @buffer: an pointer to a char array
11839 * @size: the size of the array
11840 *
11841 * parse an XML in-memory block and build a tree.
11842 * In the case the document is not Well Formed, a tree is built anyway
11843 *
11844 * Returns the resulting document tree
11845 */
11846
Daniel Veillard50822cb2001-07-26 20:05:51 +000011847xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011848 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11849}
11850
11851/**
11852 * xmlSAXUserParseMemory:
11853 * @sax: a SAX handler
11854 * @user_data: The user data returned on SAX callbacks
11855 * @buffer: an in-memory XML document input
11856 * @size: the length of the XML document in bytes
11857 *
11858 * A better SAX parsing routine.
11859 * parse an XML in-memory buffer and call the given SAX handler routines.
11860 *
11861 * Returns 0 in case of success or a error number otherwise
11862 */
11863int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011864 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011865 int ret = 0;
11866 xmlParserCtxtPtr ctxt;
11867 xmlSAXHandlerPtr oldsax = NULL;
11868
Daniel Veillard9e923512002-08-14 08:48:52 +000011869 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011870 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11871 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011872 oldsax = ctxt->sax;
11873 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011874 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011875 if (user_data != NULL)
11876 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011877
11878 xmlParseDocument(ctxt);
11879
11880 if (ctxt->wellFormed)
11881 ret = 0;
11882 else {
11883 if (ctxt->errNo != 0)
11884 ret = ctxt->errNo;
11885 else
11886 ret = -1;
11887 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011888 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011889 xmlFreeParserCtxt(ctxt);
11890
11891 return ret;
11892}
Daniel Veillard81273902003-09-30 00:43:48 +000011893#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011894
11895/**
11896 * xmlCreateDocParserCtxt:
11897 * @cur: a pointer to an array of xmlChar
11898 *
11899 * Creates a parser context for an XML in-memory document.
11900 *
11901 * Returns the new parser context or NULL
11902 */
11903xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011904xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011905 int len;
11906
11907 if (cur == NULL)
11908 return(NULL);
11909 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011910 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011911}
11912
Daniel Veillard81273902003-09-30 00:43:48 +000011913#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011914/**
11915 * xmlSAXParseDoc:
11916 * @sax: the SAX handler block
11917 * @cur: a pointer to an array of xmlChar
11918 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11919 * documents
11920 *
11921 * parse an XML in-memory document and build a tree.
11922 * It use the given SAX function block to handle the parsing callback.
11923 * If sax is NULL, fallback to the default DOM tree building routines.
11924 *
11925 * Returns the resulting document tree
11926 */
11927
11928xmlDocPtr
11929xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11930 xmlDocPtr ret;
11931 xmlParserCtxtPtr ctxt;
11932
11933 if (cur == NULL) return(NULL);
11934
11935
11936 ctxt = xmlCreateDocParserCtxt(cur);
11937 if (ctxt == NULL) return(NULL);
11938 if (sax != NULL) {
11939 ctxt->sax = sax;
11940 ctxt->userData = NULL;
11941 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011942 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011943
11944 xmlParseDocument(ctxt);
11945 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11946 else {
11947 ret = NULL;
11948 xmlFreeDoc(ctxt->myDoc);
11949 ctxt->myDoc = NULL;
11950 }
11951 if (sax != NULL)
11952 ctxt->sax = NULL;
11953 xmlFreeParserCtxt(ctxt);
11954
11955 return(ret);
11956}
11957
11958/**
11959 * xmlParseDoc:
11960 * @cur: a pointer to an array of xmlChar
11961 *
11962 * parse an XML in-memory document and build a tree.
11963 *
11964 * Returns the resulting document tree
11965 */
11966
11967xmlDocPtr
11968xmlParseDoc(xmlChar *cur) {
11969 return(xmlSAXParseDoc(NULL, cur, 0));
11970}
Daniel Veillard81273902003-09-30 00:43:48 +000011971#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011972
Daniel Veillard81273902003-09-30 00:43:48 +000011973#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011974/************************************************************************
11975 * *
11976 * Specific function to keep track of entities references *
11977 * and used by the XSLT debugger *
11978 * *
11979 ************************************************************************/
11980
11981static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11982
11983/**
11984 * xmlAddEntityReference:
11985 * @ent : A valid entity
11986 * @firstNode : A valid first node for children of entity
11987 * @lastNode : A valid last node of children entity
11988 *
11989 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11990 */
11991static void
11992xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11993 xmlNodePtr lastNode)
11994{
11995 if (xmlEntityRefFunc != NULL) {
11996 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11997 }
11998}
11999
12000
12001/**
12002 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012003 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012004 *
12005 * Set the function to call call back when a xml reference has been made
12006 */
12007void
12008xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12009{
12010 xmlEntityRefFunc = func;
12011}
Daniel Veillard81273902003-09-30 00:43:48 +000012012#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012013
12014/************************************************************************
12015 * *
12016 * Miscellaneous *
12017 * *
12018 ************************************************************************/
12019
12020#ifdef LIBXML_XPATH_ENABLED
12021#include <libxml/xpath.h>
12022#endif
12023
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012024extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012025static int xmlParserInitialized = 0;
12026
12027/**
12028 * xmlInitParser:
12029 *
12030 * Initialization function for the XML parser.
12031 * This is not reentrant. Call once before processing in case of
12032 * use in multithreaded programs.
12033 */
12034
12035void
12036xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012037 if (xmlParserInitialized != 0)
12038 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012039
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012040 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12041 (xmlGenericError == NULL))
12042 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012043 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012044 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012045 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012046 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012047 xmlDefaultSAXHandlerInit();
12048 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012049#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012050 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012051#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012052#ifdef LIBXML_HTML_ENABLED
12053 htmlInitAutoClose();
12054 htmlDefaultSAXHandlerInit();
12055#endif
12056#ifdef LIBXML_XPATH_ENABLED
12057 xmlXPathInit();
12058#endif
12059 xmlParserInitialized = 1;
12060}
12061
12062/**
12063 * xmlCleanupParser:
12064 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012065 * Cleanup function for the XML library. It tries to reclaim all
12066 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012067 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012068 * function should not prevent reusing the library but one should
12069 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012070 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012071 */
12072
12073void
12074xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012075 if (!xmlParserInitialized)
12076 return;
12077
Owen Taylor3473f882001-02-23 17:55:21 +000012078 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012079#ifdef LIBXML_CATALOG_ENABLED
12080 xmlCatalogCleanup();
12081#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012082 xmlCleanupInputCallbacks();
12083#ifdef LIBXML_OUTPUT_ENABLED
12084 xmlCleanupOutputCallbacks();
12085#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012086 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012087 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012088 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012089 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012090 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012091}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012092
12093/************************************************************************
12094 * *
12095 * New set (2.6.0) of simpler and more flexible APIs *
12096 * *
12097 ************************************************************************/
12098
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored, and a NULL "dict" means
 * the string is always freed.
 *
 * The macro expands to a complete "if" statement, terminating semicolon
 * included, and evaluates @str more than once: do not use it as the
 * body of an unbraced if/else and do not pass an expression with side
 * effects.
 */
#define DICT_FREE(str)						\
    if (((str) != NULL) &&					\
        (((dict) == NULL) ||					\
         (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
        xmlFree((char *)(str));
12110
12111/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012112 * xmlCtxtReset:
12113 * @ctxt: an XML parser context
12114 *
12115 * Reset a parser context
12116 */
12117void
12118xmlCtxtReset(xmlParserCtxtPtr ctxt)
12119{
12120 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012121 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012122
12123 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12124 xmlFreeInputStream(input);
12125 }
12126 ctxt->inputNr = 0;
12127 ctxt->input = NULL;
12128
12129 ctxt->spaceNr = 0;
12130 ctxt->spaceTab[0] = -1;
12131 ctxt->space = &ctxt->spaceTab[0];
12132
12133
12134 ctxt->nodeNr = 0;
12135 ctxt->node = NULL;
12136
12137 ctxt->nameNr = 0;
12138 ctxt->name = NULL;
12139
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012140 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012141 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012142 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012143 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012144 DICT_FREE(ctxt->directory);
12145 ctxt->directory = NULL;
12146 DICT_FREE(ctxt->extSubURI);
12147 ctxt->extSubURI = NULL;
12148 DICT_FREE(ctxt->extSubSystem);
12149 ctxt->extSubSystem = NULL;
12150 if (ctxt->myDoc != NULL)
12151 xmlFreeDoc(ctxt->myDoc);
12152 ctxt->myDoc = NULL;
12153
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012154 ctxt->standalone = -1;
12155 ctxt->hasExternalSubset = 0;
12156 ctxt->hasPErefs = 0;
12157 ctxt->html = 0;
12158 ctxt->external = 0;
12159 ctxt->instate = XML_PARSER_START;
12160 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012161
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012162 ctxt->wellFormed = 1;
12163 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012164 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012165 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012166#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012167 ctxt->vctxt.userData = ctxt;
12168 ctxt->vctxt.error = xmlParserValidityError;
12169 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012170#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012171 ctxt->record_info = 0;
12172 ctxt->nbChars = 0;
12173 ctxt->checkIndex = 0;
12174 ctxt->inSubset = 0;
12175 ctxt->errNo = XML_ERR_OK;
12176 ctxt->depth = 0;
12177 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12178 ctxt->catalogs = NULL;
12179 xmlInitNodeInfoSeq(&ctxt->node_seq);
12180
12181 if (ctxt->attsDefault != NULL) {
12182 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12183 ctxt->attsDefault = NULL;
12184 }
12185 if (ctxt->attsSpecial != NULL) {
12186 xmlHashFree(ctxt->attsSpecial, NULL);
12187 ctxt->attsSpecial = NULL;
12188 }
12189
Daniel Veillard4432df22003-09-28 18:58:27 +000012190#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012191 if (ctxt->catalogs != NULL)
12192 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012193#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012194 if (ctxt->lastError.code != XML_ERR_OK)
12195 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012196}
12197
12198/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012199 * xmlCtxtResetPush:
12200 * @ctxt: an XML parser context
12201 * @chunk: a pointer to an array of chars
12202 * @size: number of chars in the array
12203 * @filename: an optional file name or URI
12204 * @encoding: the document encoding, or NULL
12205 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012206 * Reset a push parser context
12207 *
12208 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012209 */
12210int
12211xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12212 int size, const char *filename, const char *encoding)
12213{
12214 xmlParserInputPtr inputStream;
12215 xmlParserInputBufferPtr buf;
12216 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12217
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012218 if (ctxt == NULL)
12219 return(1);
12220
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012221 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12222 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12223
12224 buf = xmlAllocParserInputBuffer(enc);
12225 if (buf == NULL)
12226 return(1);
12227
12228 if (ctxt == NULL) {
12229 xmlFreeParserInputBuffer(buf);
12230 return(1);
12231 }
12232
12233 xmlCtxtReset(ctxt);
12234
12235 if (ctxt->pushTab == NULL) {
12236 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12237 sizeof(xmlChar *));
12238 if (ctxt->pushTab == NULL) {
12239 xmlErrMemory(ctxt, NULL);
12240 xmlFreeParserInputBuffer(buf);
12241 return(1);
12242 }
12243 }
12244
12245 if (filename == NULL) {
12246 ctxt->directory = NULL;
12247 } else {
12248 ctxt->directory = xmlParserGetDirectory(filename);
12249 }
12250
12251 inputStream = xmlNewInputStream(ctxt);
12252 if (inputStream == NULL) {
12253 xmlFreeParserInputBuffer(buf);
12254 return(1);
12255 }
12256
12257 if (filename == NULL)
12258 inputStream->filename = NULL;
12259 else
12260 inputStream->filename = (char *)
12261 xmlCanonicPath((const xmlChar *) filename);
12262 inputStream->buf = buf;
12263 inputStream->base = inputStream->buf->buffer->content;
12264 inputStream->cur = inputStream->buf->buffer->content;
12265 inputStream->end =
12266 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12267
12268 inputPush(ctxt, inputStream);
12269
12270 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12271 (ctxt->input->buf != NULL)) {
12272 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12273 int cur = ctxt->input->cur - ctxt->input->base;
12274
12275 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12276
12277 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12278 ctxt->input->cur = ctxt->input->base + cur;
12279 ctxt->input->end =
12280 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12281 use];
12282#ifdef DEBUG_PUSH
12283 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12284#endif
12285 }
12286
12287 if (encoding != NULL) {
12288 xmlCharEncodingHandlerPtr hdlr;
12289
12290 hdlr = xmlFindCharEncodingHandler(encoding);
12291 if (hdlr != NULL) {
12292 xmlSwitchToEncoding(ctxt, hdlr);
12293 } else {
12294 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12295 "Unsupported encoding %s\n", BAD_CAST encoding);
12296 }
12297 } else if (enc != XML_CHAR_ENCODING_NONE) {
12298 xmlSwitchEncoding(ctxt, enc);
12299 }
12300
12301 return(0);
12302}
12303
12304/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012305 * xmlCtxtUseOptions:
12306 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012307 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012308 *
12309 * Applies the options to the parser context
12310 *
12311 * Returns 0 in case of success, the set of unknown or unimplemented options
12312 * in case of error.
12313 */
12314int
12315xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12316{
12317 if (options & XML_PARSE_RECOVER) {
12318 ctxt->recovery = 1;
12319 options -= XML_PARSE_RECOVER;
12320 } else
12321 ctxt->recovery = 0;
12322 if (options & XML_PARSE_DTDLOAD) {
12323 ctxt->loadsubset = XML_DETECT_IDS;
12324 options -= XML_PARSE_DTDLOAD;
12325 } else
12326 ctxt->loadsubset = 0;
12327 if (options & XML_PARSE_DTDATTR) {
12328 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12329 options -= XML_PARSE_DTDATTR;
12330 }
12331 if (options & XML_PARSE_NOENT) {
12332 ctxt->replaceEntities = 1;
12333 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12334 options -= XML_PARSE_NOENT;
12335 } else
12336 ctxt->replaceEntities = 0;
12337 if (options & XML_PARSE_NOWARNING) {
12338 ctxt->sax->warning = NULL;
12339 options -= XML_PARSE_NOWARNING;
12340 }
12341 if (options & XML_PARSE_NOERROR) {
12342 ctxt->sax->error = NULL;
12343 ctxt->sax->fatalError = NULL;
12344 options -= XML_PARSE_NOERROR;
12345 }
12346 if (options & XML_PARSE_PEDANTIC) {
12347 ctxt->pedantic = 1;
12348 options -= XML_PARSE_PEDANTIC;
12349 } else
12350 ctxt->pedantic = 0;
12351 if (options & XML_PARSE_NOBLANKS) {
12352 ctxt->keepBlanks = 0;
12353 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12354 options -= XML_PARSE_NOBLANKS;
12355 } else
12356 ctxt->keepBlanks = 1;
12357 if (options & XML_PARSE_DTDVALID) {
12358 ctxt->validate = 1;
12359 if (options & XML_PARSE_NOWARNING)
12360 ctxt->vctxt.warning = NULL;
12361 if (options & XML_PARSE_NOERROR)
12362 ctxt->vctxt.error = NULL;
12363 options -= XML_PARSE_DTDVALID;
12364 } else
12365 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012366#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012367 if (options & XML_PARSE_SAX1) {
12368 ctxt->sax->startElement = xmlSAX2StartElement;
12369 ctxt->sax->endElement = xmlSAX2EndElement;
12370 ctxt->sax->startElementNs = NULL;
12371 ctxt->sax->endElementNs = NULL;
12372 ctxt->sax->initialized = 1;
12373 options -= XML_PARSE_SAX1;
12374 }
Daniel Veillard81273902003-09-30 00:43:48 +000012375#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012376 if (options & XML_PARSE_NODICT) {
12377 ctxt->dictNames = 0;
12378 options -= XML_PARSE_NODICT;
12379 } else {
12380 ctxt->dictNames = 1;
12381 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012382 if (options & XML_PARSE_NOCDATA) {
12383 ctxt->sax->cdataBlock = NULL;
12384 options -= XML_PARSE_NOCDATA;
12385 }
12386 if (options & XML_PARSE_NSCLEAN) {
12387 ctxt->options |= XML_PARSE_NSCLEAN;
12388 options -= XML_PARSE_NSCLEAN;
12389 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012390 if (options & XML_PARSE_NONET) {
12391 ctxt->options |= XML_PARSE_NONET;
12392 options -= XML_PARSE_NONET;
12393 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012394 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012395 return (options);
12396}
12397
12398/**
12399 * xmlDoRead:
12400 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012401 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012402 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012403 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012404 * @reuse: keep the context for reuse
12405 *
12406 * Common front-end for the xmlRead functions
12407 *
12408 * Returns the resulting document tree or NULL
12409 */
12410static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012411xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12412 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012413{
12414 xmlDocPtr ret;
12415
12416 xmlCtxtUseOptions(ctxt, options);
12417 if (encoding != NULL) {
12418 xmlCharEncodingHandlerPtr hdlr;
12419
12420 hdlr = xmlFindCharEncodingHandler(encoding);
12421 if (hdlr != NULL)
12422 xmlSwitchToEncoding(ctxt, hdlr);
12423 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012424 if ((URL != NULL) && (ctxt->input != NULL) &&
12425 (ctxt->input->filename == NULL))
12426 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012427 xmlParseDocument(ctxt);
12428 if ((ctxt->wellFormed) || ctxt->recovery)
12429 ret = ctxt->myDoc;
12430 else {
12431 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012432 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012433 xmlFreeDoc(ctxt->myDoc);
12434 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012435 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012436 ctxt->myDoc = NULL;
12437 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012438 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012439 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012440
12441 return (ret);
12442}
12443
12444/**
12445 * xmlReadDoc:
12446 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012447 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012448 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012449 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012450 *
12451 * parse an XML in-memory document and build a tree.
12452 *
12453 * Returns the resulting document tree
12454 */
12455xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012456xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012457{
12458 xmlParserCtxtPtr ctxt;
12459
12460 if (cur == NULL)
12461 return (NULL);
12462
12463 ctxt = xmlCreateDocParserCtxt(cur);
12464 if (ctxt == NULL)
12465 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012466 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012467}
12468
12469/**
12470 * xmlReadFile:
12471 * @filename: a file or URL
12472 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012473 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012474 *
12475 * parse an XML file from the filesystem or the network.
12476 *
12477 * Returns the resulting document tree
12478 */
12479xmlDocPtr
12480xmlReadFile(const char *filename, const char *encoding, int options)
12481{
12482 xmlParserCtxtPtr ctxt;
12483
Daniel Veillard61b93382003-11-03 14:28:31 +000012484 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012485 if (ctxt == NULL)
12486 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012487 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012488}
12489
12490/**
12491 * xmlReadMemory:
12492 * @buffer: a pointer to a char array
12493 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012494 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012495 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012496 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012497 *
12498 * parse an XML in-memory document and build a tree.
12499 *
12500 * Returns the resulting document tree
12501 */
12502xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012503xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012504{
12505 xmlParserCtxtPtr ctxt;
12506
12507 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12508 if (ctxt == NULL)
12509 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012510 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012511}
12512
12513/**
12514 * xmlReadFd:
12515 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012516 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012517 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012518 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012519 *
12520 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012521 * NOTE that the file descriptor will not be closed when the
12522 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012523 *
12524 * Returns the resulting document tree
12525 */
12526xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012527xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012528{
12529 xmlParserCtxtPtr ctxt;
12530 xmlParserInputBufferPtr input;
12531 xmlParserInputPtr stream;
12532
12533 if (fd < 0)
12534 return (NULL);
12535
12536 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12537 if (input == NULL)
12538 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012539 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012540 ctxt = xmlNewParserCtxt();
12541 if (ctxt == NULL) {
12542 xmlFreeParserInputBuffer(input);
12543 return (NULL);
12544 }
12545 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12546 if (stream == NULL) {
12547 xmlFreeParserInputBuffer(input);
12548 xmlFreeParserCtxt(ctxt);
12549 return (NULL);
12550 }
12551 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012552 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012553}
12554
12555/**
12556 * xmlReadIO:
12557 * @ioread: an I/O read function
12558 * @ioclose: an I/O close function
12559 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012560 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012561 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012562 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012563 *
12564 * parse an XML document from I/O functions and source and build a tree.
12565 *
12566 * Returns the resulting document tree
12567 */
12568xmlDocPtr
12569xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012570 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012571{
12572 xmlParserCtxtPtr ctxt;
12573 xmlParserInputBufferPtr input;
12574 xmlParserInputPtr stream;
12575
12576 if (ioread == NULL)
12577 return (NULL);
12578
12579 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12580 XML_CHAR_ENCODING_NONE);
12581 if (input == NULL)
12582 return (NULL);
12583 ctxt = xmlNewParserCtxt();
12584 if (ctxt == NULL) {
12585 xmlFreeParserInputBuffer(input);
12586 return (NULL);
12587 }
12588 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12589 if (stream == NULL) {
12590 xmlFreeParserInputBuffer(input);
12591 xmlFreeParserCtxt(ctxt);
12592 return (NULL);
12593 }
12594 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012595 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012596}
12597
12598/**
12599 * xmlCtxtReadDoc:
12600 * @ctxt: an XML parser context
12601 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012602 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012603 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012604 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012605 *
12606 * parse an XML in-memory document and build a tree.
12607 * This reuses the existing @ctxt parser context
12608 *
12609 * Returns the resulting document tree
12610 */
12611xmlDocPtr
12612xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012613 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012614{
12615 xmlParserInputPtr stream;
12616
12617 if (cur == NULL)
12618 return (NULL);
12619 if (ctxt == NULL)
12620 return (NULL);
12621
12622 xmlCtxtReset(ctxt);
12623
12624 stream = xmlNewStringInputStream(ctxt, cur);
12625 if (stream == NULL) {
12626 return (NULL);
12627 }
12628 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012629 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012630}
12631
12632/**
12633 * xmlCtxtReadFile:
12634 * @ctxt: an XML parser context
12635 * @filename: a file or URL
12636 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012637 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012638 *
12639 * parse an XML file from the filesystem or the network.
12640 * This reuses the existing @ctxt parser context
12641 *
12642 * Returns the resulting document tree
12643 */
12644xmlDocPtr
12645xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12646 const char *encoding, int options)
12647{
12648 xmlParserInputPtr stream;
12649
12650 if (filename == NULL)
12651 return (NULL);
12652 if (ctxt == NULL)
12653 return (NULL);
12654
12655 xmlCtxtReset(ctxt);
12656
12657 stream = xmlNewInputFromFile(ctxt, filename);
12658 if (stream == NULL) {
12659 return (NULL);
12660 }
12661 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012662 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012663}
12664
12665/**
12666 * xmlCtxtReadMemory:
12667 * @ctxt: an XML parser context
12668 * @buffer: a pointer to a char array
12669 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012670 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012671 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012672 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012673 *
12674 * parse an XML in-memory document and build a tree.
12675 * This reuses the existing @ctxt parser context
12676 *
12677 * Returns the resulting document tree
12678 */
12679xmlDocPtr
12680xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012681 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012682{
12683 xmlParserInputBufferPtr input;
12684 xmlParserInputPtr stream;
12685
12686 if (ctxt == NULL)
12687 return (NULL);
12688 if (buffer == NULL)
12689 return (NULL);
12690
12691 xmlCtxtReset(ctxt);
12692
12693 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12694 if (input == NULL) {
12695 return(NULL);
12696 }
12697
12698 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12699 if (stream == NULL) {
12700 xmlFreeParserInputBuffer(input);
12701 return(NULL);
12702 }
12703
12704 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012705 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012706}
12707
12708/**
12709 * xmlCtxtReadFd:
12710 * @ctxt: an XML parser context
12711 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012712 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012713 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012714 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012715 *
12716 * parse an XML from a file descriptor and build a tree.
12717 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012718 * NOTE that the file descriptor will not be closed when the
12719 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012720 *
12721 * Returns the resulting document tree
12722 */
12723xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012724xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12725 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012726{
12727 xmlParserInputBufferPtr input;
12728 xmlParserInputPtr stream;
12729
12730 if (fd < 0)
12731 return (NULL);
12732 if (ctxt == NULL)
12733 return (NULL);
12734
12735 xmlCtxtReset(ctxt);
12736
12737
12738 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12739 if (input == NULL)
12740 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012741 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012742 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12743 if (stream == NULL) {
12744 xmlFreeParserInputBuffer(input);
12745 return (NULL);
12746 }
12747 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012748 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012749}
12750
12751/**
12752 * xmlCtxtReadIO:
12753 * @ctxt: an XML parser context
12754 * @ioread: an I/O read function
12755 * @ioclose: an I/O close function
12756 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012757 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012758 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012759 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012760 *
12761 * parse an XML document from I/O functions and source and build a tree.
12762 * This reuses the existing @ctxt parser context
12763 *
12764 * Returns the resulting document tree
12765 */
12766xmlDocPtr
12767xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12768 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012769 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012770 const char *encoding, int options)
12771{
12772 xmlParserInputBufferPtr input;
12773 xmlParserInputPtr stream;
12774
12775 if (ioread == NULL)
12776 return (NULL);
12777 if (ctxt == NULL)
12778 return (NULL);
12779
12780 xmlCtxtReset(ctxt);
12781
12782 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12783 XML_CHAR_ENCODING_NONE);
12784 if (input == NULL)
12785 return (NULL);
12786 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12787 if (stream == NULL) {
12788 xmlFreeParserInputBuffer(input);
12789 return (NULL);
12790 }
12791 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012792 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012793}