blob: d038e4684010a7b58181cf4b9e18a8eb8b244b31 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
unsigned int xmlParserMaxDepth = 1024;

#define SAX2 1

/* Buffer size constants used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL-terminated list of XML prefixed PI targets allowed by the
 * W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000857 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1231
/* current byte of the current input; RAW and CUR expand identically */
#define RAW      (*ctxt->input->cur)
#define CUR      (*ctxt->input->cur)
/* byte located (val) positions after the current one */
#define NXT(val) (ctxt->input->cur[(val)])
/* the cursor itself; left bare so it can still be assigned to */
#define CUR_PTR  ctxt->input->cur
1236
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are exactly
 * c1 ... cn.  Bytes are compared as unsigned char, left to right, and the
 * comparison stops at the first mismatch.  Every argument is parenthesized
 * so that arbitrary expressions can be passed, and the cast keeps the
 * pointed-to data const.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1254
/*
 * SKIP(val): advance the cursor, the column and the character count by
 * (val), then handle a parameter-entity reference found at the new position
 * and pop the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1262
/*
 * SKIPL(val): like SKIP but walks the (val) characters one at a time so that
 * line and column stay correct when newlines are skipped.  (val) is
 * parenthesized so any expression can be passed; it is re-evaluated on each
 * iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1277
Daniel Veillarda880b122003-04-21 21:36:41 +00001278#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001279 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1280 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001281 xmlSHRINK (ctxt);
1282
1283static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1284 xmlParserInputShrink(ctxt->input);
1285 if ((*ctxt->input->cur == 0) &&
1286 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1287 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001288 }
Owen Taylor3473f882001-02-23 17:55:21 +00001289
Daniel Veillarda880b122003-04-21 21:36:41 +00001290#define GROW if ((ctxt->progressive == 0) && \
1291 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001292 xmlGROW (ctxt);
1293
1294static void xmlGROW (xmlParserCtxtPtr ctxt) {
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 if ((*ctxt->input->cur == 0) &&
1297 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1298 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001299}
Owen Taylor3473f882001-02-23 17:55:21 +00001300
/* skip blank characters at the current position */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over a single byte, bumping column and character count, and
 * grow the input when the new position holds a zero byte.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, (l) bytes long, keeping line
 * and column up to date, then handle a parameter-entity reference found at
 * the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context / of string s; l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a character of length 1 is stored directly, anything else goes through
 * xmlCopyCharMultiByte().  Expands to an unbraced if/else without a
 * trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1328/**
1329 * xmlSkipBlankChars:
1330 * @ctxt: the XML parser context
1331 *
1332 * skip all blanks character found at that point in the input streams.
1333 * It pops up finished entities in the process if allowable at that point.
1334 *
1335 * Returns the number of space chars skipped
1336 */
1337
1338int
1339xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001340 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001341
1342 /*
1343 * It's Okay to use CUR/NEXT here since all the blanks are on
1344 * the ASCII range.
1345 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001346 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1347 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001348 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001349 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001350 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001351 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001352 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001353 if (*cur == '\n') {
1354 ctxt->input->line++; ctxt->input->col = 1;
1355 }
1356 cur++;
1357 res++;
1358 if (*cur == 0) {
1359 ctxt->input->cur = cur;
1360 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1361 cur = ctxt->input->cur;
1362 }
1363 }
1364 ctxt->input->cur = cur;
1365 } else {
1366 int cur;
1367 do {
1368 cur = CUR;
1369 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1370 NEXT;
1371 cur = CUR;
1372 res++;
1373 }
1374 while ((cur == 0) && (ctxt->inputNr > 1) &&
1375 (ctxt->instate != XML_PARSER_COMMENT)) {
1376 xmlPopInput(ctxt);
1377 cur = CUR;
1378 }
1379 /*
1380 * Need to handle support of entities branching here
1381 */
1382 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1383 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1384 }
Owen Taylor3473f882001-02-23 17:55:21 +00001385 return(res);
1386}
1387
1388/************************************************************************
1389 * *
1390 * Commodity functions to handle entities *
1391 * *
1392 ************************************************************************/
1393
1394/**
1395 * xmlPopInput:
1396 * @ctxt: an XML parser context
1397 *
1398 * xmlPopInput: the current input pointed by ctxt->input came to an end
1399 * pop it and return the next char.
1400 *
1401 * Returns the current xmlChar in the parser context
1402 */
1403xmlChar
1404xmlPopInput(xmlParserCtxtPtr ctxt) {
1405 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1406 if (xmlParserDebugEntities)
1407 xmlGenericError(xmlGenericErrorContext,
1408 "Popping input %d\n", ctxt->inputNr);
1409 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001410 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001411 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1412 return(xmlPopInput(ctxt));
1413 return(CUR);
1414}
1415
1416/**
1417 * xmlPushInput:
1418 * @ctxt: an XML parser context
1419 * @input: an XML parser input fragment (entity, XML fragment ...).
1420 *
1421 * xmlPushInput: switch to a new input stream which is stacked on top
1422 * of the previous one(s).
1423 */
1424void
1425xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1426 if (input == NULL) return;
1427
1428 if (xmlParserDebugEntities) {
1429 if ((ctxt->input != NULL) && (ctxt->input->filename))
1430 xmlGenericError(xmlGenericErrorContext,
1431 "%s(%d): ", ctxt->input->filename,
1432 ctxt->input->line);
1433 xmlGenericError(xmlGenericErrorContext,
1434 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1435 }
1436 inputPush(ctxt, input);
1437 GROW;
1438}
1439
1440/**
1441 * xmlParseCharRef:
1442 * @ctxt: an XML parser context
1443 *
1444 * parse Reference declarations
1445 *
1446 * [66] CharRef ::= '&#' [0-9]+ ';' |
1447 * '&#x' [0-9a-fA-F]+ ';'
1448 *
1449 * [ WFC: Legal Character ]
1450 * Characters referred to using character references must match the
1451 * production for Char.
1452 *
1453 * Returns the value parsed (as an int), 0 in case of error
1454 */
1455int
1456xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001457 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001458 int count = 0;
1459
Owen Taylor3473f882001-02-23 17:55:21 +00001460 /*
1461 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1462 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001463 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001464 (NXT(2) == 'x')) {
1465 SKIP(3);
1466 GROW;
1467 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001468 if (count++ > 20) {
1469 count = 0;
1470 GROW;
1471 }
1472 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001473 val = val * 16 + (CUR - '0');
1474 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1475 val = val * 16 + (CUR - 'a') + 10;
1476 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1477 val = val * 16 + (CUR - 'A') + 10;
1478 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001479 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001480 val = 0;
1481 break;
1482 }
1483 NEXT;
1484 count++;
1485 }
1486 if (RAW == ';') {
1487 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001488 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001489 ctxt->nbChars ++;
1490 ctxt->input->cur++;
1491 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001492 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001493 SKIP(2);
1494 GROW;
1495 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001496 if (count++ > 20) {
1497 count = 0;
1498 GROW;
1499 }
1500 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001501 val = val * 10 + (CUR - '0');
1502 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001503 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001504 val = 0;
1505 break;
1506 }
1507 NEXT;
1508 count++;
1509 }
1510 if (RAW == ';') {
1511 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001512 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001513 ctxt->nbChars ++;
1514 ctxt->input->cur++;
1515 }
1516 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001517 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 }
1519
1520 /*
1521 * [ WFC: Legal Character ]
1522 * Characters referred to using character references must match the
1523 * production for Char.
1524 */
William M. Brack871611b2003-10-18 04:53:14 +00001525 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001526 return(val);
1527 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001528 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1529 "xmlParseCharRef: invalid xmlChar value %d\n",
1530 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 }
1532 return(0);
1533}
1534
1535/**
1536 * xmlParseStringCharRef:
1537 * @ctxt: an XML parser context
1538 * @str: a pointer to an index in the string
1539 *
1540 * parse Reference declarations, variant parsing from a string rather
1541 * than an an input flow.
1542 *
1543 * [66] CharRef ::= '&#' [0-9]+ ';' |
1544 * '&#x' [0-9a-fA-F]+ ';'
1545 *
1546 * [ WFC: Legal Character ]
1547 * Characters referred to using character references must match the
1548 * production for Char.
1549 *
1550 * Returns the value parsed (as an int), 0 in case of error, str will be
1551 * updated to the current value of the index
1552 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001553static int
Owen Taylor3473f882001-02-23 17:55:21 +00001554xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1555 const xmlChar *ptr;
1556 xmlChar cur;
1557 int val = 0;
1558
1559 if ((str == NULL) || (*str == NULL)) return(0);
1560 ptr = *str;
1561 cur = *ptr;
1562 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1563 ptr += 3;
1564 cur = *ptr;
1565 while (cur != ';') { /* Non input consuming loop */
1566 if ((cur >= '0') && (cur <= '9'))
1567 val = val * 16 + (cur - '0');
1568 else if ((cur >= 'a') && (cur <= 'f'))
1569 val = val * 16 + (cur - 'a') + 10;
1570 else if ((cur >= 'A') && (cur <= 'F'))
1571 val = val * 16 + (cur - 'A') + 10;
1572 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001573 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001574 val = 0;
1575 break;
1576 }
1577 ptr++;
1578 cur = *ptr;
1579 }
1580 if (cur == ';')
1581 ptr++;
1582 } else if ((cur == '&') && (ptr[1] == '#')){
1583 ptr += 2;
1584 cur = *ptr;
1585 while (cur != ';') { /* Non input consuming loops */
1586 if ((cur >= '0') && (cur <= '9'))
1587 val = val * 10 + (cur - '0');
1588 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001589 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001590 val = 0;
1591 break;
1592 }
1593 ptr++;
1594 cur = *ptr;
1595 }
1596 if (cur == ';')
1597 ptr++;
1598 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001599 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 return(0);
1601 }
1602 *str = ptr;
1603
1604 /*
1605 * [ WFC: Legal Character ]
1606 * Characters referred to using character references must match the
1607 * production for Char.
1608 */
William M. Brack871611b2003-10-18 04:53:14 +00001609 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001610 return(val);
1611 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001612 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1613 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1614 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001615 }
1616 return(0);
1617}
1618
1619/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001620 * xmlNewBlanksWrapperInputStream:
1621 * @ctxt: an XML parser context
1622 * @entity: an Entity pointer
1623 *
1624 * Create a new input stream for wrapping
1625 * blanks around a PEReference
1626 *
1627 * Returns the new input stream or NULL
1628 */
1629
1630static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1631
Daniel Veillardf4862f02002-09-10 11:13:43 +00001632static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001633xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1634 xmlParserInputPtr input;
1635 xmlChar *buffer;
1636 size_t length;
1637 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001638 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1639 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001640 return(NULL);
1641 }
1642 if (xmlParserDebugEntities)
1643 xmlGenericError(xmlGenericErrorContext,
1644 "new blanks wrapper for entity: %s\n", entity->name);
1645 input = xmlNewInputStream(ctxt);
1646 if (input == NULL) {
1647 return(NULL);
1648 }
1649 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001650 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001651 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001652 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001653 return(NULL);
1654 }
1655 buffer [0] = ' ';
1656 buffer [1] = '%';
1657 buffer [length-3] = ';';
1658 buffer [length-2] = ' ';
1659 buffer [length-1] = 0;
1660 memcpy(buffer + 2, entity->name, length - 5);
1661 input->free = deallocblankswrapper;
1662 input->base = buffer;
1663 input->cur = buffer;
1664 input->length = length;
1665 input->end = &buffer[length];
1666 return(input);
1667}
1668
1669/**
Owen Taylor3473f882001-02-23 17:55:21 +00001670 * xmlParserHandlePEReference:
1671 * @ctxt: the parser context
1672 *
1673 * [69] PEReference ::= '%' Name ';'
1674 *
1675 * [ WFC: No Recursion ]
1676 * A parsed entity must not contain a recursive
1677 * reference to itself, either directly or indirectly.
1678 *
1679 * [ WFC: Entity Declared ]
1680 * In a document without any DTD, a document with only an internal DTD
1681 * subset which contains no parameter entity references, or a document
1682 * with "standalone='yes'", ... ... The declaration of a parameter
1683 * entity must precede any reference to it...
1684 *
1685 * [ VC: Entity Declared ]
1686 * In a document with an external subset or external parameter entities
1687 * with "standalone='no'", ... ... The declaration of a parameter entity
1688 * must precede any reference to it...
1689 *
1690 * [ WFC: In DTD ]
1691 * Parameter-entity references may only appear in the DTD.
1692 * NOTE: misleading but this is handled.
1693 *
1694 * A PEReference may have been detected in the current input stream
1695 * the handling is done accordingly to
1696 * http://www.w3.org/TR/REC-xml#entproc
1697 * i.e.
1698 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001699 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001700 */
1701void
1702xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001703 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 xmlEntityPtr entity = NULL;
1705 xmlParserInputPtr input;
1706
Owen Taylor3473f882001-02-23 17:55:21 +00001707 if (RAW != '%') return;
1708 switch(ctxt->instate) {
1709 case XML_PARSER_CDATA_SECTION:
1710 return;
1711 case XML_PARSER_COMMENT:
1712 return;
1713 case XML_PARSER_START_TAG:
1714 return;
1715 case XML_PARSER_END_TAG:
1716 return;
1717 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001718 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001719 return;
1720 case XML_PARSER_PROLOG:
1721 case XML_PARSER_START:
1722 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001723 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001724 return;
1725 case XML_PARSER_ENTITY_DECL:
1726 case XML_PARSER_CONTENT:
1727 case XML_PARSER_ATTRIBUTE_VALUE:
1728 case XML_PARSER_PI:
1729 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001730 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001731 /* we just ignore it there */
1732 return;
1733 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001734 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001735 return;
1736 case XML_PARSER_ENTITY_VALUE:
1737 /*
1738 * NOTE: in the case of entity values, we don't do the
1739 * substitution here since we need the literal
1740 * entity value to be able to save the internal
1741 * subset of the document.
1742 * This will be handled by xmlStringDecodeEntities
1743 */
1744 return;
1745 case XML_PARSER_DTD:
1746 /*
1747 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1748 * In the internal DTD subset, parameter-entity references
1749 * can occur only where markup declarations can occur, not
1750 * within markup declarations.
1751 * In that case this is handled in xmlParseMarkupDecl
1752 */
1753 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1754 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001755 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001756 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 break;
1758 case XML_PARSER_IGNORE:
1759 return;
1760 }
1761
1762 NEXT;
1763 name = xmlParseName(ctxt);
1764 if (xmlParserDebugEntities)
1765 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001766 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001767 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001768 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001769 } else {
1770 if (RAW == ';') {
1771 NEXT;
1772 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1773 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1774 if (entity == NULL) {
1775
1776 /*
1777 * [ WFC: Entity Declared ]
1778 * In a document without any DTD, a document with only an
1779 * internal DTD subset which contains no parameter entity
1780 * references, or a document with "standalone='yes'", ...
1781 * ... The declaration of a parameter entity must precede
1782 * any reference to it...
1783 */
1784 if ((ctxt->standalone == 1) ||
1785 ((ctxt->hasExternalSubset == 0) &&
1786 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001787 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001788 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001789 } else {
1790 /*
1791 * [ VC: Entity Declared ]
1792 * In a document with an external subset or external
1793 * parameter entities with "standalone='no'", ...
1794 * ... The declaration of a parameter entity must precede
1795 * any reference to it...
1796 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001797 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1798 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1799 "PEReference: %%%s; not found\n",
1800 name);
1801 } else
1802 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1803 "PEReference: %%%s; not found\n",
1804 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001805 ctxt->valid = 0;
1806 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001807 } else if (ctxt->input->free != deallocblankswrapper) {
1808 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1809 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 } else {
1811 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1812 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001813 xmlChar start[4];
1814 xmlCharEncoding enc;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 /*
1817 * handle the extra spaces added before and after
1818 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001819 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001820 */
1821 input = xmlNewEntityInputStream(ctxt, entity);
1822 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001823
1824 /*
1825 * Get the 4 first bytes and decode the charset
1826 * if enc != XML_CHAR_ENCODING_NONE
1827 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001828 * Note that, since we may have some non-UTF8
1829 * encoding (like UTF16, bug 135229), the 'length'
1830 * is not known, but we can calculate based upon
1831 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001832 */
1833 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001834 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001835 start[0] = RAW;
1836 start[1] = NXT(1);
1837 start[2] = NXT(2);
1838 start[3] = NXT(3);
1839 enc = xmlDetectCharEncoding(start, 4);
1840 if (enc != XML_CHAR_ENCODING_NONE) {
1841 xmlSwitchEncoding(ctxt, enc);
1842 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001843 }
1844
Owen Taylor3473f882001-02-23 17:55:21 +00001845 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001846 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1847 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001848 xmlParseTextDecl(ctxt);
1849 }
Owen Taylor3473f882001-02-23 17:55:21 +00001850 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001851 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1852 "PEReference: %s is not a parameter entity\n",
1853 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001854 }
1855 }
1856 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001857 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001858 }
Owen Taylor3473f882001-02-23 17:55:21 +00001859 }
1860}
1861
/*
 * growBuffer:
 * Double the capacity of @buffer.  The expansion site must provide an
 * int lvalue named <buffer>_size and a label named mem_error.
 * <buffer>_size is doubled before the reallocation is attempted; if the
 * reallocation fails, control jumps to mem_error with @buffer still
 * pointing to the old block.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (tmp != NULL)							\
        buffer = tmp;							\
    else								\
        goto mem_error;							\
}
1873
1874/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001875 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001876 * @ctxt: the parser context
1877 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001878 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001879 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1880 * @end: an end marker xmlChar, 0 if none
1881 * @end2: an end marker xmlChar, 0 if none
1882 * @end3: an end marker xmlChar, 0 if none
1883 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001884 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001885 *
1886 * [67] Reference ::= EntityRef | CharRef
1887 *
1888 * [69] PEReference ::= '%' Name ';'
1889 *
1890 * Returns A newly allocated string with the substitution done. The caller
1891 * must deallocate it !
1892 */
1893xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001894xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1895 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001896 xmlChar *buffer = NULL;
1897 int buffer_size = 0;
1898
1899 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001900 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001901 xmlEntityPtr ent;
1902 int c,l;
1903 int nbchars = 0;
1904
Daniel Veillarde57ec792003-09-10 10:50:59 +00001905 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001906 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001907 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001908
1909 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001910 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001911 return(NULL);
1912 }
1913
1914 /*
1915 * allocate a translation buffer.
1916 */
1917 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001918 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001919 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001920
1921 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001922 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001923 * we are operating on already parsed values.
1924 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001925 if (str < last)
1926 c = CUR_SCHAR(str, l);
1927 else
1928 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001929 while ((c != 0) && (c != end) && /* non input consuming loop */
1930 (c != end2) && (c != end3)) {
1931
1932 if (c == 0) break;
1933 if ((c == '&') && (str[1] == '#')) {
1934 int val = xmlParseStringCharRef(ctxt, &str);
1935 if (val != 0) {
1936 COPY_BUF(0,buffer,nbchars,val);
1937 }
1938 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1939 if (xmlParserDebugEntities)
1940 xmlGenericError(xmlGenericErrorContext,
1941 "String decoding Entity Reference: %.30s\n",
1942 str);
1943 ent = xmlParseStringEntityRef(ctxt, &str);
1944 if ((ent != NULL) &&
1945 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1946 if (ent->content != NULL) {
1947 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1948 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001949 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1950 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001951 }
1952 } else if ((ent != NULL) && (ent->content != NULL)) {
1953 xmlChar *rep;
1954
1955 ctxt->depth++;
1956 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1957 0, 0, 0);
1958 ctxt->depth--;
1959 if (rep != NULL) {
1960 current = rep;
1961 while (*current != 0) { /* non input consuming loop */
1962 buffer[nbchars++] = *current++;
1963 if (nbchars >
1964 buffer_size - XML_PARSER_BUFFER_SIZE) {
1965 growBuffer(buffer);
1966 }
1967 }
1968 xmlFree(rep);
1969 }
1970 } else if (ent != NULL) {
1971 int i = xmlStrlen(ent->name);
1972 const xmlChar *cur = ent->name;
1973
1974 buffer[nbchars++] = '&';
1975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1976 growBuffer(buffer);
1977 }
1978 for (;i > 0;i--)
1979 buffer[nbchars++] = *cur++;
1980 buffer[nbchars++] = ';';
1981 }
1982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1983 if (xmlParserDebugEntities)
1984 xmlGenericError(xmlGenericErrorContext,
1985 "String decoding PE Reference: %.30s\n", str);
1986 ent = xmlParseStringPEReference(ctxt, &str);
1987 if (ent != NULL) {
1988 xmlChar *rep;
1989
1990 ctxt->depth++;
1991 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1992 0, 0, 0);
1993 ctxt->depth--;
1994 if (rep != NULL) {
1995 current = rep;
1996 while (*current != 0) { /* non input consuming loop */
1997 buffer[nbchars++] = *current++;
1998 if (nbchars >
1999 buffer_size - XML_PARSER_BUFFER_SIZE) {
2000 growBuffer(buffer);
2001 }
2002 }
2003 xmlFree(rep);
2004 }
2005 }
2006 } else {
2007 COPY_BUF(l,buffer,nbchars,c);
2008 str += l;
2009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2010 growBuffer(buffer);
2011 }
2012 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002013 if (str < last)
2014 c = CUR_SCHAR(str, l);
2015 else
2016 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017 }
2018 buffer[nbchars++] = 0;
2019 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002020
2021mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002022 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002023 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002024}
2025
Daniel Veillarde57ec792003-09-10 10:50:59 +00002026/**
2027 * xmlStringDecodeEntities:
2028 * @ctxt: the parser context
2029 * @str: the input string
2030 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2031 * @end: an end marker xmlChar, 0 if none
2032 * @end2: an end marker xmlChar, 0 if none
2033 * @end3: an end marker xmlChar, 0 if none
2034 *
2035 * Takes a entity string content and process to do the adequate substitutions.
2036 *
2037 * [67] Reference ::= EntityRef | CharRef
2038 *
2039 * [69] PEReference ::= '%' Name ';'
2040 *
2041 * Returns A newly allocated string with the substitution done. The caller
2042 * must deallocate it !
2043 */
2044xmlChar *
2045xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2046 xmlChar end, xmlChar end2, xmlChar end3) {
2047 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2048 end, end2, end3));
2049}
Owen Taylor3473f882001-02-23 17:55:21 +00002050
2051/************************************************************************
2052 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002053 * Commodity functions, cleanup needed ? *
2054 * *
2055 ************************************************************************/
2056
2057/**
2058 * areBlanks:
2059 * @ctxt: an XML parser context
2060 * @str: a xmlChar *
2061 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002062 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002063 *
2064 * Is this a sequence of blank chars that one can ignore ?
2065 *
2066 * Returns 1 if ignorable 0 otherwise.
2067 */
2068
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002069static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2070 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002071 int i, ret;
2072 xmlNodePtr lastChild;
2073
Daniel Veillard05c13a22001-09-09 08:38:09 +00002074 /*
2075 * Don't spend time trying to differentiate them, the same callback is
2076 * used !
2077 */
2078 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002079 return(0);
2080
Owen Taylor3473f882001-02-23 17:55:21 +00002081 /*
2082 * Check for xml:space value.
2083 */
2084 if (*(ctxt->space) == 1)
2085 return(0);
2086
2087 /*
2088 * Check that the string is made of blanks
2089 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002090 if (blank_chars == 0) {
2091 for (i = 0;i < len;i++)
2092 if (!(IS_BLANK_CH(str[i]))) return(0);
2093 }
Owen Taylor3473f882001-02-23 17:55:21 +00002094
2095 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002096 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002097 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002098 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002099 if (ctxt->myDoc != NULL) {
2100 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2101 if (ret == 0) return(1);
2102 if (ret == 1) return(0);
2103 }
2104
2105 /*
2106 * Otherwise, heuristic :-\
2107 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002108 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002109 if ((ctxt->node->children == NULL) &&
2110 (RAW == '<') && (NXT(1) == '/')) return(0);
2111
2112 lastChild = xmlGetLastChild(ctxt->node);
2113 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002114 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2115 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002116 } else if (xmlNodeIsText(lastChild))
2117 return(0);
2118 else if ((ctxt->node->children != NULL) &&
2119 (xmlNodeIsText(ctxt->node->children)))
2120 return(0);
2121 return(1);
2122}
2123
Owen Taylor3473f882001-02-23 17:55:21 +00002124/************************************************************************
2125 * *
2126 * Extra stuff for namespace support *
2127 * Relates to http://www.w3.org/TR/WD-xml-names *
2128 * *
2129 ************************************************************************/
2130
2131/**
2132 * xmlSplitQName:
2133 * @ctxt: an XML parser context
2134 * @name: an XML parser context
2135 * @prefix: a xmlChar **
2136 *
2137 * parse an UTF8 encoded XML qualified name string
2138 *
2139 * [NS 5] QName ::= (Prefix ':')? LocalPart
2140 *
2141 * [NS 6] Prefix ::= NCName
2142 *
2143 * [NS 7] LocalPart ::= NCName
2144 *
2145 * Returns the local part, and prefix is updated
2146 * to get the Prefix if any.
2147 */
2148
2149xmlChar *
2150xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2151 xmlChar buf[XML_MAX_NAMELEN + 5];
2152 xmlChar *buffer = NULL;
2153 int len = 0;
2154 int max = XML_MAX_NAMELEN;
2155 xmlChar *ret = NULL;
2156 const xmlChar *cur = name;
2157 int c;
2158
2159 *prefix = NULL;
2160
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002161 if (cur == NULL) return(NULL);
2162
Owen Taylor3473f882001-02-23 17:55:21 +00002163#ifndef XML_XML_NAMESPACE
2164 /* xml: prefix is not really a namespace */
2165 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2166 (cur[2] == 'l') && (cur[3] == ':'))
2167 return(xmlStrdup(name));
2168#endif
2169
Daniel Veillard597bc482003-07-24 16:08:28 +00002170 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002171 if (cur[0] == ':')
2172 return(xmlStrdup(name));
2173
2174 c = *cur++;
2175 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2176 buf[len++] = c;
2177 c = *cur++;
2178 }
2179 if (len >= max) {
2180 /*
2181 * Okay someone managed to make a huge name, so he's ready to pay
2182 * for the processing speed.
2183 */
2184 max = len * 2;
2185
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002186 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002187 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002188 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002189 return(NULL);
2190 }
2191 memcpy(buffer, buf, len);
2192 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2193 if (len + 10 > max) {
2194 max *= 2;
2195 buffer = (xmlChar *) xmlRealloc(buffer,
2196 max * sizeof(xmlChar));
2197 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002198 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002199 return(NULL);
2200 }
2201 }
2202 buffer[len++] = c;
2203 c = *cur++;
2204 }
2205 buffer[len] = 0;
2206 }
2207
Daniel Veillard597bc482003-07-24 16:08:28 +00002208 /* nasty but well=formed
2209 if ((c == ':') && (*cur == 0)) {
2210 return(xmlStrdup(name));
2211 } */
2212
Owen Taylor3473f882001-02-23 17:55:21 +00002213 if (buffer == NULL)
2214 ret = xmlStrndup(buf, len);
2215 else {
2216 ret = buffer;
2217 buffer = NULL;
2218 max = XML_MAX_NAMELEN;
2219 }
2220
2221
2222 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002223 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002224 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002225 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002226 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 len = 0;
2229
Daniel Veillardbb284f42002-10-16 18:02:47 +00002230 /*
2231 * Check that the first character is proper to start
2232 * a new name
2233 */
2234 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2235 ((c >= 0x41) && (c <= 0x5A)) ||
2236 (c == '_') || (c == ':'))) {
2237 int l;
2238 int first = CUR_SCHAR(cur, l);
2239
2240 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002241 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002242 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002243 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002244 }
2245 }
2246 cur++;
2247
Owen Taylor3473f882001-02-23 17:55:21 +00002248 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2249 buf[len++] = c;
2250 c = *cur++;
2251 }
2252 if (len >= max) {
2253 /*
2254 * Okay someone managed to make a huge name, so he's ready to pay
2255 * for the processing speed.
2256 */
2257 max = len * 2;
2258
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002259 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002260 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002261 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002262 return(NULL);
2263 }
2264 memcpy(buffer, buf, len);
2265 while (c != 0) { /* tested bigname2.xml */
2266 if (len + 10 > max) {
2267 max *= 2;
2268 buffer = (xmlChar *) xmlRealloc(buffer,
2269 max * sizeof(xmlChar));
2270 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002271 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272 return(NULL);
2273 }
2274 }
2275 buffer[len++] = c;
2276 c = *cur++;
2277 }
2278 buffer[len] = 0;
2279 }
2280
2281 if (buffer == NULL)
2282 ret = xmlStrndup(buf, len);
2283 else {
2284 ret = buffer;
2285 }
2286 }
2287
2288 return(ret);
2289}
2290
2291/************************************************************************
2292 * *
2293 * The parser itself *
2294 * Relates to http://www.w3.org/TR/REC-xml *
2295 * *
2296 ************************************************************************/
2297
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002298static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002299static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002301
Owen Taylor3473f882001-02-23 17:55:21 +00002302/**
2303 * xmlParseName:
2304 * @ctxt: an XML parser context
2305 *
2306 * parse an XML name.
2307 *
2308 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2309 * CombiningChar | Extender
2310 *
2311 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2312 *
2313 * [6] Names ::= Name (S Name)*
2314 *
2315 * Returns the Name parsed or NULL
2316 */
2317
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002318const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002319xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002320 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002321 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002322 int count = 0;
2323
2324 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002325
2326 /*
2327 * Accelerator for simple ASCII names
2328 */
2329 in = ctxt->input->cur;
2330 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2331 ((*in >= 0x41) && (*in <= 0x5A)) ||
2332 (*in == '_') || (*in == ':')) {
2333 in++;
2334 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2335 ((*in >= 0x41) && (*in <= 0x5A)) ||
2336 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002337 (*in == '_') || (*in == '-') ||
2338 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002339 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002340 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002341 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002342 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002343 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002344 ctxt->nbChars += count;
2345 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002346 if (ret == NULL)
2347 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002348 return(ret);
2349 }
2350 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002351 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002352}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002353
Daniel Veillard46de64e2002-05-29 08:21:33 +00002354/**
2355 * xmlParseNameAndCompare:
2356 * @ctxt: an XML parser context
2357 *
2358 * parse an XML name and compares for match
2359 * (specialized for endtag parsing)
2360 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002361 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2362 * and the name for mismatch
2363 */
2364
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002365static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002366xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002367 register const xmlChar *cmp = other;
2368 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002369 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002370
2371 GROW;
2372
2373 in = ctxt->input->cur;
2374 while (*in != 0 && *in == *cmp) {
2375 ++in;
2376 ++cmp;
2377 }
William M. Brack76e95df2003-10-18 16:20:14 +00002378 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002379 /* success */
2380 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002381 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002382 }
2383 /* failure (or end of input buffer), check with full function */
2384 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002385 /* strings coming from the dictionnary direct compare possible */
2386 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002387 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002388 }
2389 return ret;
2390}
2391
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002392static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002393xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002394 int len = 0, l;
2395 int c;
2396 int count = 0;
2397
2398 /*
2399 * Handler for more complex cases
2400 */
2401 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002402 c = CUR_CHAR(l);
2403 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2404 (!IS_LETTER(c) && (c != '_') &&
2405 (c != ':'))) {
2406 return(NULL);
2407 }
2408
2409 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002410 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002411 (c == '.') || (c == '-') ||
2412 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002413 (IS_COMBINING(c)) ||
2414 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002415 if (count++ > 100) {
2416 count = 0;
2417 GROW;
2418 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002419 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002420 NEXTL(l);
2421 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002422 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002423 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002424}
2425
2426/**
2427 * xmlParseStringName:
2428 * @ctxt: an XML parser context
2429 * @str: a pointer to the string pointer (IN/OUT)
2430 *
2431 * parse an XML name.
2432 *
2433 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2434 * CombiningChar | Extender
2435 *
2436 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2437 *
2438 * [6] Names ::= Name (S Name)*
2439 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002440 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002441 * is updated to the current location in the string.
2442 */
2443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002444static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002445xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2446 xmlChar buf[XML_MAX_NAMELEN + 5];
2447 const xmlChar *cur = *str;
2448 int len = 0, l;
2449 int c;
2450
2451 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002452 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002453 (c != ':')) {
2454 return(NULL);
2455 }
2456
William M. Brack871611b2003-10-18 04:53:14 +00002457 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002458 (c == '.') || (c == '-') ||
2459 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002460 (IS_COMBINING(c)) ||
2461 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002462 COPY_BUF(l,buf,len,c);
2463 cur += l;
2464 c = CUR_SCHAR(cur, l);
2465 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2466 /*
2467 * Okay someone managed to make a huge name, so he's ready to pay
2468 * for the processing speed.
2469 */
2470 xmlChar *buffer;
2471 int max = len * 2;
2472
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002473 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002474 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002475 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002476 return(NULL);
2477 }
2478 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002479 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002480 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002481 (c == '.') || (c == '-') ||
2482 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002483 (IS_COMBINING(c)) ||
2484 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002485 if (len + 10 > max) {
2486 max *= 2;
2487 buffer = (xmlChar *) xmlRealloc(buffer,
2488 max * sizeof(xmlChar));
2489 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002490 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002491 return(NULL);
2492 }
2493 }
2494 COPY_BUF(l,buffer,len,c);
2495 cur += l;
2496 c = CUR_SCHAR(cur, l);
2497 }
2498 buffer[len] = 0;
2499 *str = cur;
2500 return(buffer);
2501 }
2502 }
2503 *str = cur;
2504 return(xmlStrndup(buf, len));
2505}
2506
2507/**
2508 * xmlParseNmtoken:
2509 * @ctxt: an XML parser context
2510 *
2511 * parse an XML Nmtoken.
2512 *
2513 * [7] Nmtoken ::= (NameChar)+
2514 *
2515 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2516 *
2517 * Returns the Nmtoken parsed or NULL
2518 */
2519
2520xmlChar *
2521xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2522 xmlChar buf[XML_MAX_NAMELEN + 5];
2523 int len = 0, l;
2524 int c;
2525 int count = 0;
2526
2527 GROW;
2528 c = CUR_CHAR(l);
2529
William M. Brack871611b2003-10-18 04:53:14 +00002530 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002531 (c == '.') || (c == '-') ||
2532 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002533 (IS_COMBINING(c)) ||
2534 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002535 if (count++ > 100) {
2536 count = 0;
2537 GROW;
2538 }
2539 COPY_BUF(l,buf,len,c);
2540 NEXTL(l);
2541 c = CUR_CHAR(l);
2542 if (len >= XML_MAX_NAMELEN) {
2543 /*
2544 * Okay someone managed to make a huge token, so he's ready to pay
2545 * for the processing speed.
2546 */
2547 xmlChar *buffer;
2548 int max = len * 2;
2549
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002550 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002551 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002552 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002553 return(NULL);
2554 }
2555 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002556 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002557 (c == '.') || (c == '-') ||
2558 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002559 (IS_COMBINING(c)) ||
2560 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002561 if (count++ > 100) {
2562 count = 0;
2563 GROW;
2564 }
2565 if (len + 10 > max) {
2566 max *= 2;
2567 buffer = (xmlChar *) xmlRealloc(buffer,
2568 max * sizeof(xmlChar));
2569 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002570 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002571 return(NULL);
2572 }
2573 }
2574 COPY_BUF(l,buffer,len,c);
2575 NEXTL(l);
2576 c = CUR_CHAR(l);
2577 }
2578 buffer[len] = 0;
2579 return(buffer);
2580 }
2581 }
2582 if (len == 0)
2583 return(NULL);
2584 return(xmlStrndup(buf, len));
2585}
2586
2587/**
2588 * xmlParseEntityValue:
2589 * @ctxt: an XML parser context
2590 * @orig: if non-NULL store a copy of the original entity value
2591 *
2592 * parse a value for ENTITY declarations
2593 *
2594 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2595 * "'" ([^%&'] | PEReference | Reference)* "'"
2596 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002597 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002598 */
2599
2600xmlChar *
2601xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2602 xmlChar *buf = NULL;
2603 int len = 0;
2604 int size = XML_PARSER_BUFFER_SIZE;
2605 int c, l;
2606 xmlChar stop;
2607 xmlChar *ret = NULL;
2608 const xmlChar *cur = NULL;
2609 xmlParserInputPtr input;
2610
2611 if (RAW == '"') stop = '"';
2612 else if (RAW == '\'') stop = '\'';
2613 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002614 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002615 return(NULL);
2616 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002617 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002618 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002619 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002620 return(NULL);
2621 }
2622
2623 /*
2624 * The content of the entity definition is copied in a buffer.
2625 */
2626
2627 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2628 input = ctxt->input;
2629 GROW;
2630 NEXT;
2631 c = CUR_CHAR(l);
2632 /*
2633 * NOTE: 4.4.5 Included in Literal
2634 * When a parameter entity reference appears in a literal entity
2635 * value, ... a single or double quote character in the replacement
2636 * text is always treated as a normal data character and will not
2637 * terminate the literal.
2638 * In practice it means we stop the loop only when back at parsing
2639 * the initial entity and the quote is found
2640 */
William M. Brack871611b2003-10-18 04:53:14 +00002641 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002642 (ctxt->input != input))) {
2643 if (len + 5 >= size) {
2644 size *= 2;
2645 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2646 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002647 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002648 return(NULL);
2649 }
2650 }
2651 COPY_BUF(l,buf,len,c);
2652 NEXTL(l);
2653 /*
2654 * Pop-up of finished entities.
2655 */
2656 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2657 xmlPopInput(ctxt);
2658
2659 GROW;
2660 c = CUR_CHAR(l);
2661 if (c == 0) {
2662 GROW;
2663 c = CUR_CHAR(l);
2664 }
2665 }
2666 buf[len] = 0;
2667
2668 /*
2669 * Raise problem w.r.t. '&' and '%' being used in non-entities
2670 * reference constructs. Note Charref will be handled in
2671 * xmlStringDecodeEntities()
2672 */
2673 cur = buf;
2674 while (*cur != 0) { /* non input consuming */
2675 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2676 xmlChar *name;
2677 xmlChar tmp = *cur;
2678
2679 cur++;
2680 name = xmlParseStringName(ctxt, &cur);
2681 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002682 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002683 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002684 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002685 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002686 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2687 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002688 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002689 }
2690 if (name != NULL)
2691 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002692 if (*cur == 0)
2693 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002694 }
2695 cur++;
2696 }
2697
2698 /*
2699 * Then PEReference entities are substituted.
2700 */
2701 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002702 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002703 xmlFree(buf);
2704 } else {
2705 NEXT;
2706 /*
2707 * NOTE: 4.4.7 Bypassed
2708 * When a general entity reference appears in the EntityValue in
2709 * an entity declaration, it is bypassed and left as is.
2710 * so XML_SUBSTITUTE_REF is not set here.
2711 */
2712 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2713 0, 0, 0);
2714 if (orig != NULL)
2715 *orig = buf;
2716 else
2717 xmlFree(buf);
2718 }
2719
2720 return(ret);
2721}
2722
2723/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002724 * xmlParseAttValueComplex:
2725 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002726 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002727 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002728 *
2729 * parse a value for an attribute, this is the fallback function
2730 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002731 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002732 *
2733 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2734 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002735static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002736xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002737 xmlChar limit = 0;
2738 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002739 int len = 0;
2740 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002741 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002742 xmlChar *current = NULL;
2743 xmlEntityPtr ent;
2744
Owen Taylor3473f882001-02-23 17:55:21 +00002745 if (NXT(0) == '"') {
2746 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2747 limit = '"';
2748 NEXT;
2749 } else if (NXT(0) == '\'') {
2750 limit = '\'';
2751 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2752 NEXT;
2753 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002754 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002755 return(NULL);
2756 }
2757
2758 /*
2759 * allocate a translation buffer.
2760 */
2761 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002762 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002763 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002764
2765 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002766 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002767 */
2768 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002769 while ((NXT(0) != limit) && /* checked */
2770 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002771 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002772 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002773 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002774 if (NXT(1) == '#') {
2775 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002776
Owen Taylor3473f882001-02-23 17:55:21 +00002777 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002778 if (ctxt->replaceEntities) {
2779 if (len > buf_size - 10) {
2780 growBuffer(buf);
2781 }
2782 buf[len++] = '&';
2783 } else {
2784 /*
2785 * The reparsing will be done in xmlStringGetNodeList()
2786 * called by the attribute() function in SAX.c
2787 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002788 if (len > buf_size - 10) {
2789 growBuffer(buf);
2790 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002791 buf[len++] = '&';
2792 buf[len++] = '#';
2793 buf[len++] = '3';
2794 buf[len++] = '8';
2795 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002796 }
2797 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002798 if (len > buf_size - 10) {
2799 growBuffer(buf);
2800 }
Owen Taylor3473f882001-02-23 17:55:21 +00002801 len += xmlCopyChar(0, &buf[len], val);
2802 }
2803 } else {
2804 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002805 if ((ent != NULL) &&
2806 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2807 if (len > buf_size - 10) {
2808 growBuffer(buf);
2809 }
2810 if ((ctxt->replaceEntities == 0) &&
2811 (ent->content[0] == '&')) {
2812 buf[len++] = '&';
2813 buf[len++] = '#';
2814 buf[len++] = '3';
2815 buf[len++] = '8';
2816 buf[len++] = ';';
2817 } else {
2818 buf[len++] = ent->content[0];
2819 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002820 } else if ((ent != NULL) &&
2821 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002822 xmlChar *rep;
2823
2824 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2825 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002826 XML_SUBSTITUTE_REF,
2827 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002828 if (rep != NULL) {
2829 current = rep;
2830 while (*current != 0) { /* non input consuming */
2831 buf[len++] = *current++;
2832 if (len > buf_size - 10) {
2833 growBuffer(buf);
2834 }
2835 }
2836 xmlFree(rep);
2837 }
2838 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002839 if (len > buf_size - 10) {
2840 growBuffer(buf);
2841 }
Owen Taylor3473f882001-02-23 17:55:21 +00002842 if (ent->content != NULL)
2843 buf[len++] = ent->content[0];
2844 }
2845 } else if (ent != NULL) {
2846 int i = xmlStrlen(ent->name);
2847 const xmlChar *cur = ent->name;
2848
2849 /*
2850 * This may look absurd but is needed to detect
2851 * entities problems
2852 */
2853 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2854 (ent->content != NULL)) {
2855 xmlChar *rep;
2856 rep = xmlStringDecodeEntities(ctxt, ent->content,
2857 XML_SUBSTITUTE_REF, 0, 0, 0);
2858 if (rep != NULL)
2859 xmlFree(rep);
2860 }
2861
2862 /*
2863 * Just output the reference
2864 */
2865 buf[len++] = '&';
2866 if (len > buf_size - i - 10) {
2867 growBuffer(buf);
2868 }
2869 for (;i > 0;i--)
2870 buf[len++] = *cur++;
2871 buf[len++] = ';';
2872 }
2873 }
2874 } else {
2875 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002876 if ((len != 0) || (!normalize)) {
2877 if ((!normalize) || (!in_space)) {
2878 COPY_BUF(l,buf,len,0x20);
2879 if (len > buf_size - 10) {
2880 growBuffer(buf);
2881 }
2882 }
2883 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002884 }
2885 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002886 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002887 COPY_BUF(l,buf,len,c);
2888 if (len > buf_size - 10) {
2889 growBuffer(buf);
2890 }
2891 }
2892 NEXTL(l);
2893 }
2894 GROW;
2895 c = CUR_CHAR(l);
2896 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002897 if ((in_space) && (normalize)) {
2898 while (buf[len - 1] == 0x20) len--;
2899 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002900 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002901 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002902 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002903 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002904 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2905 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002906 } else
2907 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002908 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002909 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002910
2911mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002912 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002913 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002914}
2915
2916/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002917 * xmlParseAttValue:
2918 * @ctxt: an XML parser context
2919 *
2920 * parse a value for an attribute
2921 * Note: the parser won't do substitution of entities here, this
2922 * will be handled later in xmlStringGetNodeList
2923 *
2924 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2925 * "'" ([^<&'] | Reference)* "'"
2926 *
2927 * 3.3.3 Attribute-Value Normalization:
2928 * Before the value of an attribute is passed to the application or
2929 * checked for validity, the XML processor must normalize it as follows:
2930 * - a character reference is processed by appending the referenced
2931 * character to the attribute value
2932 * - an entity reference is processed by recursively processing the
2933 * replacement text of the entity
2934 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2935 * appending #x20 to the normalized value, except that only a single
2936 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2937 * parsed entity or the literal entity value of an internal parsed entity
2938 * - other characters are processed by appending them to the normalized value
2939 * If the declared value is not CDATA, then the XML processor must further
2940 * process the normalized attribute value by discarding any leading and
2941 * trailing space (#x20) characters, and by replacing sequences of space
2942 * (#x20) characters by a single space (#x20) character.
2943 * All attributes for which no declaration has been read should be treated
2944 * by a non-validating parser as if declared CDATA.
2945 *
2946 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2947 */
2948
2949
2950xmlChar *
2951xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002952 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002953}
2954
2955/**
Owen Taylor3473f882001-02-23 17:55:21 +00002956 * xmlParseSystemLiteral:
2957 * @ctxt: an XML parser context
2958 *
2959 * parse an XML Literal
2960 *
2961 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2962 *
2963 * Returns the SystemLiteral parsed or NULL
2964 */
2965
2966xmlChar *
2967xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2968 xmlChar *buf = NULL;
2969 int len = 0;
2970 int size = XML_PARSER_BUFFER_SIZE;
2971 int cur, l;
2972 xmlChar stop;
2973 int state = ctxt->instate;
2974 int count = 0;
2975
2976 SHRINK;
2977 if (RAW == '"') {
2978 NEXT;
2979 stop = '"';
2980 } else if (RAW == '\'') {
2981 NEXT;
2982 stop = '\'';
2983 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002984 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002985 return(NULL);
2986 }
2987
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002988 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002989 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002990 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002991 return(NULL);
2992 }
2993 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2994 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00002995 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002996 if (len + 5 >= size) {
2997 size *= 2;
2998 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2999 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003000 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003001 ctxt->instate = (xmlParserInputState) state;
3002 return(NULL);
3003 }
3004 }
3005 count++;
3006 if (count > 50) {
3007 GROW;
3008 count = 0;
3009 }
3010 COPY_BUF(l,buf,len,cur);
3011 NEXTL(l);
3012 cur = CUR_CHAR(l);
3013 if (cur == 0) {
3014 GROW;
3015 SHRINK;
3016 cur = CUR_CHAR(l);
3017 }
3018 }
3019 buf[len] = 0;
3020 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003021 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003022 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003023 } else {
3024 NEXT;
3025 }
3026 return(buf);
3027}
3028
3029/**
3030 * xmlParsePubidLiteral:
3031 * @ctxt: an XML parser context
3032 *
3033 * parse an XML public literal
3034 *
3035 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3036 *
3037 * Returns the PubidLiteral parsed or NULL.
3038 */
3039
3040xmlChar *
3041xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3042 xmlChar *buf = NULL;
3043 int len = 0;
3044 int size = XML_PARSER_BUFFER_SIZE;
3045 xmlChar cur;
3046 xmlChar stop;
3047 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003048 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003049
3050 SHRINK;
3051 if (RAW == '"') {
3052 NEXT;
3053 stop = '"';
3054 } else if (RAW == '\'') {
3055 NEXT;
3056 stop = '\'';
3057 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003058 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003059 return(NULL);
3060 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003061 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003062 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003063 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003064 return(NULL);
3065 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003066 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003067 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003068 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003069 if (len + 1 >= size) {
3070 size *= 2;
3071 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3072 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003073 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003074 return(NULL);
3075 }
3076 }
3077 buf[len++] = cur;
3078 count++;
3079 if (count > 50) {
3080 GROW;
3081 count = 0;
3082 }
3083 NEXT;
3084 cur = CUR;
3085 if (cur == 0) {
3086 GROW;
3087 SHRINK;
3088 cur = CUR;
3089 }
3090 }
3091 buf[len] = 0;
3092 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003093 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003094 } else {
3095 NEXT;
3096 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003097 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003098 return(buf);
3099}
3100
Daniel Veillard48b2f892001-02-25 16:11:03 +00003101void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003102/**
3103 * xmlParseCharData:
3104 * @ctxt: an XML parser context
3105 * @cdata: int indicating whether we are within a CDATA section
3106 *
3107 * parse a CharData section.
3108 * if we are within a CDATA section ']]>' marks an end of section.
3109 *
3110 * The right angle bracket (>) may be represented using the string "&gt;",
3111 * and must, for compatibility, be escaped using "&gt;" or a character
3112 * reference when it appears in the string "]]>" in content, when that
3113 * string is not marking the end of a CDATA section.
3114 *
3115 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3116 */
3117
3118void
3119xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003120 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003121 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003122 int line = ctxt->input->line;
3123 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003124
3125 SHRINK;
3126 GROW;
3127 /*
3128 * Accelerated common case where input don't need to be
3129 * modified before passing it to the handler.
3130 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003131 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003132 in = ctxt->input->cur;
3133 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003134get_more_space:
3135 while (*in == 0x20) in++;
3136 if (*in == 0xA) {
3137 ctxt->input->line++;
3138 in++;
3139 while (*in == 0xA) {
3140 ctxt->input->line++;
3141 in++;
3142 }
3143 goto get_more_space;
3144 }
3145 if (*in == '<') {
3146 nbchar = in - ctxt->input->cur;
3147 if (nbchar > 0) {
3148 const xmlChar *tmp = ctxt->input->cur;
3149 ctxt->input->cur = in;
3150
3151 if (ctxt->sax->ignorableWhitespace !=
3152 ctxt->sax->characters) {
3153 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3154 ctxt->sax->ignorableWhitespace(ctxt->userData,
3155 tmp, nbchar);
3156 } else if (ctxt->sax->characters != NULL)
3157 ctxt->sax->characters(ctxt->userData,
3158 tmp, nbchar);
3159 } else if (ctxt->sax->characters != NULL) {
3160 ctxt->sax->characters(ctxt->userData,
3161 tmp, nbchar);
3162 }
3163 }
3164 return;
3165 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003166get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003167 while (((*in > ']') && (*in <= 0x7F)) ||
3168 ((*in > '&') && (*in < '<')) ||
3169 ((*in > '<') && (*in < ']')) ||
3170 ((*in >= 0x20) && (*in < '&')) ||
3171 (*in == 0x09))
3172 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003173 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003174 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003175 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003176 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003177 ctxt->input->line++;
3178 in++;
3179 }
3180 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003181 }
3182 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003183 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003185 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003186 return;
3187 }
3188 in++;
3189 goto get_more;
3190 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003191 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003192 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003193 if ((ctxt->sax->ignorableWhitespace !=
3194 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003195 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003196 const xmlChar *tmp = ctxt->input->cur;
3197 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003198
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003199 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003200 ctxt->sax->ignorableWhitespace(ctxt->userData,
3201 tmp, nbchar);
3202 } else if (ctxt->sax->characters != NULL)
3203 ctxt->sax->characters(ctxt->userData,
3204 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003205 line = ctxt->input->line;
3206 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003207 } else {
3208 if (ctxt->sax->characters != NULL)
3209 ctxt->sax->characters(ctxt->userData,
3210 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003211 line = ctxt->input->line;
3212 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003213 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003214 }
3215 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003216 if (*in == 0xD) {
3217 in++;
3218 if (*in == 0xA) {
3219 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003220 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003221 ctxt->input->line++;
3222 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003223 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003224 in--;
3225 }
3226 if (*in == '<') {
3227 return;
3228 }
3229 if (*in == '&') {
3230 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003231 }
3232 SHRINK;
3233 GROW;
3234 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003235 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003236 nbchar = 0;
3237 }
Daniel Veillard50582112001-03-26 22:52:16 +00003238 ctxt->input->line = line;
3239 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003240 xmlParseCharDataComplex(ctxt, cdata);
3241}
3242
Daniel Veillard01c13b52002-12-10 15:19:08 +00003243/**
3244 * xmlParseCharDataComplex:
3245 * @ctxt: an XML parser context
3246 * @cdata: int indicating whether we are within a CDATA section
3247 *
3248 * parse a CharData section.this is the fallback function
3249 * of xmlParseCharData() when the parsing requires handling
3250 * of non-ASCII characters.
3251 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003252void
3253xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003254 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3255 int nbchar = 0;
3256 int cur, l;
3257 int count = 0;
3258
3259 SHRINK;
3260 GROW;
3261 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003262 while ((cur != '<') && /* checked */
3263 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003264 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003265 if ((cur == ']') && (NXT(1) == ']') &&
3266 (NXT(2) == '>')) {
3267 if (cdata) break;
3268 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003269 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003270 }
3271 }
3272 COPY_BUF(l,buf,nbchar,cur);
3273 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003274 buf[nbchar] = 0;
3275
Owen Taylor3473f882001-02-23 17:55:21 +00003276 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003277 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003278 */
3279 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003280 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003281 if (ctxt->sax->ignorableWhitespace != NULL)
3282 ctxt->sax->ignorableWhitespace(ctxt->userData,
3283 buf, nbchar);
3284 } else {
3285 if (ctxt->sax->characters != NULL)
3286 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3287 }
3288 }
3289 nbchar = 0;
3290 }
3291 count++;
3292 if (count > 50) {
3293 GROW;
3294 count = 0;
3295 }
3296 NEXTL(l);
3297 cur = CUR_CHAR(l);
3298 }
3299 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003300 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003301 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003302 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003303 */
3304 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003305 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003306 if (ctxt->sax->ignorableWhitespace != NULL)
3307 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3308 } else {
3309 if (ctxt->sax->characters != NULL)
3310 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3311 }
3312 }
3313 }
3314}
3315
3316/**
3317 * xmlParseExternalID:
3318 * @ctxt: an XML parser context
3319 * @publicID: a xmlChar** receiving PubidLiteral
3320 * @strict: indicate whether we should restrict parsing to only
3321 * production [75], see NOTE below
3322 *
3323 * Parse an External ID or a Public ID
3324 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003325 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003326 * 'PUBLIC' S PubidLiteral S SystemLiteral
3327 *
3328 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3329 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3330 *
3331 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3332 *
3333 * Returns the function returns SystemLiteral and in the second
3334 * case publicID receives PubidLiteral, is strict is off
3335 * it is possible to return NULL and have publicID set.
3336 */
3337
3338xmlChar *
3339xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3340 xmlChar *URI = NULL;
3341
3342 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003343
3344 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003345 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003346 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003347 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3349 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003350 }
3351 SKIP_BLANKS;
3352 URI = xmlParseSystemLiteral(ctxt);
3353 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003354 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003355 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003356 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003357 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003358 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003359 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003360 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003361 }
3362 SKIP_BLANKS;
3363 *publicID = xmlParsePubidLiteral(ctxt);
3364 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003365 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003366 }
3367 if (strict) {
3368 /*
3369 * We don't handle [83] so "S SystemLiteral" is required.
3370 */
William M. Brack76e95df2003-10-18 16:20:14 +00003371 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003372 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003373 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003374 }
3375 } else {
3376 /*
3377 * We handle [83] so we return immediately, if
3378 * "S SystemLiteral" is not detected. From a purely parsing
3379 * point of view that's a nice mess.
3380 */
3381 const xmlChar *ptr;
3382 GROW;
3383
3384 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003385 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003386
William M. Brack76e95df2003-10-18 16:20:14 +00003387 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003388 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3389 }
3390 SKIP_BLANKS;
3391 URI = xmlParseSystemLiteral(ctxt);
3392 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003393 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003394 }
3395 }
3396 return(URI);
3397}
3398
3399/**
3400 * xmlParseComment:
3401 * @ctxt: an XML parser context
3402 *
3403 * Skip an XML (SGML) comment <!-- .... -->
3404 * The spec says that "For compatibility, the string "--" (double-hyphen)
3405 * must not occur within comments. "
3406 *
3407 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3408 */
3409void
3410xmlParseComment(xmlParserCtxtPtr ctxt) {
3411 xmlChar *buf = NULL;
3412 int len;
3413 int size = XML_PARSER_BUFFER_SIZE;
3414 int q, ql;
3415 int r, rl;
3416 int cur, l;
3417 xmlParserInputState state;
3418 xmlParserInputPtr input = ctxt->input;
3419 int count = 0;
3420
3421 /*
3422 * Check that there is a comment right here.
3423 */
3424 if ((RAW != '<') || (NXT(1) != '!') ||
3425 (NXT(2) != '-') || (NXT(3) != '-')) return;
3426
3427 state = ctxt->instate;
3428 ctxt->instate = XML_PARSER_COMMENT;
3429 SHRINK;
3430 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003431 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003432 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003433 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003434 ctxt->instate = state;
3435 return;
3436 }
3437 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003438 if (q == 0)
3439 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003440 NEXTL(ql);
3441 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003442 if (r == 0)
3443 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 NEXTL(rl);
3445 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003446 if (cur == 0)
3447 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003448 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003449 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003450 ((cur != '>') ||
3451 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003452 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003453 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003454 }
3455 if (len + 5 >= size) {
3456 size *= 2;
3457 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3458 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003459 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003460 ctxt->instate = state;
3461 return;
3462 }
3463 }
3464 COPY_BUF(ql,buf,len,q);
3465 q = r;
3466 ql = rl;
3467 r = cur;
3468 rl = l;
3469
3470 count++;
3471 if (count > 50) {
3472 GROW;
3473 count = 0;
3474 }
3475 NEXTL(l);
3476 cur = CUR_CHAR(l);
3477 if (cur == 0) {
3478 SHRINK;
3479 GROW;
3480 cur = CUR_CHAR(l);
3481 }
3482 }
3483 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003484 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003485 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003486 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003487 xmlFree(buf);
3488 } else {
3489 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003490 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3491 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003492 }
3493 NEXT;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3495 (!ctxt->disableSAX))
3496 ctxt->sax->comment(ctxt->userData, buf);
3497 xmlFree(buf);
3498 }
3499 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003500 return;
3501not_terminated:
3502 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3503 "Comment not terminated\n", NULL);
3504 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003505}
3506
3507/**
3508 * xmlParsePITarget:
3509 * @ctxt: an XML parser context
3510 *
3511 * parse the name of a PI
3512 *
3513 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3514 *
3515 * Returns the PITarget name or NULL
3516 */
3517
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003518const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003519xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003520 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003521
3522 name = xmlParseName(ctxt);
3523 if ((name != NULL) &&
3524 ((name[0] == 'x') || (name[0] == 'X')) &&
3525 ((name[1] == 'm') || (name[1] == 'M')) &&
3526 ((name[2] == 'l') || (name[2] == 'L'))) {
3527 int i;
3528 if ((name[0] == 'x') && (name[1] == 'm') &&
3529 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003530 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003531 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003532 return(name);
3533 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003534 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003535 return(name);
3536 }
3537 for (i = 0;;i++) {
3538 if (xmlW3CPIs[i] == NULL) break;
3539 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3540 return(name);
3541 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003542 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3543 "xmlParsePITarget: invalid name prefix 'xml'\n",
3544 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003545 }
3546 return(name);
3547}
3548
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: blanks, "catalog", blanks, '=', blanks, a
 * quoted value, blanks. A missing '=' makes the function return
 * silently; any other deviation produces an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* '=' sign; its absence is not reported */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote, either ' or " */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* value, up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3610
Owen Taylor3473f882001-02-23 17:55:21 +00003611/**
3612 * xmlParsePI:
3613 * @ctxt: an XML parser context
3614 *
3615 * parse an XML Processing Instruction.
3616 *
3617 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3618 *
3619 * The processing is transfered to SAX once parsed.
3620 */
3621
3622void
3623xmlParsePI(xmlParserCtxtPtr ctxt) {
3624 xmlChar *buf = NULL;
3625 int len = 0;
3626 int size = XML_PARSER_BUFFER_SIZE;
3627 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003628 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003629 xmlParserInputState state;
3630 int count = 0;
3631
3632 if ((RAW == '<') && (NXT(1) == '?')) {
3633 xmlParserInputPtr input = ctxt->input;
3634 state = ctxt->instate;
3635 ctxt->instate = XML_PARSER_PI;
3636 /*
3637 * this is a Processing Instruction.
3638 */
3639 SKIP(2);
3640 SHRINK;
3641
3642 /*
3643 * Parse the target name and check for special support like
3644 * namespace.
3645 */
3646 target = xmlParsePITarget(ctxt);
3647 if (target != NULL) {
3648 if ((RAW == '?') && (NXT(1) == '>')) {
3649 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003650 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3651 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003652 }
3653 SKIP(2);
3654
3655 /*
3656 * SAX: PI detected.
3657 */
3658 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3659 (ctxt->sax->processingInstruction != NULL))
3660 ctxt->sax->processingInstruction(ctxt->userData,
3661 target, NULL);
3662 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003663 return;
3664 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003665 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003666 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003667 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003668 ctxt->instate = state;
3669 return;
3670 }
3671 cur = CUR;
3672 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003673 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3674 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003675 }
3676 SKIP_BLANKS;
3677 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003678 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003679 ((cur != '?') || (NXT(1) != '>'))) {
3680 if (len + 5 >= size) {
3681 size *= 2;
3682 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3683 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003684 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003685 ctxt->instate = state;
3686 return;
3687 }
3688 }
3689 count++;
3690 if (count > 50) {
3691 GROW;
3692 count = 0;
3693 }
3694 COPY_BUF(l,buf,len,cur);
3695 NEXTL(l);
3696 cur = CUR_CHAR(l);
3697 if (cur == 0) {
3698 SHRINK;
3699 GROW;
3700 cur = CUR_CHAR(l);
3701 }
3702 }
3703 buf[len] = 0;
3704 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003705 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3706 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003707 } else {
3708 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003709 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3710 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003711 }
3712 SKIP(2);
3713
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003714#ifdef LIBXML_CATALOG_ENABLED
3715 if (((state == XML_PARSER_MISC) ||
3716 (state == XML_PARSER_START)) &&
3717 (xmlStrEqual(target, XML_CATALOG_PI))) {
3718 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3719 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3720 (allow == XML_CATA_ALLOW_ALL))
3721 xmlParseCatalogPI(ctxt, buf);
3722 }
3723#endif
3724
3725
Owen Taylor3473f882001-02-23 17:55:21 +00003726 /*
3727 * SAX: PI detected.
3728 */
3729 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3730 (ctxt->sax->processingInstruction != NULL))
3731 ctxt->sax->processingInstruction(ctxt->userData,
3732 target, buf);
3733 }
3734 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003735 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003736 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003737 }
3738 ctxt->instate = state;
3739 }
3740}
3741
3742/**
3743 * xmlParseNotationDecl:
3744 * @ctxt: an XML parser context
3745 *
3746 * parse a notation declaration
3747 *
3748 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3749 *
3750 * Hence there is actually 3 choices:
3751 * 'PUBLIC' S PubidLiteral
3752 * 'PUBLIC' S PubidLiteral S SystemLiteral
3753 * and 'SYSTEM' S SystemLiteral
3754 *
3755 * See the NOTE on xmlParseExternalID().
3756 */
3757
3758void
3759xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003760 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 xmlChar *Pubid;
3762 xmlChar *Systemid;
3763
Daniel Veillarda07050d2003-10-19 14:46:32 +00003764 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003765 xmlParserInputPtr input = ctxt->input;
3766 SHRINK;
3767 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003768 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003769 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3770 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003771 return;
3772 }
3773 SKIP_BLANKS;
3774
Daniel Veillard76d66f42001-05-16 21:05:17 +00003775 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003776 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003777 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 return;
3779 }
William M. Brack76e95df2003-10-18 16:20:14 +00003780 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003781 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003782 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003783 return;
3784 }
3785 SKIP_BLANKS;
3786
3787 /*
3788 * Parse the IDs.
3789 */
3790 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3791 SKIP_BLANKS;
3792
3793 if (RAW == '>') {
3794 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003795 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3796 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003797 }
3798 NEXT;
3799 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3800 (ctxt->sax->notationDecl != NULL))
3801 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3802 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003803 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
Owen Taylor3473f882001-02-23 17:55:21 +00003805 if (Systemid != NULL) xmlFree(Systemid);
3806 if (Pubid != NULL) xmlFree(Pubid);
3807 }
3808}
3809
3810/**
3811 * xmlParseEntityDecl:
3812 * @ctxt: an XML parser context
3813 *
3814 * parse <!ENTITY declarations
3815 *
3816 * [70] EntityDecl ::= GEDecl | PEDecl
3817 *
3818 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3819 *
3820 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3821 *
3822 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3823 *
3824 * [74] PEDef ::= EntityValue | ExternalID
3825 *
3826 * [76] NDataDecl ::= S 'NDATA' S Name
3827 *
3828 * [ VC: Notation Declared ]
3829 * The Name must match the declared name of a notation.
3830 */
3831
3832void
3833xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003834 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003835 xmlChar *value = NULL;
3836 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003837 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003838 int isParameter = 0;
3839 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003840 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003841
3842 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003843 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003844 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003845 SHRINK;
3846 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003847 skipped = SKIP_BLANKS;
3848 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003849 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3850 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003851 }
Owen Taylor3473f882001-02-23 17:55:21 +00003852
3853 if (RAW == '%') {
3854 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003855 skipped = SKIP_BLANKS;
3856 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003857 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3858 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003859 }
Owen Taylor3473f882001-02-23 17:55:21 +00003860 isParameter = 1;
3861 }
3862
Daniel Veillard76d66f42001-05-16 21:05:17 +00003863 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003864 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003865 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3866 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003867 return;
3868 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003869 skipped = SKIP_BLANKS;
3870 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003871 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3872 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003873 }
Owen Taylor3473f882001-02-23 17:55:21 +00003874
Daniel Veillardf5582f12002-06-11 10:08:16 +00003875 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003876 /*
3877 * handle the various case of definitions...
3878 */
3879 if (isParameter) {
3880 if ((RAW == '"') || (RAW == '\'')) {
3881 value = xmlParseEntityValue(ctxt, &orig);
3882 if (value) {
3883 if ((ctxt->sax != NULL) &&
3884 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3885 ctxt->sax->entityDecl(ctxt->userData, name,
3886 XML_INTERNAL_PARAMETER_ENTITY,
3887 NULL, NULL, value);
3888 }
3889 } else {
3890 URI = xmlParseExternalID(ctxt, &literal, 1);
3891 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003892 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003893 }
3894 if (URI) {
3895 xmlURIPtr uri;
3896
3897 uri = xmlParseURI((const char *) URI);
3898 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003899 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3900 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003901 /*
3902 * This really ought to be a well formedness error
3903 * but the XML Core WG decided otherwise c.f. issue
3904 * E26 of the XML erratas.
3905 */
Owen Taylor3473f882001-02-23 17:55:21 +00003906 } else {
3907 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003908 /*
3909 * Okay this is foolish to block those but not
3910 * invalid URIs.
3911 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003912 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003913 } else {
3914 if ((ctxt->sax != NULL) &&
3915 (!ctxt->disableSAX) &&
3916 (ctxt->sax->entityDecl != NULL))
3917 ctxt->sax->entityDecl(ctxt->userData, name,
3918 XML_EXTERNAL_PARAMETER_ENTITY,
3919 literal, URI, NULL);
3920 }
3921 xmlFreeURI(uri);
3922 }
3923 }
3924 }
3925 } else {
3926 if ((RAW == '"') || (RAW == '\'')) {
3927 value = xmlParseEntityValue(ctxt, &orig);
3928 if ((ctxt->sax != NULL) &&
3929 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3930 ctxt->sax->entityDecl(ctxt->userData, name,
3931 XML_INTERNAL_GENERAL_ENTITY,
3932 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003933 /*
3934 * For expat compatibility in SAX mode.
3935 */
3936 if ((ctxt->myDoc == NULL) ||
3937 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3938 if (ctxt->myDoc == NULL) {
3939 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3940 }
3941 if (ctxt->myDoc->intSubset == NULL)
3942 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3943 BAD_CAST "fake", NULL, NULL);
3944
Daniel Veillard1af9a412003-08-20 22:54:39 +00003945 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3946 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003947 }
Owen Taylor3473f882001-02-23 17:55:21 +00003948 } else {
3949 URI = xmlParseExternalID(ctxt, &literal, 1);
3950 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003952 }
3953 if (URI) {
3954 xmlURIPtr uri;
3955
3956 uri = xmlParseURI((const char *)URI);
3957 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003958 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3959 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003960 /*
3961 * This really ought to be a well formedness error
3962 * but the XML Core WG decided otherwise c.f. issue
3963 * E26 of the XML erratas.
3964 */
Owen Taylor3473f882001-02-23 17:55:21 +00003965 } else {
3966 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003967 /*
3968 * Okay this is foolish to block those but not
3969 * invalid URIs.
3970 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003971 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003972 }
3973 xmlFreeURI(uri);
3974 }
3975 }
William M. Brack76e95df2003-10-18 16:20:14 +00003976 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003977 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3978 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003979 }
3980 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003981 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003982 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00003983 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003984 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3985 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003986 }
3987 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003988 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003989 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3990 (ctxt->sax->unparsedEntityDecl != NULL))
3991 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3992 literal, URI, ndata);
3993 } else {
3994 if ((ctxt->sax != NULL) &&
3995 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3996 ctxt->sax->entityDecl(ctxt->userData, name,
3997 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3998 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003999 /*
4000 * For expat compatibility in SAX mode.
4001 * assuming the entity repalcement was asked for
4002 */
4003 if ((ctxt->replaceEntities != 0) &&
4004 ((ctxt->myDoc == NULL) ||
4005 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4006 if (ctxt->myDoc == NULL) {
4007 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4008 }
4009
4010 if (ctxt->myDoc->intSubset == NULL)
4011 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4012 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004013 xmlSAX2EntityDecl(ctxt, name,
4014 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4015 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004016 }
Owen Taylor3473f882001-02-23 17:55:21 +00004017 }
4018 }
4019 }
4020 SKIP_BLANKS;
4021 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004022 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004023 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004024 } else {
4025 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004026 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4027 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004028 }
4029 NEXT;
4030 }
4031 if (orig != NULL) {
4032 /*
4033 * Ugly mechanism to save the raw entity value.
4034 */
4035 xmlEntityPtr cur = NULL;
4036
4037 if (isParameter) {
4038 if ((ctxt->sax != NULL) &&
4039 (ctxt->sax->getParameterEntity != NULL))
4040 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4041 } else {
4042 if ((ctxt->sax != NULL) &&
4043 (ctxt->sax->getEntity != NULL))
4044 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004045 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004046 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004047 }
Owen Taylor3473f882001-02-23 17:55:21 +00004048 }
4049 if (cur != NULL) {
4050 if (cur->orig != NULL)
4051 xmlFree(orig);
4052 else
4053 cur->orig = orig;
4054 } else
4055 xmlFree(orig);
4056 }
Owen Taylor3473f882001-02-23 17:55:21 +00004057 if (value != NULL) xmlFree(value);
4058 if (URI != NULL) xmlFree(URI);
4059 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004060 }
4061}
4062
4063/**
4064 * xmlParseDefaultDecl:
4065 * @ctxt: an XML parser context
4066 * @value: Receive a possible fixed default value for the attribute
4067 *
4068 * Parse an attribute default declaration
4069 *
4070 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4071 *
4072 * [ VC: Required Attribute ]
4073 * if the default declaration is the keyword #REQUIRED, then the
4074 * attribute must be specified for all elements of the type in the
4075 * attribute-list declaration.
4076 *
4077 * [ VC: Attribute Default Legal ]
4078 * The declared default value must meet the lexical constraints of
4079 * the declared attribute type c.f. xmlValidateAttributeDecl()
4080 *
4081 * [ VC: Fixed Attribute Default ]
4082 * if an attribute has a default value declared with the #FIXED
4083 * keyword, instances of that attribute must match the default value.
4084 *
4085 * [ WFC: No < in Attribute Values ]
4086 * handled in xmlParseAttValue()
4087 *
4088 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4089 * or XML_ATTRIBUTE_FIXED.
4090 */
4091
4092int
4093xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4094 int val;
4095 xmlChar *ret;
4096
4097 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004098 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004099 SKIP(9);
4100 return(XML_ATTRIBUTE_REQUIRED);
4101 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004102 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004103 SKIP(8);
4104 return(XML_ATTRIBUTE_IMPLIED);
4105 }
4106 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004107 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004108 SKIP(6);
4109 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004110 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004111 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4112 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004113 }
4114 SKIP_BLANKS;
4115 }
4116 ret = xmlParseAttValue(ctxt);
4117 ctxt->instate = XML_PARSER_DTD;
4118 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004119 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004120 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004121 } else
4122 *value = ret;
4123 return(val);
4124}
4125
4126/**
4127 * xmlParseNotationType:
4128 * @ctxt: an XML parser context
4129 *
4130 * parse an Notation attribute type.
4131 *
4132 * Note: the leading 'NOTATION' S part has already being parsed...
4133 *
4134 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4135 *
4136 * [ VC: Notation Attributes ]
4137 * Values of this type must match one of the notation names included
4138 * in the declaration; all notation names in the declaration must be declared.
4139 *
4140 * Returns: the notation attribute tree built while parsing
4141 */
4142
4143xmlEnumerationPtr
4144xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004145 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004146 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4147
4148 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004149 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004150 return(NULL);
4151 }
4152 SHRINK;
4153 do {
4154 NEXT;
4155 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004156 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004157 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004158 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4159 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004160 return(ret);
4161 }
4162 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004163 if (cur == NULL) return(ret);
4164 if (last == NULL) ret = last = cur;
4165 else {
4166 last->next = cur;
4167 last = cur;
4168 }
4169 SKIP_BLANKS;
4170 } while (RAW == '|');
4171 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004172 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004173 if ((last != NULL) && (last != ret))
4174 xmlFreeEnumeration(last);
4175 return(ret);
4176 }
4177 NEXT;
4178 return(ret);
4179}
4180
4181/**
4182 * xmlParseEnumerationType:
4183 * @ctxt: an XML parser context
4184 *
4185 * parse an Enumeration attribute type.
4186 *
4187 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4188 *
4189 * [ VC: Enumeration ]
4190 * Values of this type must match one of the Nmtoken tokens in
4191 * the declaration
4192 *
4193 * Returns: the enumeration attribute tree built while parsing
4194 */
4195
4196xmlEnumerationPtr
4197xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4198 xmlChar *name;
4199 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4200
4201 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004202 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004203 return(NULL);
4204 }
4205 SHRINK;
4206 do {
4207 NEXT;
4208 SKIP_BLANKS;
4209 name = xmlParseNmtoken(ctxt);
4210 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004211 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 return(ret);
4213 }
4214 cur = xmlCreateEnumeration(name);
4215 xmlFree(name);
4216 if (cur == NULL) return(ret);
4217 if (last == NULL) ret = last = cur;
4218 else {
4219 last->next = cur;
4220 last = cur;
4221 }
4222 SKIP_BLANKS;
4223 } while (RAW == '|');
4224 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004225 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 return(ret);
4227 }
4228 NEXT;
4229 return(ret);
4230}
4231
4232/**
4233 * xmlParseEnumeratedType:
4234 * @ctxt: an XML parser context
4235 * @tree: the enumeration tree built while parsing
4236 *
4237 * parse an Enumerated attribute type.
4238 *
4239 * [57] EnumeratedType ::= NotationType | Enumeration
4240 *
4241 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4242 *
4243 *
4244 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4245 */
4246
4247int
4248xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004249 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004250 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004251 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4253 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004254 return(0);
4255 }
4256 SKIP_BLANKS;
4257 *tree = xmlParseNotationType(ctxt);
4258 if (*tree == NULL) return(0);
4259 return(XML_ATTRIBUTE_NOTATION);
4260 }
4261 *tree = xmlParseEnumerationType(ctxt);
4262 if (*tree == NULL) return(0);
4263 return(XML_ATTRIBUTE_ENUMERATION);
4264}
4265
4266/**
4267 * xmlParseAttributeType:
4268 * @ctxt: an XML parser context
4269 * @tree: the enumeration tree built while parsing
4270 *
4271 * parse the Attribute list def for an element
4272 *
4273 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4274 *
4275 * [55] StringType ::= 'CDATA'
4276 *
4277 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4278 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4279 *
4280 * Validity constraints for attribute values syntax are checked in
4281 * xmlValidateAttributeValue()
4282 *
4283 * [ VC: ID ]
4284 * Values of type ID must match the Name production. A name must not
4285 * appear more than once in an XML document as a value of this type;
4286 * i.e., ID values must uniquely identify the elements which bear them.
4287 *
4288 * [ VC: One ID per Element Type ]
4289 * No element type may have more than one ID attribute specified.
4290 *
4291 * [ VC: ID Attribute Default ]
4292 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4293 *
4294 * [ VC: IDREF ]
4295 * Values of type IDREF must match the Name production, and values
4296 * of type IDREFS must match Names; each IDREF Name must match the value
4297 * of an ID attribute on some element in the XML document; i.e. IDREF
4298 * values must match the value of some ID attribute.
4299 *
4300 * [ VC: Entity Name ]
4301 * Values of type ENTITY must match the Name production, values
4302 * of type ENTITIES must match Names; each Entity Name must match the
4303 * name of an unparsed entity declared in the DTD.
4304 *
4305 * [ VC: Name Token ]
4306 * Values of type NMTOKEN must match the Nmtoken production; values
4307 * of type NMTOKENS must match Nmtokens.
4308 *
4309 * Returns the attribute type
4310 */
4311int
4312xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4313 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004314 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004315 SKIP(5);
4316 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004317 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004318 SKIP(6);
4319 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004320 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004321 SKIP(5);
4322 return(XML_ATTRIBUTE_IDREF);
4323 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4324 SKIP(2);
4325 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004326 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004327 SKIP(6);
4328 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004329 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004330 SKIP(8);
4331 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004332 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004333 SKIP(8);
4334 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004335 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004336 SKIP(7);
4337 return(XML_ATTRIBUTE_NMTOKEN);
4338 }
4339 return(xmlParseEnumeratedType(ctxt, tree));
4340}
4341
4342/**
4343 * xmlParseAttributeListDecl:
4344 * @ctxt: an XML parser context
4345 *
4346 * : parse the Attribute list def for an element
4347 *
4348 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4349 *
4350 * [53] AttDef ::= S Name S AttType S DefaultDecl
4351 *
4352 */
4353void
4354xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004355 const xmlChar *elemName;
4356 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004357 xmlEnumerationPtr tree;
4358
Daniel Veillarda07050d2003-10-19 14:46:32 +00004359 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004360 xmlParserInputPtr input = ctxt->input;
4361
4362 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004363 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004365 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004366 }
4367 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004368 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004369 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004370 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4371 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004372 return;
4373 }
4374 SKIP_BLANKS;
4375 GROW;
4376 while (RAW != '>') {
4377 const xmlChar *check = CUR_PTR;
4378 int type;
4379 int def;
4380 xmlChar *defaultValue = NULL;
4381
4382 GROW;
4383 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004384 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004385 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004386 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4387 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004388 break;
4389 }
4390 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004391 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004392 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004393 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004394 if (defaultValue != NULL)
4395 xmlFree(defaultValue);
4396 break;
4397 }
4398 SKIP_BLANKS;
4399
4400 type = xmlParseAttributeType(ctxt, &tree);
4401 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004402 if (defaultValue != NULL)
4403 xmlFree(defaultValue);
4404 break;
4405 }
4406
4407 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004408 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4410 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004411 if (defaultValue != NULL)
4412 xmlFree(defaultValue);
4413 if (tree != NULL)
4414 xmlFreeEnumeration(tree);
4415 break;
4416 }
4417 SKIP_BLANKS;
4418
4419 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4420 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004421 if (defaultValue != NULL)
4422 xmlFree(defaultValue);
4423 if (tree != NULL)
4424 xmlFreeEnumeration(tree);
4425 break;
4426 }
4427
4428 GROW;
4429 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004430 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004431 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004432 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004433 if (defaultValue != NULL)
4434 xmlFree(defaultValue);
4435 if (tree != NULL)
4436 xmlFreeEnumeration(tree);
4437 break;
4438 }
4439 SKIP_BLANKS;
4440 }
4441 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004442 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4443 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004444 if (defaultValue != NULL)
4445 xmlFree(defaultValue);
4446 if (tree != NULL)
4447 xmlFreeEnumeration(tree);
4448 break;
4449 }
4450 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4451 (ctxt->sax->attributeDecl != NULL))
4452 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4453 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004454 else if (tree != NULL)
4455 xmlFreeEnumeration(tree);
4456
4457 if ((ctxt->sax2) && (defaultValue != NULL) &&
4458 (def != XML_ATTRIBUTE_IMPLIED) &&
4459 (def != XML_ATTRIBUTE_REQUIRED)) {
4460 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4461 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004462 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4463 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4464 }
Owen Taylor3473f882001-02-23 17:55:21 +00004465 if (defaultValue != NULL)
4466 xmlFree(defaultValue);
4467 GROW;
4468 }
4469 if (RAW == '>') {
4470 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004471 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4472 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004473 }
4474 NEXT;
4475 }
Owen Taylor3473f882001-02-23 17:55:21 +00004476 }
4477}
4478
4479/**
4480 * xmlParseElementMixedContentDecl:
4481 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004482 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004483 *
4484 * parse the declaration for a Mixed Element content
4485 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4486 *
4487 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4488 * '(' S? '#PCDATA' S? ')'
4489 *
4490 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4491 *
4492 * [ VC: No Duplicate Types ]
4493 * The same name must not appear more than once in a single
4494 * mixed-content declaration.
4495 *
4496 * returns: the list of the xmlElementContentPtr describing the element choices
4497 */
4498xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004499xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004500 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004501 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004502
4503 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004504 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004505 SKIP(7);
4506 SKIP_BLANKS;
4507 SHRINK;
4508 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004509 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004510 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4511"Element content declaration doesn't start and stop in the same entity\n",
4512 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004513 }
Owen Taylor3473f882001-02-23 17:55:21 +00004514 NEXT;
4515 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4516 if (RAW == '*') {
4517 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4518 NEXT;
4519 }
4520 return(ret);
4521 }
4522 if ((RAW == '(') || (RAW == '|')) {
4523 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4524 if (ret == NULL) return(NULL);
4525 }
4526 while (RAW == '|') {
4527 NEXT;
4528 if (elem == NULL) {
4529 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4530 if (ret == NULL) return(NULL);
4531 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004532 if (cur != NULL)
4533 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004534 cur = ret;
4535 } else {
4536 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4537 if (n == NULL) return(NULL);
4538 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004539 if (n->c1 != NULL)
4540 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004541 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004542 if (n != NULL)
4543 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004544 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004545 }
4546 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004547 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004548 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004549 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004550 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004551 xmlFreeElementContent(cur);
4552 return(NULL);
4553 }
4554 SKIP_BLANKS;
4555 GROW;
4556 }
4557 if ((RAW == ')') && (NXT(1) == '*')) {
4558 if (elem != NULL) {
4559 cur->c2 = xmlNewElementContent(elem,
4560 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004561 if (cur->c2 != NULL)
4562 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004563 }
4564 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004565 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004566 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4567"Element content declaration doesn't start and stop in the same entity\n",
4568 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004569 }
Owen Taylor3473f882001-02-23 17:55:21 +00004570 SKIP(2);
4571 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004572 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004573 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004574 return(NULL);
4575 }
4576
4577 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004578 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004579 }
4580 return(ret);
4581}
4582
4583/**
4584 * xmlParseElementChildrenContentDecl:
4585 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004586 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004587 *
4588 * parse the declaration for a Mixed Element content
4589 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4590 *
4591 *
4592 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4593 *
4594 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4595 *
4596 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4597 *
4598 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4599 *
4600 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4601 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004602 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004603 * opening or closing parentheses in a choice, seq, or Mixed
4604 * construct is contained in the replacement text for a parameter
4605 * entity, both must be contained in the same replacement text. For
4606 * interoperability, if a parameter-entity reference appears in a
4607 * choice, seq, or Mixed construct, its replacement text should not
4608 * be empty, and neither the first nor last non-blank character of
4609 * the replacement text should be a connector (| or ,).
4610 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004611 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004612 * hierarchy.
4613 */
4614xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004615xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004616 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004617 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004618 xmlChar type = 0;
4619
4620 SKIP_BLANKS;
4621 GROW;
4622 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004623 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004624
Owen Taylor3473f882001-02-23 17:55:21 +00004625 /* Recurse on first child */
4626 NEXT;
4627 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004628 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004629 SKIP_BLANKS;
4630 GROW;
4631 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004632 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004633 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004634 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004635 return(NULL);
4636 }
4637 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004638 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004639 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004640 return(NULL);
4641 }
Owen Taylor3473f882001-02-23 17:55:21 +00004642 GROW;
4643 if (RAW == '?') {
4644 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4645 NEXT;
4646 } else if (RAW == '*') {
4647 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4648 NEXT;
4649 } else if (RAW == '+') {
4650 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4651 NEXT;
4652 } else {
4653 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4654 }
Owen Taylor3473f882001-02-23 17:55:21 +00004655 GROW;
4656 }
4657 SKIP_BLANKS;
4658 SHRINK;
4659 while (RAW != ')') {
4660 /*
4661 * Each loop we parse one separator and one element.
4662 */
4663 if (RAW == ',') {
4664 if (type == 0) type = CUR;
4665
4666 /*
4667 * Detect "Name | Name , Name" error
4668 */
4669 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004670 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004671 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004672 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004673 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004674 xmlFreeElementContent(last);
4675 if (ret != NULL)
4676 xmlFreeElementContent(ret);
4677 return(NULL);
4678 }
4679 NEXT;
4680
4681 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4682 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004683 if ((last != NULL) && (last != ret))
4684 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004685 xmlFreeElementContent(ret);
4686 return(NULL);
4687 }
4688 if (last == NULL) {
4689 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004690 if (ret != NULL)
4691 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004692 ret = cur = op;
4693 } else {
4694 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004695 if (op != NULL)
4696 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004697 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004698 if (last != NULL)
4699 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004700 cur =op;
4701 last = NULL;
4702 }
4703 } else if (RAW == '|') {
4704 if (type == 0) type = CUR;
4705
4706 /*
4707 * Detect "Name , Name | Name" error
4708 */
4709 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004710 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004711 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004712 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004713 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004714 xmlFreeElementContent(last);
4715 if (ret != NULL)
4716 xmlFreeElementContent(ret);
4717 return(NULL);
4718 }
4719 NEXT;
4720
4721 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4722 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004723 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004724 xmlFreeElementContent(last);
4725 if (ret != NULL)
4726 xmlFreeElementContent(ret);
4727 return(NULL);
4728 }
4729 if (last == NULL) {
4730 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004731 if (ret != NULL)
4732 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004733 ret = cur = op;
4734 } else {
4735 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004736 if (op != NULL)
4737 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004738 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004739 if (last != NULL)
4740 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004741 cur =op;
4742 last = NULL;
4743 }
4744 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004745 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004746 if (ret != NULL)
4747 xmlFreeElementContent(ret);
4748 return(NULL);
4749 }
4750 GROW;
4751 SKIP_BLANKS;
4752 GROW;
4753 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004754 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004755 /* Recurse on second child */
4756 NEXT;
4757 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004758 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004759 SKIP_BLANKS;
4760 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004761 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004762 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004763 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004764 if (ret != NULL)
4765 xmlFreeElementContent(ret);
4766 return(NULL);
4767 }
4768 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004769 if (RAW == '?') {
4770 last->ocur = XML_ELEMENT_CONTENT_OPT;
4771 NEXT;
4772 } else if (RAW == '*') {
4773 last->ocur = XML_ELEMENT_CONTENT_MULT;
4774 NEXT;
4775 } else if (RAW == '+') {
4776 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4777 NEXT;
4778 } else {
4779 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4780 }
4781 }
4782 SKIP_BLANKS;
4783 GROW;
4784 }
4785 if ((cur != NULL) && (last != NULL)) {
4786 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004787 if (last != NULL)
4788 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004789 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004790 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004791 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4792"Element content declaration doesn't start and stop in the same entity\n",
4793 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004794 }
Owen Taylor3473f882001-02-23 17:55:21 +00004795 NEXT;
4796 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004797 if (ret != NULL) {
4798 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4799 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4800 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4801 else
4802 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4803 }
Owen Taylor3473f882001-02-23 17:55:21 +00004804 NEXT;
4805 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004806 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004807 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004808 cur = ret;
4809 /*
4810 * Some normalization:
4811 * (a | b* | c?)* == (a | b | c)*
4812 */
4813 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4814 if ((cur->c1 != NULL) &&
4815 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4816 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4817 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4818 if ((cur->c2 != NULL) &&
4819 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4820 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4821 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4822 cur = cur->c2;
4823 }
4824 }
Owen Taylor3473f882001-02-23 17:55:21 +00004825 NEXT;
4826 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004827 if (ret != NULL) {
4828 int found = 0;
4829
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004830 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4831 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4832 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004833 else
4834 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004835 /*
4836 * Some normalization:
4837 * (a | b*)+ == (a | b)*
4838 * (a | b?)+ == (a | b)*
4839 */
4840 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4841 if ((cur->c1 != NULL) &&
4842 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4843 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4844 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4845 found = 1;
4846 }
4847 if ((cur->c2 != NULL) &&
4848 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4849 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4850 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4851 found = 1;
4852 }
4853 cur = cur->c2;
4854 }
4855 if (found)
4856 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4857 }
Owen Taylor3473f882001-02-23 17:55:21 +00004858 NEXT;
4859 }
4860 return(ret);
4861}
4862
4863/**
4864 * xmlParseElementContentDecl:
4865 * @ctxt: an XML parser context
4866 * @name: the name of the element being defined.
4867 * @result: the Element Content pointer will be stored here if any
4868 *
4869 * parse the declaration for an Element content either Mixed or Children,
4870 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4871 *
4872 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4873 *
4874 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4875 */
4876
4877int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004878xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004879 xmlElementContentPtr *result) {
4880
4881 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004882 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004883 int res;
4884
4885 *result = NULL;
4886
4887 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004888 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004889 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004890 return(-1);
4891 }
4892 NEXT;
4893 GROW;
4894 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004895 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004896 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004897 res = XML_ELEMENT_TYPE_MIXED;
4898 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004899 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004900 res = XML_ELEMENT_TYPE_ELEMENT;
4901 }
Owen Taylor3473f882001-02-23 17:55:21 +00004902 SKIP_BLANKS;
4903 *result = tree;
4904 return(res);
4905}
4906
4907/**
4908 * xmlParseElementDecl:
4909 * @ctxt: an XML parser context
4910 *
4911 * parse an Element declaration.
4912 *
4913 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4914 *
4915 * [ VC: Unique Element Type Declaration ]
4916 * No element type may be declared more than once
4917 *
4918 * Returns the type of the element, or -1 in case of error
4919 */
4920int
4921xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004922 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 int ret = -1;
4924 xmlElementContentPtr content = NULL;
4925
4926 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004927 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004928 xmlParserInputPtr input = ctxt->input;
4929
4930 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004931 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004932 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4933 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004934 }
4935 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004936 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004937 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004938 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4939 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004940 return(-1);
4941 }
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004944 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4946 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004947 }
4948 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004949 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004950 SKIP(5);
4951 /*
4952 * Element must always be empty.
4953 */
4954 ret = XML_ELEMENT_TYPE_EMPTY;
4955 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4956 (NXT(2) == 'Y')) {
4957 SKIP(3);
4958 /*
4959 * Element is a generic container.
4960 */
4961 ret = XML_ELEMENT_TYPE_ANY;
4962 } else if (RAW == '(') {
4963 ret = xmlParseElementContentDecl(ctxt, name, &content);
4964 } else {
4965 /*
4966 * [ WFC: PEs in Internal Subset ] error handling.
4967 */
4968 if ((RAW == '%') && (ctxt->external == 0) &&
4969 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004970 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004971 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004972 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004973 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004974 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4975 }
Owen Taylor3473f882001-02-23 17:55:21 +00004976 return(-1);
4977 }
4978
4979 SKIP_BLANKS;
4980 /*
4981 * Pop-up of finished entities.
4982 */
4983 while ((RAW == 0) && (ctxt->inputNr > 1))
4984 xmlPopInput(ctxt);
4985 SKIP_BLANKS;
4986
4987 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004988 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004989 } else {
4990 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004991 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4992 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004993 }
4994
4995 NEXT;
4996 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4997 (ctxt->sax->elementDecl != NULL))
4998 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4999 content);
5000 }
5001 if (content != NULL) {
5002 xmlFreeElementContent(content);
5003 }
Owen Taylor3473f882001-02-23 17:55:21 +00005004 }
5005 return(ret);
5006}
5007
5008/**
Owen Taylor3473f882001-02-23 17:55:21 +00005009 * xmlParseConditionalSections
5010 * @ctxt: an XML parser context
5011 *
5012 * [61] conditionalSect ::= includeSect | ignoreSect
5013 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5014 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5015 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5016 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5017 */
5018
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005019static void
Owen Taylor3473f882001-02-23 17:55:21 +00005020xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5021 SKIP(3);
5022 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005023 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005024 SKIP(7);
5025 SKIP_BLANKS;
5026 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005027 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005028 } else {
5029 NEXT;
5030 }
5031 if (xmlParserDebugEntities) {
5032 if ((ctxt->input != NULL) && (ctxt->input->filename))
5033 xmlGenericError(xmlGenericErrorContext,
5034 "%s(%d): ", ctxt->input->filename,
5035 ctxt->input->line);
5036 xmlGenericError(xmlGenericErrorContext,
5037 "Entering INCLUDE Conditional Section\n");
5038 }
5039
5040 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5041 (NXT(2) != '>'))) {
5042 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005043 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005044
5045 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5046 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005047 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005048 NEXT;
5049 } else if (RAW == '%') {
5050 xmlParsePEReference(ctxt);
5051 } else
5052 xmlParseMarkupDecl(ctxt);
5053
5054 /*
5055 * Pop-up of finished entities.
5056 */
5057 while ((RAW == 0) && (ctxt->inputNr > 1))
5058 xmlPopInput(ctxt);
5059
Daniel Veillardfdc91562002-07-01 21:52:03 +00005060 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005061 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005062 break;
5063 }
5064 }
5065 if (xmlParserDebugEntities) {
5066 if ((ctxt->input != NULL) && (ctxt->input->filename))
5067 xmlGenericError(xmlGenericErrorContext,
5068 "%s(%d): ", ctxt->input->filename,
5069 ctxt->input->line);
5070 xmlGenericError(xmlGenericErrorContext,
5071 "Leaving INCLUDE Conditional Section\n");
5072 }
5073
Daniel Veillarda07050d2003-10-19 14:46:32 +00005074 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005075 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005076 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005077 int depth = 0;
5078
5079 SKIP(6);
5080 SKIP_BLANKS;
5081 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005082 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005083 } else {
5084 NEXT;
5085 }
5086 if (xmlParserDebugEntities) {
5087 if ((ctxt->input != NULL) && (ctxt->input->filename))
5088 xmlGenericError(xmlGenericErrorContext,
5089 "%s(%d): ", ctxt->input->filename,
5090 ctxt->input->line);
5091 xmlGenericError(xmlGenericErrorContext,
5092 "Entering IGNORE Conditional Section\n");
5093 }
5094
5095 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005096 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005097 * But disable SAX event generating DTD building in the meantime
5098 */
5099 state = ctxt->disableSAX;
5100 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005101 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005102 ctxt->instate = XML_PARSER_IGNORE;
5103
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005104 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005105 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5106 depth++;
5107 SKIP(3);
5108 continue;
5109 }
5110 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5111 if (--depth >= 0) SKIP(3);
5112 continue;
5113 }
5114 NEXT;
5115 continue;
5116 }
5117
5118 ctxt->disableSAX = state;
5119 ctxt->instate = instate;
5120
5121 if (xmlParserDebugEntities) {
5122 if ((ctxt->input != NULL) && (ctxt->input->filename))
5123 xmlGenericError(xmlGenericErrorContext,
5124 "%s(%d): ", ctxt->input->filename,
5125 ctxt->input->line);
5126 xmlGenericError(xmlGenericErrorContext,
5127 "Leaving IGNORE Conditional Section\n");
5128 }
5129
5130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005131 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005132 }
5133
5134 if (RAW == 0)
5135 SHRINK;
5136
5137 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005138 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005139 } else {
5140 SKIP(3);
5141 }
5142}
5143
5144/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005145 * xmlParseMarkupDecl:
5146 * @ctxt: an XML parser context
5147 *
5148 * parse Markup declarations
5149 *
5150 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5151 * NotationDecl | PI | Comment
5152 *
5153 * [ VC: Proper Declaration/PE Nesting ]
5154 * Parameter-entity replacement text must be properly nested with
5155 * markup declarations. That is to say, if either the first character
5156 * or the last character of a markup declaration (markupdecl above) is
5157 * contained in the replacement text for a parameter-entity reference,
5158 * both must be contained in the same replacement text.
5159 *
5160 * [ WFC: PEs in Internal Subset ]
5161 * In the internal DTD subset, parameter-entity references can occur
5162 * only where markup declarations can occur, not within markup declarations.
5163 * (This does not apply to references that occur in external parameter
5164 * entities or to the external subset.)
5165 */
5166void
5167xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5168 GROW;
5169 xmlParseElementDecl(ctxt);
5170 xmlParseAttributeListDecl(ctxt);
5171 xmlParseEntityDecl(ctxt);
5172 xmlParseNotationDecl(ctxt);
5173 xmlParsePI(ctxt);
5174 xmlParseComment(ctxt);
5175 /*
5176 * This is only for internal subset. On external entities,
5177 * the replacement is done before parsing stage
5178 */
5179 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5180 xmlParsePEReference(ctxt);
5181
5182 /*
5183 * Conditional sections are allowed from entities included
5184 * by PE References in the internal subset.
5185 */
5186 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5187 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5188 xmlParseConditionalSections(ctxt);
5189 }
5190 }
5191
5192 ctxt->instate = XML_PARSER_DTD;
5193}
5194
5195/**
5196 * xmlParseTextDecl:
5197 * @ctxt: an XML parser context
5198 *
5199 * parse an XML declaration header for external entities
5200 *
5201 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5202 *
5203 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5204 */
5205
5206void
5207xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5208 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005209 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005210
5211 /*
5212 * We know that '<?xml' is here.
5213 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005214 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005215 SKIP(5);
5216 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005217 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005218 return;
5219 }
5220
William M. Brack76e95df2003-10-18 16:20:14 +00005221 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005222 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5223 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005224 }
5225 SKIP_BLANKS;
5226
5227 /*
5228 * We may have the VersionInfo here.
5229 */
5230 version = xmlParseVersionInfo(ctxt);
5231 if (version == NULL)
5232 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005233 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005234 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005235 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5236 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005237 }
5238 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005239 ctxt->input->version = version;
5240
5241 /*
5242 * We must have the encoding declaration
5243 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005244 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005245 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5246 /*
5247 * The XML REC instructs us to stop parsing right here
5248 */
5249 return;
5250 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005251 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5252 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5253 "Missing encoding in text declaration\n");
5254 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005255
5256 SKIP_BLANKS;
5257 if ((RAW == '?') && (NXT(1) == '>')) {
5258 SKIP(2);
5259 } else if (RAW == '>') {
5260 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005261 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005262 NEXT;
5263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005264 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005265 MOVETO_ENDTAG(CUR_PTR);
5266 NEXT;
5267 }
5268}
5269
5270/**
Owen Taylor3473f882001-02-23 17:55:21 +00005271 * xmlParseExternalSubset:
5272 * @ctxt: an XML parser context
5273 * @ExternalID: the external identifier
5274 * @SystemID: the system identifier (or URL)
5275 *
5276 * parse Markup declarations from an external subset
5277 *
5278 * [30] extSubset ::= textDecl? extSubsetDecl
5279 *
5280 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5281 */
5282void
5283xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5284 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005285 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005287 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005288 xmlParseTextDecl(ctxt);
5289 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5290 /*
5291 * The XML REC instructs us to stop parsing right here
5292 */
5293 ctxt->instate = XML_PARSER_EOF;
5294 return;
5295 }
5296 }
5297 if (ctxt->myDoc == NULL) {
5298 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5299 }
5300 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5301 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5302
5303 ctxt->instate = XML_PARSER_DTD;
5304 ctxt->external = 1;
5305 while (((RAW == '<') && (NXT(1) == '?')) ||
5306 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005307 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005308 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005309 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005310
5311 GROW;
5312 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5313 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005314 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005315 NEXT;
5316 } else if (RAW == '%') {
5317 xmlParsePEReference(ctxt);
5318 } else
5319 xmlParseMarkupDecl(ctxt);
5320
5321 /*
5322 * Pop-up of finished entities.
5323 */
5324 while ((RAW == 0) && (ctxt->inputNr > 1))
5325 xmlPopInput(ctxt);
5326
Daniel Veillardfdc91562002-07-01 21:52:03 +00005327 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005328 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005329 break;
5330 }
5331 }
5332
5333 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005334 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005335 }
5336
5337}
5338
5339/**
5340 * xmlParseReference:
5341 * @ctxt: an XML parser context
5342 *
5343 * parse and handle entity references in content, depending on the SAX
5344 * interface, this may end-up in a call to character() if this is a
5345 * CharRef, a predefined entity, if there is no reference() callback.
5346 * or if the parser was asked to switch to that mode.
5347 *
5348 * [67] Reference ::= EntityRef | CharRef
5349 */
5350void
5351xmlParseReference(xmlParserCtxtPtr ctxt) {
5352 xmlEntityPtr ent;
5353 xmlChar *val;
5354 if (RAW != '&') return;
5355
5356 if (NXT(1) == '#') {
5357 int i = 0;
5358 xmlChar out[10];
5359 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005360 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005361
5362 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5363 /*
5364 * So we are using non-UTF-8 buffers
5365 * Check that the char fit on 8bits, if not
5366 * generate a CharRef.
5367 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005368 if (value <= 0xFF) {
5369 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005370 out[1] = 0;
5371 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5372 (!ctxt->disableSAX))
5373 ctxt->sax->characters(ctxt->userData, out, 1);
5374 } else {
5375 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005376 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005377 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005378 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005379 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5380 (!ctxt->disableSAX))
5381 ctxt->sax->reference(ctxt->userData, out);
5382 }
5383 } else {
5384 /*
5385 * Just encode the value in UTF-8
5386 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005387 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005388 out[i] = 0;
5389 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5390 (!ctxt->disableSAX))
5391 ctxt->sax->characters(ctxt->userData, out, i);
5392 }
5393 } else {
5394 ent = xmlParseEntityRef(ctxt);
5395 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005396 if (!ctxt->wellFormed)
5397 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005398 if ((ent->name != NULL) &&
5399 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5400 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005401 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005402
5403
5404 /*
5405 * The first reference to the entity trigger a parsing phase
5406 * where the ent->children is filled with the result from
5407 * the parsing.
5408 */
5409 if (ent->children == NULL) {
5410 xmlChar *value;
5411 value = ent->content;
5412
5413 /*
5414 * Check that this entity is well formed
5415 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005416 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005417 (value[1] == 0) && (value[0] == '<') &&
5418 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5419 /*
5420 * DONE: get definite answer on this !!!
5421 * Lots of entity decls are used to declare a single
5422 * char
5423 * <!ENTITY lt "<">
5424 * Which seems to be valid since
5425 * 2.4: The ampersand character (&) and the left angle
5426 * bracket (<) may appear in their literal form only
5427 * when used ... They are also legal within the literal
5428 * entity value of an internal entity declaration;i
5429 * see "4.3.2 Well-Formed Parsed Entities".
5430 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5431 * Looking at the OASIS test suite and James Clark
5432 * tests, this is broken. However the XML REC uses
5433 * it. Is the XML REC not well-formed ????
5434 * This is a hack to avoid this problem
5435 *
5436 * ANSWER: since lt gt amp .. are already defined,
5437 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005438 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005439 * is lousy but acceptable.
5440 */
5441 list = xmlNewDocText(ctxt->myDoc, value);
5442 if (list != NULL) {
5443 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5444 (ent->children == NULL)) {
5445 ent->children = list;
5446 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005447 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005448 list->parent = (xmlNodePtr) ent;
5449 } else {
5450 xmlFreeNodeList(list);
5451 }
5452 } else if (list != NULL) {
5453 xmlFreeNodeList(list);
5454 }
5455 } else {
5456 /*
5457 * 4.3.2: An internal general parsed entity is well-formed
5458 * if its replacement text matches the production labeled
5459 * content.
5460 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005461
5462 void *user_data;
5463 /*
5464 * This is a bit hackish but this seems the best
5465 * way to make sure both SAX and DOM entity support
5466 * behaves okay.
5467 */
5468 if (ctxt->userData == ctxt)
5469 user_data = NULL;
5470 else
5471 user_data = ctxt->userData;
5472
Owen Taylor3473f882001-02-23 17:55:21 +00005473 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5474 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005475 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5476 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005477 ctxt->depth--;
5478 } else if (ent->etype ==
5479 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5480 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005481 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005482 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005483 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 ctxt->depth--;
5485 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005486 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005487 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5488 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
5490 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005491 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005492 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005493 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005494 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5495 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005496 (ent->children == NULL)) {
5497 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005498 if (ctxt->replaceEntities) {
5499 /*
5500 * Prune it directly in the generated document
5501 * except for single text nodes.
5502 */
5503 if ((list->type == XML_TEXT_NODE) &&
5504 (list->next == NULL)) {
5505 list->parent = (xmlNodePtr) ent;
5506 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005507 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005508 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005509 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005510 while (list != NULL) {
5511 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005512 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005513 if (list->next == NULL)
5514 ent->last = list;
5515 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005516 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005517 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005518#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005519 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5520 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005521#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005522 }
5523 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005524 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005525 while (list != NULL) {
5526 list->parent = (xmlNodePtr) ent;
5527 if (list->next == NULL)
5528 ent->last = list;
5529 list = list->next;
5530 }
Owen Taylor3473f882001-02-23 17:55:21 +00005531 }
5532 } else {
5533 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005534 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005535 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005536 } else if ((ret != XML_ERR_OK) &&
5537 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005538 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005539 } else if (list != NULL) {
5540 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005541 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005542 }
5543 }
5544 }
5545 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5546 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5547 /*
5548 * Create a node.
5549 */
5550 ctxt->sax->reference(ctxt->userData, ent->name);
5551 return;
5552 } else if (ctxt->replaceEntities) {
5553 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5554 /*
5555 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005556 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005557 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005558 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005559 if ((list == NULL) && (ent->owner == 0)) {
5560 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005561 cur = ent->children;
5562 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005563 nw = xmlCopyNode(cur, 1);
5564 if (nw != NULL) {
5565 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005566 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005567 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005568 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005569 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005570 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005571 if (cur == ent->last)
5572 break;
5573 cur = cur->next;
5574 }
Daniel Veillard81273902003-09-30 00:43:48 +00005575#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005576 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005577 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005578#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005579 } else if (list == NULL) {
5580 xmlNodePtr nw = NULL, cur, next, last,
5581 firstChild = NULL;
5582 /*
5583 * Copy the entity child list and make it the new
5584 * entity child list. The goal is to make sure any
5585 * ID or REF referenced will be the one from the
5586 * document content and not the entity copy.
5587 */
5588 cur = ent->children;
5589 ent->children = NULL;
5590 last = ent->last;
5591 ent->last = NULL;
5592 while (cur != NULL) {
5593 next = cur->next;
5594 cur->next = NULL;
5595 cur->parent = NULL;
5596 nw = xmlCopyNode(cur, 1);
5597 if (nw != NULL) {
5598 nw->_private = cur->_private;
5599 if (firstChild == NULL){
5600 firstChild = cur;
5601 }
5602 xmlAddChild((xmlNodePtr) ent, nw);
5603 xmlAddChild(ctxt->node, cur);
5604 }
5605 if (cur == last)
5606 break;
5607 cur = next;
5608 }
5609 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005610#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005611 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5612 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005613#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005614 } else {
5615 /*
5616 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005617 * node with a possible previous text one which
5618 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005619 */
5620 if (ent->children->type == XML_TEXT_NODE)
5621 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5622 if ((ent->last != ent->children) &&
5623 (ent->last->type == XML_TEXT_NODE))
5624 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5625 xmlAddChildList(ctxt->node, ent->children);
5626 }
5627
Owen Taylor3473f882001-02-23 17:55:21 +00005628 /*
5629 * This is to avoid a nasty side effect, see
5630 * characters() in SAX.c
5631 */
5632 ctxt->nodemem = 0;
5633 ctxt->nodelen = 0;
5634 return;
5635 } else {
5636 /*
5637 * Probably running in SAX mode
5638 */
5639 xmlParserInputPtr input;
5640
5641 input = xmlNewEntityInputStream(ctxt, ent);
5642 xmlPushInput(ctxt, input);
5643 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005644 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5645 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005646 xmlParseTextDecl(ctxt);
5647 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5648 /*
5649 * The XML REC instructs us to stop parsing right here
5650 */
5651 ctxt->instate = XML_PARSER_EOF;
5652 return;
5653 }
5654 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005655 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5656 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005657 }
5658 }
5659 return;
5660 }
5661 }
5662 } else {
5663 val = ent->content;
5664 if (val == NULL) return;
5665 /*
5666 * inline the entity.
5667 */
5668 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5669 (!ctxt->disableSAX))
5670 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5671 }
5672 }
5673}
5674
5675/**
5676 * xmlParseEntityRef:
5677 * @ctxt: an XML parser context
5678 *
5679 * parse ENTITY references declarations
5680 *
5681 * [68] EntityRef ::= '&' Name ';'
5682 *
5683 * [ WFC: Entity Declared ]
5684 * In a document without any DTD, a document with only an internal DTD
5685 * subset which contains no parameter entity references, or a document
5686 * with "standalone='yes'", the Name given in the entity reference
5687 * must match that in an entity declaration, except that well-formed
5688 * documents need not declare any of the following entities: amp, lt,
5689 * gt, apos, quot. The declaration of a parameter entity must precede
5690 * any reference to it. Similarly, the declaration of a general entity
5691 * must precede any reference to it which appears in a default value in an
5692 * attribute-list declaration. Note that if entities are declared in the
5693 * external subset or in external parameter entities, a non-validating
5694 * processor is not obligated to read and process their declarations;
5695 * for such documents, the rule that an entity must be declared is a
5696 * well-formedness constraint only if standalone='yes'.
5697 *
5698 * [ WFC: Parsed Entity ]
5699 * An entity reference must not contain the name of an unparsed entity
5700 *
5701 * Returns the xmlEntityPtr if found, or NULL otherwise.
5702 */
5703xmlEntityPtr
5704xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005705 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005706 xmlEntityPtr ent = NULL;
5707
5708 GROW;
5709
5710 if (RAW == '&') {
5711 NEXT;
5712 name = xmlParseName(ctxt);
5713 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005714 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5715 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005716 } else {
5717 if (RAW == ';') {
5718 NEXT;
5719 /*
5720 * Ask first SAX for entity resolution, otherwise try the
5721 * predefined set.
5722 */
5723 if (ctxt->sax != NULL) {
5724 if (ctxt->sax->getEntity != NULL)
5725 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005726 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005727 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005728 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5729 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005730 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005731 }
Owen Taylor3473f882001-02-23 17:55:21 +00005732 }
5733 /*
5734 * [ WFC: Entity Declared ]
5735 * In a document without any DTD, a document with only an
5736 * internal DTD subset which contains no parameter entity
5737 * references, or a document with "standalone='yes'", the
5738 * Name given in the entity reference must match that in an
5739 * entity declaration, except that well-formed documents
5740 * need not declare any of the following entities: amp, lt,
5741 * gt, apos, quot.
5742 * The declaration of a parameter entity must precede any
5743 * reference to it.
5744 * Similarly, the declaration of a general entity must
5745 * precede any reference to it which appears in a default
5746 * value in an attribute-list declaration. Note that if
5747 * entities are declared in the external subset or in
5748 * external parameter entities, a non-validating processor
5749 * is not obligated to read and process their declarations;
5750 * for such documents, the rule that an entity must be
5751 * declared is a well-formedness constraint only if
5752 * standalone='yes'.
5753 */
5754 if (ent == NULL) {
5755 if ((ctxt->standalone == 1) ||
5756 ((ctxt->hasExternalSubset == 0) &&
5757 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005758 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005759 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005761 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005762 "Entity '%s' not defined\n", name);
5763 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005764 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005765 }
5766
5767 /*
5768 * [ WFC: Parsed Entity ]
5769 * An entity reference must not contain the name of an
5770 * unparsed entity
5771 */
5772 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005773 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005774 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776
5777 /*
5778 * [ WFC: No External Entity References ]
5779 * Attribute values cannot contain direct or indirect
5780 * entity references to external entities.
5781 */
5782 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5783 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005784 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5785 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005786 }
5787 /*
5788 * [ WFC: No < in Attribute Values ]
5789 * The replacement text of any entity referred to directly or
5790 * indirectly in an attribute value (other than "&lt;") must
5791 * not contain a <.
5792 */
5793 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5794 (ent != NULL) &&
5795 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5796 (ent->content != NULL) &&
5797 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005798 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005799 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005800 }
5801
5802 /*
5803 * Internal check, no parameter entities here ...
5804 */
5805 else {
5806 switch (ent->etype) {
5807 case XML_INTERNAL_PARAMETER_ENTITY:
5808 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005809 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5810 "Attempt to reference the parameter entity '%s'\n",
5811 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005812 break;
5813 default:
5814 break;
5815 }
5816 }
5817
5818 /*
5819 * [ WFC: No Recursion ]
5820 * A parsed entity must not contain a recursive reference
5821 * to itself, either directly or indirectly.
5822 * Done somewhere else
5823 */
5824
5825 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005826 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005827 }
Owen Taylor3473f882001-02-23 17:55:21 +00005828 }
5829 }
5830 return(ent);
5831}
5832
5833/**
5834 * xmlParseStringEntityRef:
5835 * @ctxt: an XML parser context
5836 * @str: a pointer to an index in the string
5837 *
5838 * parse ENTITY references declarations, but this version parses it from
5839 * a string value.
5840 *
5841 * [68] EntityRef ::= '&' Name ';'
5842 *
5843 * [ WFC: Entity Declared ]
5844 * In a document without any DTD, a document with only an internal DTD
5845 * subset which contains no parameter entity references, or a document
5846 * with "standalone='yes'", the Name given in the entity reference
5847 * must match that in an entity declaration, except that well-formed
5848 * documents need not declare any of the following entities: amp, lt,
5849 * gt, apos, quot. The declaration of a parameter entity must precede
5850 * any reference to it. Similarly, the declaration of a general entity
5851 * must precede any reference to it which appears in a default value in an
5852 * attribute-list declaration. Note that if entities are declared in the
5853 * external subset or in external parameter entities, a non-validating
5854 * processor is not obligated to read and process their declarations;
5855 * for such documents, the rule that an entity must be declared is a
5856 * well-formedness constraint only if standalone='yes'.
5857 *
5858 * [ WFC: Parsed Entity ]
5859 * An entity reference must not contain the name of an unparsed entity
5860 *
5861 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5862 * is updated to the current location in the string.
5863 */
5864xmlEntityPtr
5865xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5866 xmlChar *name;
5867 const xmlChar *ptr;
5868 xmlChar cur;
5869 xmlEntityPtr ent = NULL;
5870
5871 if ((str == NULL) || (*str == NULL))
5872 return(NULL);
5873 ptr = *str;
5874 cur = *ptr;
5875 if (cur == '&') {
5876 ptr++;
5877 cur = *ptr;
5878 name = xmlParseStringName(ctxt, &ptr);
5879 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005880 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5881 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005882 } else {
5883 if (*ptr == ';') {
5884 ptr++;
5885 /*
5886 * Ask first SAX for entity resolution, otherwise try the
5887 * predefined set.
5888 */
5889 if (ctxt->sax != NULL) {
5890 if (ctxt->sax->getEntity != NULL)
5891 ent = ctxt->sax->getEntity(ctxt->userData, name);
5892 if (ent == NULL)
5893 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005894 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005895 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005896 }
Owen Taylor3473f882001-02-23 17:55:21 +00005897 }
5898 /*
5899 * [ WFC: Entity Declared ]
5900 * In a document without any DTD, a document with only an
5901 * internal DTD subset which contains no parameter entity
5902 * references, or a document with "standalone='yes'", the
5903 * Name given in the entity reference must match that in an
5904 * entity declaration, except that well-formed documents
5905 * need not declare any of the following entities: amp, lt,
5906 * gt, apos, quot.
5907 * The declaration of a parameter entity must precede any
5908 * reference to it.
5909 * Similarly, the declaration of a general entity must
5910 * precede any reference to it which appears in a default
5911 * value in an attribute-list declaration. Note that if
5912 * entities are declared in the external subset or in
5913 * external parameter entities, a non-validating processor
5914 * is not obligated to read and process their declarations;
5915 * for such documents, the rule that an entity must be
5916 * declared is a well-formedness constraint only if
5917 * standalone='yes'.
5918 */
5919 if (ent == NULL) {
5920 if ((ctxt->standalone == 1) ||
5921 ((ctxt->hasExternalSubset == 0) &&
5922 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005923 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005924 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005926 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005927 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005928 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005929 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005930 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005931 }
5932
5933 /*
5934 * [ WFC: Parsed Entity ]
5935 * An entity reference must not contain the name of an
5936 * unparsed entity
5937 */
5938 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005939 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005940 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005941 }
5942
5943 /*
5944 * [ WFC: No External Entity References ]
5945 * Attribute values cannot contain direct or indirect
5946 * entity references to external entities.
5947 */
5948 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5949 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005950 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005951 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005952 }
5953 /*
5954 * [ WFC: No < in Attribute Values ]
5955 * The replacement text of any entity referred to directly or
5956 * indirectly in an attribute value (other than "&lt;") must
5957 * not contain a <.
5958 */
5959 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5960 (ent != NULL) &&
5961 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5962 (ent->content != NULL) &&
5963 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005964 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
5965 "'<' in entity '%s' is not allowed in attributes values\n",
5966 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005967 }
5968
5969 /*
5970 * Internal check, no parameter entities here ...
5971 */
5972 else {
5973 switch (ent->etype) {
5974 case XML_INTERNAL_PARAMETER_ENTITY:
5975 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00005976 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5977 "Attempt to reference the parameter entity '%s'\n",
5978 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005979 break;
5980 default:
5981 break;
5982 }
5983 }
5984
5985 /*
5986 * [ WFC: No Recursion ]
5987 * A parsed entity must not contain a recursive reference
5988 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005989 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005990 */
5991
5992 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005993 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005994 }
5995 xmlFree(name);
5996 }
5997 }
5998 *str = ptr;
5999 return(ent);
6000}
6001
6002/**
6003 * xmlParsePEReference:
6004 * @ctxt: an XML parser context
6005 *
6006 * parse PEReference declarations
6007 * The entity content is handled directly by pushing it's content as
6008 * a new input stream.
6009 *
6010 * [69] PEReference ::= '%' Name ';'
6011 *
6012 * [ WFC: No Recursion ]
6013 * A parsed entity must not contain a recursive
6014 * reference to itself, either directly or indirectly.
6015 *
6016 * [ WFC: Entity Declared ]
6017 * In a document without any DTD, a document with only an internal DTD
6018 * subset which contains no parameter entity references, or a document
6019 * with "standalone='yes'", ... ... The declaration of a parameter
6020 * entity must precede any reference to it...
6021 *
6022 * [ VC: Entity Declared ]
6023 * In a document with an external subset or external parameter entities
6024 * with "standalone='no'", ... ... The declaration of a parameter entity
6025 * must precede any reference to it...
6026 *
6027 * [ WFC: In DTD ]
6028 * Parameter-entity references may only appear in the DTD.
6029 * NOTE: misleading but this is handled.
6030 */
6031void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006032xmlParsePEReference(xmlParserCtxtPtr ctxt)
6033{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006034 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006035 xmlEntityPtr entity = NULL;
6036 xmlParserInputPtr input;
6037
6038 if (RAW == '%') {
6039 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006040 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006041 if (name == NULL) {
6042 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6043 "xmlParsePEReference: no name\n");
6044 } else {
6045 if (RAW == ';') {
6046 NEXT;
6047 if ((ctxt->sax != NULL) &&
6048 (ctxt->sax->getParameterEntity != NULL))
6049 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6050 name);
6051 if (entity == NULL) {
6052 /*
6053 * [ WFC: Entity Declared ]
6054 * In a document without any DTD, a document with only an
6055 * internal DTD subset which contains no parameter entity
6056 * references, or a document with "standalone='yes'", ...
6057 * ... The declaration of a parameter entity must precede
6058 * any reference to it...
6059 */
6060 if ((ctxt->standalone == 1) ||
6061 ((ctxt->hasExternalSubset == 0) &&
6062 (ctxt->hasPErefs == 0))) {
6063 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6064 "PEReference: %%%s; not found\n",
6065 name);
6066 } else {
6067 /*
6068 * [ VC: Entity Declared ]
6069 * In a document with an external subset or external
6070 * parameter entities with "standalone='no'", ...
6071 * ... The declaration of a parameter entity must
6072 * precede any reference to it...
6073 */
6074 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6075 "PEReference: %%%s; not found\n",
6076 name, NULL);
6077 ctxt->valid = 0;
6078 }
6079 } else {
6080 /*
6081 * Internal checking in case the entity quest barfed
6082 */
6083 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6084 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6085 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6086 "Internal: %%%s; is not a parameter entity\n",
6087 name, NULL);
6088 } else if (ctxt->input->free != deallocblankswrapper) {
6089 input =
6090 xmlNewBlanksWrapperInputStream(ctxt, entity);
6091 xmlPushInput(ctxt, input);
6092 } else {
6093 /*
6094 * TODO !!!
6095 * handle the extra spaces added before and after
6096 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6097 */
6098 input = xmlNewEntityInputStream(ctxt, entity);
6099 xmlPushInput(ctxt, input);
6100 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006101 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006102 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006103 xmlParseTextDecl(ctxt);
6104 if (ctxt->errNo ==
6105 XML_ERR_UNSUPPORTED_ENCODING) {
6106 /*
6107 * The XML REC instructs us to stop parsing
6108 * right here
6109 */
6110 ctxt->instate = XML_PARSER_EOF;
6111 return;
6112 }
6113 }
6114 }
6115 }
6116 ctxt->hasPErefs = 1;
6117 } else {
6118 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6119 }
6120 }
Owen Taylor3473f882001-02-23 17:55:21 +00006121 }
6122}
6123
6124/**
6125 * xmlParseStringPEReference:
6126 * @ctxt: an XML parser context
6127 * @str: a pointer to an index in the string
6128 *
6129 * parse PEReference declarations
6130 *
6131 * [69] PEReference ::= '%' Name ';'
6132 *
6133 * [ WFC: No Recursion ]
6134 * A parsed entity must not contain a recursive
6135 * reference to itself, either directly or indirectly.
6136 *
6137 * [ WFC: Entity Declared ]
6138 * In a document without any DTD, a document with only an internal DTD
6139 * subset which contains no parameter entity references, or a document
6140 * with "standalone='yes'", ... ... The declaration of a parameter
6141 * entity must precede any reference to it...
6142 *
6143 * [ VC: Entity Declared ]
6144 * In a document with an external subset or external parameter entities
6145 * with "standalone='no'", ... ... The declaration of a parameter entity
6146 * must precede any reference to it...
6147 *
6148 * [ WFC: In DTD ]
6149 * Parameter-entity references may only appear in the DTD.
6150 * NOTE: misleading but this is handled.
6151 *
6152 * Returns the string of the entity content.
6153 * str is updated to the current value of the index
6154 */
6155xmlEntityPtr
6156xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6157 const xmlChar *ptr;
6158 xmlChar cur;
6159 xmlChar *name;
6160 xmlEntityPtr entity = NULL;
6161
6162 if ((str == NULL) || (*str == NULL)) return(NULL);
6163 ptr = *str;
6164 cur = *ptr;
6165 if (cur == '%') {
6166 ptr++;
6167 cur = *ptr;
6168 name = xmlParseStringName(ctxt, &ptr);
6169 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006170 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6171 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006172 } else {
6173 cur = *ptr;
6174 if (cur == ';') {
6175 ptr++;
6176 cur = *ptr;
6177 if ((ctxt->sax != NULL) &&
6178 (ctxt->sax->getParameterEntity != NULL))
6179 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6180 name);
6181 if (entity == NULL) {
6182 /*
6183 * [ WFC: Entity Declared ]
6184 * In a document without any DTD, a document with only an
6185 * internal DTD subset which contains no parameter entity
6186 * references, or a document with "standalone='yes'", ...
6187 * ... The declaration of a parameter entity must precede
6188 * any reference to it...
6189 */
6190 if ((ctxt->standalone == 1) ||
6191 ((ctxt->hasExternalSubset == 0) &&
6192 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006193 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006194 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006195 } else {
6196 /*
6197 * [ VC: Entity Declared ]
6198 * In a document with an external subset or external
6199 * parameter entities with "standalone='no'", ...
6200 * ... The declaration of a parameter entity must
6201 * precede any reference to it...
6202 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006203 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6204 "PEReference: %%%s; not found\n",
6205 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006206 ctxt->valid = 0;
6207 }
6208 } else {
6209 /*
6210 * Internal checking in case the entity quest barfed
6211 */
6212 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6213 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006214 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6215 "%%%s; is not a parameter entity\n",
6216 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006217 }
6218 }
6219 ctxt->hasPErefs = 1;
6220 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006221 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006222 }
6223 xmlFree(name);
6224 }
6225 }
6226 *str = ptr;
6227 return(entity);
6228}
6229
6230/**
6231 * xmlParseDocTypeDecl:
6232 * @ctxt: an XML parser context
6233 *
6234 * parse a DOCTYPE declaration
6235 *
6236 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6237 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6238 *
6239 * [ VC: Root Element Type ]
6240 * The Name in the document type declaration must match the element
6241 * type of the root element.
6242 */
6243
6244void
6245xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006246 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006247 xmlChar *ExternalID = NULL;
6248 xmlChar *URI = NULL;
6249
6250 /*
6251 * We know that '<!DOCTYPE' has been detected.
6252 */
6253 SKIP(9);
6254
6255 SKIP_BLANKS;
6256
6257 /*
6258 * Parse the DOCTYPE name.
6259 */
6260 name = xmlParseName(ctxt);
6261 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006262 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6263 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006264 }
6265 ctxt->intSubName = name;
6266
6267 SKIP_BLANKS;
6268
6269 /*
6270 * Check for SystemID and ExternalID
6271 */
6272 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6273
6274 if ((URI != NULL) || (ExternalID != NULL)) {
6275 ctxt->hasExternalSubset = 1;
6276 }
6277 ctxt->extSubURI = URI;
6278 ctxt->extSubSystem = ExternalID;
6279
6280 SKIP_BLANKS;
6281
6282 /*
6283 * Create and update the internal subset.
6284 */
6285 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6286 (!ctxt->disableSAX))
6287 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6288
6289 /*
6290 * Is there any internal subset declarations ?
6291 * they are handled separately in xmlParseInternalSubset()
6292 */
6293 if (RAW == '[')
6294 return;
6295
6296 /*
6297 * We should be at the end of the DOCTYPE declaration.
6298 */
6299 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006300 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006301 }
6302 NEXT;
6303}
6304
6305/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006306 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006307 * @ctxt: an XML parser context
6308 *
6309 * parse the internal subset declaration
6310 *
6311 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6312 */
6313
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006314static void
Owen Taylor3473f882001-02-23 17:55:21 +00006315xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6316 /*
6317 * Is there any DTD definition ?
6318 */
6319 if (RAW == '[') {
6320 ctxt->instate = XML_PARSER_DTD;
6321 NEXT;
6322 /*
6323 * Parse the succession of Markup declarations and
6324 * PEReferences.
6325 * Subsequence (markupdecl | PEReference | S)*
6326 */
6327 while (RAW != ']') {
6328 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006329 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006330
6331 SKIP_BLANKS;
6332 xmlParseMarkupDecl(ctxt);
6333 xmlParsePEReference(ctxt);
6334
6335 /*
6336 * Pop-up of finished entities.
6337 */
6338 while ((RAW == 0) && (ctxt->inputNr > 1))
6339 xmlPopInput(ctxt);
6340
6341 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006342 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006343 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006344 break;
6345 }
6346 }
6347 if (RAW == ']') {
6348 NEXT;
6349 SKIP_BLANKS;
6350 }
6351 }
6352
6353 /*
6354 * We should be at the end of the DOCTYPE declaration.
6355 */
6356 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006357 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006358 }
6359 NEXT;
6360}
6361
Daniel Veillard81273902003-09-30 00:43:48 +00006362#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006363/**
6364 * xmlParseAttribute:
6365 * @ctxt: an XML parser context
6366 * @value: a xmlChar ** used to store the value of the attribute
6367 *
6368 * parse an attribute
6369 *
6370 * [41] Attribute ::= Name Eq AttValue
6371 *
6372 * [ WFC: No External Entity References ]
6373 * Attribute values cannot contain direct or indirect entity references
6374 * to external entities.
6375 *
6376 * [ WFC: No < in Attribute Values ]
6377 * The replacement text of any entity referred to directly or indirectly in
6378 * an attribute value (other than "&lt;") must not contain a <.
6379 *
6380 * [ VC: Attribute Value Type ]
6381 * The attribute must have been declared; the value must be of the type
6382 * declared for it.
6383 *
6384 * [25] Eq ::= S? '=' S?
6385 *
6386 * With namespace:
6387 *
6388 * [NS 11] Attribute ::= QName Eq AttValue
6389 *
6390 * Also the case QName == xmlns:??? is handled independently as a namespace
6391 * definition.
6392 *
6393 * Returns the attribute name, and the value in *value.
6394 */
6395
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006396const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006397xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006398 const xmlChar *name;
6399 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006400
6401 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006402 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006403 name = xmlParseName(ctxt);
6404 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006405 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006406 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006407 return(NULL);
6408 }
6409
6410 /*
6411 * read the value
6412 */
6413 SKIP_BLANKS;
6414 if (RAW == '=') {
6415 NEXT;
6416 SKIP_BLANKS;
6417 val = xmlParseAttValue(ctxt);
6418 ctxt->instate = XML_PARSER_CONTENT;
6419 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006420 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006421 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 return(NULL);
6423 }
6424
6425 /*
6426 * Check that xml:lang conforms to the specification
6427 * No more registered as an error, just generate a warning now
6428 * since this was deprecated in XML second edition
6429 */
6430 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6431 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006432 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6433 "Malformed value for xml:lang : %s\n",
6434 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006435 }
6436 }
6437
6438 /*
6439 * Check that xml:space conforms to the specification
6440 */
6441 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6442 if (xmlStrEqual(val, BAD_CAST "default"))
6443 *(ctxt->space) = 0;
6444 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6445 *(ctxt->space) = 1;
6446 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006447 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006448"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006449 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006450 }
6451 }
6452
6453 *value = val;
6454 return(name);
6455}
6456
6457/**
6458 * xmlParseStartTag:
6459 * @ctxt: an XML parser context
6460 *
6461 * parse a start of tag either for rule element or
6462 * EmptyElement. In both case we don't parse the tag closing chars.
6463 *
6464 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6465 *
6466 * [ WFC: Unique Att Spec ]
6467 * No attribute name may appear more than once in the same start-tag or
6468 * empty-element tag.
6469 *
6470 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6471 *
6472 * [ WFC: Unique Att Spec ]
6473 * No attribute name may appear more than once in the same start-tag or
6474 * empty-element tag.
6475 *
6476 * With namespace:
6477 *
6478 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6479 *
6480 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6481 *
6482 * Returns the element name parsed
6483 */
6484
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006485const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006486xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006487 const xmlChar *name;
6488 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006489 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006490 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006491 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006492 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006493 int i;
6494
6495 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006496 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006497
6498 name = xmlParseName(ctxt);
6499 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006500 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006501 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006502 return(NULL);
6503 }
6504
6505 /*
6506 * Now parse the attributes, it ends up with the ending
6507 *
6508 * (S Attribute)* S?
6509 */
6510 SKIP_BLANKS;
6511 GROW;
6512
Daniel Veillard21a0f912001-02-25 19:54:14 +00006513 while ((RAW != '>') &&
6514 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006515 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006516 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006517 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006518
6519 attname = xmlParseAttribute(ctxt, &attvalue);
6520 if ((attname != NULL) && (attvalue != NULL)) {
6521 /*
6522 * [ WFC: Unique Att Spec ]
6523 * No attribute name may appear more than once in the same
6524 * start-tag or empty-element tag.
6525 */
6526 for (i = 0; i < nbatts;i += 2) {
6527 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006528 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006529 xmlFree(attvalue);
6530 goto failed;
6531 }
6532 }
Owen Taylor3473f882001-02-23 17:55:21 +00006533 /*
6534 * Add the pair to atts
6535 */
6536 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006537 maxatts = 22; /* allow for 10 attrs by default */
6538 atts = (const xmlChar **)
6539 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006540 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006541 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006542 if (attvalue != NULL)
6543 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006544 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006545 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006546 ctxt->atts = atts;
6547 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006548 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006549 const xmlChar **n;
6550
Owen Taylor3473f882001-02-23 17:55:21 +00006551 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006552 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006553 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006554 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006555 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006556 if (attvalue != NULL)
6557 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006558 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006559 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006560 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006561 ctxt->atts = atts;
6562 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006563 }
6564 atts[nbatts++] = attname;
6565 atts[nbatts++] = attvalue;
6566 atts[nbatts] = NULL;
6567 atts[nbatts + 1] = NULL;
6568 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006569 if (attvalue != NULL)
6570 xmlFree(attvalue);
6571 }
6572
6573failed:
6574
Daniel Veillard3772de32002-12-17 10:31:45 +00006575 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006576 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6577 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006578 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006579 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6580 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006581 }
6582 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006583 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6584 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006585 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6586 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006587 break;
6588 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006589 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006590 GROW;
6591 }
6592
6593 /*
6594 * SAX: Start of Element !
6595 */
6596 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006597 (!ctxt->disableSAX)) {
6598 if (nbatts > 0)
6599 ctxt->sax->startElement(ctxt->userData, name, atts);
6600 else
6601 ctxt->sax->startElement(ctxt->userData, name, NULL);
6602 }
Owen Taylor3473f882001-02-23 17:55:21 +00006603
6604 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006605 /* Free only the content strings */
6606 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006607 if (atts[i] != NULL)
6608 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006609 }
6610 return(name);
6611}
6612
6613/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006614 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006615 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006616 * @line: line of the start tag
6617 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006618 *
6619 * parse an end of tag
6620 *
6621 * [42] ETag ::= '</' Name S? '>'
6622 *
6623 * With namespace
6624 *
6625 * [NS 9] ETag ::= '</' QName S? '>'
6626 */
6627
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006628static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006629xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006630 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006631
6632 GROW;
6633 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006634 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006635 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006636 return;
6637 }
6638 SKIP(2);
6639
Daniel Veillard46de64e2002-05-29 08:21:33 +00006640 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006641
6642 /*
6643 * We should definitely be at the ending "S? '>'" part
6644 */
6645 GROW;
6646 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006647 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006648 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006649 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006650 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006651
6652 /*
6653 * [ WFC: Element Type Match ]
6654 * The Name in an element's end-tag must match the element type in the
6655 * start-tag.
6656 *
6657 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006658 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006659 if (name == NULL) name = BAD_CAST "unparseable";
6660 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006661 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006662 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
6664
6665 /*
6666 * SAX: End of Tag
6667 */
6668 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6669 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006670 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006671
Daniel Veillarde57ec792003-09-10 10:50:59 +00006672 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006674 return;
6675}
6676
6677/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006678 * xmlParseEndTag:
6679 * @ctxt: an XML parser context
6680 *
6681 * parse an end of tag
6682 *
6683 * [42] ETag ::= '</' Name S? '>'
6684 *
6685 * With namespace
6686 *
6687 * [NS 9] ETag ::= '</' QName S? '>'
6688 */
6689
6690void
6691xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006692 xmlParseEndTag1(ctxt, 0);
6693}
Daniel Veillard81273902003-09-30 00:43:48 +00006694#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006695
6696/************************************************************************
6697 * *
6698 * SAX 2 specific operations *
6699 * *
6700 ************************************************************************/
6701
6702static const xmlChar *
6703xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6704 int len = 0, l;
6705 int c;
6706 int count = 0;
6707
6708 /*
6709 * Handler for more complex cases
6710 */
6711 GROW;
6712 c = CUR_CHAR(l);
6713 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006714 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006715 return(NULL);
6716 }
6717
6718 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006719 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006720 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006721 (IS_COMBINING(c)) ||
6722 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006723 if (count++ > 100) {
6724 count = 0;
6725 GROW;
6726 }
6727 len += l;
6728 NEXTL(l);
6729 c = CUR_CHAR(l);
6730 }
6731 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6732}
6733
6734/*
6735 * xmlGetNamespace:
6736 * @ctxt: an XML parser context
6737 * @prefix: the prefix to lookup
6738 *
6739 * Lookup the namespace name for the @prefix (which ca be NULL)
6740 * The prefix must come from the @ctxt->dict dictionnary
6741 *
6742 * Returns the namespace name or NULL if not bound
6743 */
6744static const xmlChar *
6745xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6746 int i;
6747
Daniel Veillarde57ec792003-09-10 10:50:59 +00006748 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006749 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006750 if (ctxt->nsTab[i] == prefix) {
6751 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6752 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006753 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006754 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006755 return(NULL);
6756}
6757
6758/**
6759 * xmlParseNCName:
6760 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006761 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006762 *
6763 * parse an XML name.
6764 *
6765 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6766 * CombiningChar | Extender
6767 *
6768 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6769 *
6770 * Returns the Name parsed or NULL
6771 */
6772
6773static const xmlChar *
6774xmlParseNCName(xmlParserCtxtPtr ctxt) {
6775 const xmlChar *in;
6776 const xmlChar *ret;
6777 int count = 0;
6778
6779 /*
6780 * Accelerator for simple ASCII names
6781 */
6782 in = ctxt->input->cur;
6783 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6784 ((*in >= 0x41) && (*in <= 0x5A)) ||
6785 (*in == '_')) {
6786 in++;
6787 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6788 ((*in >= 0x41) && (*in <= 0x5A)) ||
6789 ((*in >= 0x30) && (*in <= 0x39)) ||
6790 (*in == '_') || (*in == '-') ||
6791 (*in == '.'))
6792 in++;
6793 if ((*in > 0) && (*in < 0x80)) {
6794 count = in - ctxt->input->cur;
6795 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6796 ctxt->input->cur = in;
6797 ctxt->nbChars += count;
6798 ctxt->input->col += count;
6799 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006800 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006801 }
6802 return(ret);
6803 }
6804 }
6805 return(xmlParseNCNameComplex(ctxt));
6806}
6807
6808/**
6809 * xmlParseQName:
6810 * @ctxt: an XML parser context
6811 * @prefix: pointer to store the prefix part
6812 *
6813 * parse an XML Namespace QName
6814 *
6815 * [6] QName ::= (Prefix ':')? LocalPart
6816 * [7] Prefix ::= NCName
6817 * [8] LocalPart ::= NCName
6818 *
6819 * Returns the Name parsed or NULL
6820 */
6821
6822static const xmlChar *
6823xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6824 const xmlChar *l, *p;
6825
6826 GROW;
6827
6828 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006829 if (l == NULL) {
6830 if (CUR == ':') {
6831 l = xmlParseName(ctxt);
6832 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006833 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6834 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006835 *prefix = NULL;
6836 return(l);
6837 }
6838 }
6839 return(NULL);
6840 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006841 if (CUR == ':') {
6842 NEXT;
6843 p = l;
6844 l = xmlParseNCName(ctxt);
6845 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006846 xmlChar *tmp;
6847
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006848 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6849 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006850 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6851 p = xmlDictLookup(ctxt->dict, tmp, -1);
6852 if (tmp != NULL) xmlFree(tmp);
6853 *prefix = NULL;
6854 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006855 }
6856 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006857 xmlChar *tmp;
6858
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006859 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6860 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006861 NEXT;
6862 tmp = (xmlChar *) xmlParseName(ctxt);
6863 if (tmp != NULL) {
6864 tmp = xmlBuildQName(tmp, l, NULL, 0);
6865 l = xmlDictLookup(ctxt->dict, tmp, -1);
6866 if (tmp != NULL) xmlFree(tmp);
6867 *prefix = p;
6868 return(l);
6869 }
6870 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6871 l = xmlDictLookup(ctxt->dict, tmp, -1);
6872 if (tmp != NULL) xmlFree(tmp);
6873 *prefix = p;
6874 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006875 }
6876 *prefix = p;
6877 } else
6878 *prefix = NULL;
6879 return(l);
6880}
6881
6882/**
6883 * xmlParseQNameAndCompare:
6884 * @ctxt: an XML parser context
6885 * @name: the localname
6886 * @prefix: the prefix, if any.
6887 *
6888 * parse an XML name and compares for match
6889 * (specialized for endtag parsing)
6890 *
6891 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6892 * and the name for mismatch
6893 */
6894
6895static const xmlChar *
6896xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6897 xmlChar const *prefix) {
6898 const xmlChar *cmp = name;
6899 const xmlChar *in;
6900 const xmlChar *ret;
6901 const xmlChar *prefix2;
6902
6903 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6904
6905 GROW;
6906 in = ctxt->input->cur;
6907
6908 cmp = prefix;
6909 while (*in != 0 && *in == *cmp) {
6910 ++in;
6911 ++cmp;
6912 }
6913 if ((*cmp == 0) && (*in == ':')) {
6914 in++;
6915 cmp = name;
6916 while (*in != 0 && *in == *cmp) {
6917 ++in;
6918 ++cmp;
6919 }
William M. Brack76e95df2003-10-18 16:20:14 +00006920 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006921 /* success */
6922 ctxt->input->cur = in;
6923 return((const xmlChar*) 1);
6924 }
6925 }
6926 /*
6927 * all strings coms from the dictionary, equality can be done directly
6928 */
6929 ret = xmlParseQName (ctxt, &prefix2);
6930 if ((ret == name) && (prefix == prefix2))
6931 return((const xmlChar*) 1);
6932 return ret;
6933}
6934
6935/**
6936 * xmlParseAttValueInternal:
6937 * @ctxt: an XML parser context
6938 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006939 * @alloc: whether the attribute was reallocated as a new string
6940 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006941 *
6942 * parse a value for an attribute.
6943 * NOTE: if no normalization is needed, the routine will return pointers
6944 * directly from the data buffer.
6945 *
6946 * 3.3.3 Attribute-Value Normalization:
6947 * Before the value of an attribute is passed to the application or
6948 * checked for validity, the XML processor must normalize it as follows:
6949 * - a character reference is processed by appending the referenced
6950 * character to the attribute value
6951 * - an entity reference is processed by recursively processing the
6952 * replacement text of the entity
6953 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6954 * appending #x20 to the normalized value, except that only a single
6955 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6956 * parsed entity or the literal entity value of an internal parsed entity
6957 * - other characters are processed by appending them to the normalized value
6958 * If the declared value is not CDATA, then the XML processor must further
6959 * process the normalized attribute value by discarding any leading and
6960 * trailing space (#x20) characters, and by replacing sequences of space
6961 * (#x20) characters by a single space (#x20) character.
6962 * All attributes for which no declaration has been read should be treated
6963 * by a non-validating parser as if declared CDATA.
6964 *
6965 * Returns the AttValue parsed or NULL. The value has to be freed by the
6966 * caller if it was copied, this can be detected by val[*len] == 0.
6967 */
6968
6969static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006970xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
6971 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006972{
Daniel Veillard0fb18932003-09-07 09:14:37 +00006973 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006974 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006975 xmlChar *ret = NULL;
6976
6977 GROW;
6978 in = (xmlChar *) CUR_PTR;
6979 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006980 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006981 return (NULL);
6982 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006983 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00006984
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006985 /*
6986 * try to handle in this routine the most common case where no
6987 * allocation of a new string is required and where content is
6988 * pure ASCII.
6989 */
6990 limit = *in++;
6991 end = ctxt->input->end;
6992 start = in;
6993 if (in >= end) {
6994 const xmlChar *oldbase = ctxt->input->base;
6995 GROW;
6996 if (oldbase != ctxt->input->base) {
6997 long delta = ctxt->input->base - oldbase;
6998 start = start + delta;
6999 in = in + delta;
7000 }
7001 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007002 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007003 if (normalize) {
7004 /*
7005 * Skip any leading spaces
7006 */
7007 while ((in < end) && (*in != limit) &&
7008 ((*in == 0x20) || (*in == 0x9) ||
7009 (*in == 0xA) || (*in == 0xD))) {
7010 in++;
7011 start = in;
7012 if (in >= end) {
7013 const xmlChar *oldbase = ctxt->input->base;
7014 GROW;
7015 if (oldbase != ctxt->input->base) {
7016 long delta = ctxt->input->base - oldbase;
7017 start = start + delta;
7018 in = in + delta;
7019 }
7020 end = ctxt->input->end;
7021 }
7022 }
7023 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7024 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7025 if ((*in++ == 0x20) && (*in == 0x20)) break;
7026 if (in >= end) {
7027 const xmlChar *oldbase = ctxt->input->base;
7028 GROW;
7029 if (oldbase != ctxt->input->base) {
7030 long delta = ctxt->input->base - oldbase;
7031 start = start + delta;
7032 in = in + delta;
7033 }
7034 end = ctxt->input->end;
7035 }
7036 }
7037 last = in;
7038 /*
7039 * skip the trailing blanks
7040 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007041 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007042 while ((in < end) && (*in != limit) &&
7043 ((*in == 0x20) || (*in == 0x9) ||
7044 (*in == 0xA) || (*in == 0xD))) {
7045 in++;
7046 if (in >= end) {
7047 const xmlChar *oldbase = ctxt->input->base;
7048 GROW;
7049 if (oldbase != ctxt->input->base) {
7050 long delta = ctxt->input->base - oldbase;
7051 start = start + delta;
7052 in = in + delta;
7053 last = last + delta;
7054 }
7055 end = ctxt->input->end;
7056 }
7057 }
7058 if (*in != limit) goto need_complex;
7059 } else {
7060 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7061 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7062 in++;
7063 if (in >= end) {
7064 const xmlChar *oldbase = ctxt->input->base;
7065 GROW;
7066 if (oldbase != ctxt->input->base) {
7067 long delta = ctxt->input->base - oldbase;
7068 start = start + delta;
7069 in = in + delta;
7070 }
7071 end = ctxt->input->end;
7072 }
7073 }
7074 last = in;
7075 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007076 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007077 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007078 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007079 *len = last - start;
7080 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007081 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007082 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007083 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007084 }
7085 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007086 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007087 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007088need_complex:
7089 if (alloc) *alloc = 1;
7090 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007091}
7092
7093/**
7094 * xmlParseAttribute2:
7095 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007096 * @pref: the element prefix
7097 * @elem: the element name
7098 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007099 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007100 * @len: an int * to save the length of the attribute
7101 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007102 *
7103 * parse an attribute in the new SAX2 framework.
7104 *
7105 * Returns the attribute name, and the value in *value, .
7106 */
7107
7108static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007109xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7110 const xmlChar *pref, const xmlChar *elem,
7111 const xmlChar **prefix, xmlChar **value,
7112 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007113 const xmlChar *name;
7114 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007115 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007116
7117 *value = NULL;
7118 GROW;
7119 name = xmlParseQName(ctxt, prefix);
7120 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007121 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7122 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007123 return(NULL);
7124 }
7125
7126 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007127 * get the type if needed
7128 */
7129 if (ctxt->attsSpecial != NULL) {
7130 int type;
7131
7132 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7133 pref, elem, *prefix, name);
7134 if (type != 0) normalize = 1;
7135 }
7136
7137 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007138 * read the value
7139 */
7140 SKIP_BLANKS;
7141 if (RAW == '=') {
7142 NEXT;
7143 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007144 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007145 ctxt->instate = XML_PARSER_CONTENT;
7146 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007147 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007148 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007149 return(NULL);
7150 }
7151
7152 /*
7153 * Check that xml:lang conforms to the specification
7154 * No more registered as an error, just generate a warning now
7155 * since this was deprecated in XML second edition
7156 */
7157 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7158 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007159 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7160 "Malformed value for xml:lang : %s\n",
7161 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007162 }
7163 }
7164
7165 /*
7166 * Check that xml:space conforms to the specification
7167 */
7168 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7169 if (xmlStrEqual(val, BAD_CAST "default"))
7170 *(ctxt->space) = 0;
7171 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7172 *(ctxt->space) = 1;
7173 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007174 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007175"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7176 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007177 }
7178 }
7179
7180 *value = val;
7181 return(name);
7182}
7183
7184/**
7185 * xmlParseStartTag2:
7186 * @ctxt: an XML parser context
7187 *
7188 * parse a start of tag either for rule element or
7189 * EmptyElement. In both case we don't parse the tag closing chars.
7190 * This routine is called when running SAX2 parsing
7191 *
7192 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7193 *
7194 * [ WFC: Unique Att Spec ]
7195 * No attribute name may appear more than once in the same start-tag or
7196 * empty-element tag.
7197 *
7198 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7199 *
7200 * [ WFC: Unique Att Spec ]
7201 * No attribute name may appear more than once in the same start-tag or
7202 * empty-element tag.
7203 *
7204 * With namespace:
7205 *
7206 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7207 *
7208 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7209 *
7210 * Returns the element name parsed
7211 */
7212
7213static const xmlChar *
7214xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007215 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007216 const xmlChar *localname;
7217 const xmlChar *prefix;
7218 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007219 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007220 const xmlChar *nsname;
7221 xmlChar *attvalue;
7222 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007223 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007224 int nratts, nbatts, nbdef;
7225 int i, j, nbNs, attval;
7226 const xmlChar *base;
7227 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007228
7229 if (RAW != '<') return(NULL);
7230 NEXT1;
7231
7232 /*
7233 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7234 * point since the attribute values may be stored as pointers to
7235 * the buffer and calling SHRINK would destroy them !
7236 * The Shrinking is only possible once the full set of attribute
7237 * callbacks have been done.
7238 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007239reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007240 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007241 base = ctxt->input->base;
7242 cur = ctxt->input->cur - ctxt->input->base;
7243 nbatts = 0;
7244 nratts = 0;
7245 nbdef = 0;
7246 nbNs = 0;
7247 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007248
7249 localname = xmlParseQName(ctxt, &prefix);
7250 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007251 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7252 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007253 return(NULL);
7254 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007255 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007256
7257 /*
7258 * Now parse the attributes, it ends up with the ending
7259 *
7260 * (S Attribute)* S?
7261 */
7262 SKIP_BLANKS;
7263 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007264 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007265
7266 while ((RAW != '>') &&
7267 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007268 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007269 const xmlChar *q = CUR_PTR;
7270 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007271 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007272
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007273 attname = xmlParseAttribute2(ctxt, prefix, localname,
7274 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007275 if ((attname != NULL) && (attvalue != NULL)) {
7276 if (len < 0) len = xmlStrlen(attvalue);
7277 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007278 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7279 xmlURIPtr uri;
7280
7281 if (*URL != 0) {
7282 uri = xmlParseURI((const char *) URL);
7283 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007284 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7285 "xmlns: %s not a valid URI\n",
7286 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007287 } else {
7288 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007289 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7290 "xmlns: URI %s is not absolute\n",
7291 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007292 }
7293 xmlFreeURI(uri);
7294 }
7295 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007296 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007297 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007298 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007299 for (j = 1;j <= nbNs;j++)
7300 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7301 break;
7302 if (j <= nbNs)
7303 xmlErrAttributeDup(ctxt, NULL, attname);
7304 else
7305 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007306 if (alloc != 0) xmlFree(attvalue);
7307 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007308 continue;
7309 }
7310 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007311 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7312 xmlURIPtr uri;
7313
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007314 if (attname == ctxt->str_xml) {
7315 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007316 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7317 "xml namespace prefix mapped to wrong URI\n",
7318 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007319 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007320 /*
7321 * Do not keep a namespace definition node
7322 */
7323 if (alloc != 0) xmlFree(attvalue);
7324 SKIP_BLANKS;
7325 continue;
7326 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007327 uri = xmlParseURI((const char *) URL);
7328 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007329 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7330 "xmlns:%s: '%s' is not a valid URI\n",
7331 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007332 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007333 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007334 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7335 "xmlns:%s: URI %s is not absolute\n",
7336 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007337 }
7338 xmlFreeURI(uri);
7339 }
7340
Daniel Veillard0fb18932003-09-07 09:14:37 +00007341 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007342 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007343 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007344 for (j = 1;j <= nbNs;j++)
7345 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7346 break;
7347 if (j <= nbNs)
7348 xmlErrAttributeDup(ctxt, aprefix, attname);
7349 else
7350 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007351 if (alloc != 0) xmlFree(attvalue);
7352 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007353 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007354 continue;
7355 }
7356
7357 /*
7358 * Add the pair to atts
7359 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007360 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7361 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007362 if (attvalue[len] == 0)
7363 xmlFree(attvalue);
7364 goto failed;
7365 }
7366 maxatts = ctxt->maxatts;
7367 atts = ctxt->atts;
7368 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007369 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007370 atts[nbatts++] = attname;
7371 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007372 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007373 atts[nbatts++] = attvalue;
7374 attvalue += len;
7375 atts[nbatts++] = attvalue;
7376 /*
7377 * tag if some deallocation is needed
7378 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007379 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007380 } else {
7381 if ((attvalue != NULL) && (attvalue[len] == 0))
7382 xmlFree(attvalue);
7383 }
7384
7385failed:
7386
7387 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007388 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7390 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007391 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007392 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7393 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007394 }
7395 SKIP_BLANKS;
7396 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7397 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007398 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007399 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007400 break;
7401 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007402 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007403 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007404 }
7405
Daniel Veillard0fb18932003-09-07 09:14:37 +00007406 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007407 * The attributes defaulting
7408 */
7409 if (ctxt->attsDefault != NULL) {
7410 xmlDefAttrsPtr defaults;
7411
7412 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7413 if (defaults != NULL) {
7414 for (i = 0;i < defaults->nbAttrs;i++) {
7415 attname = defaults->values[4 * i];
7416 aprefix = defaults->values[4 * i + 1];
7417
7418 /*
7419 * special work for namespaces defaulted defs
7420 */
7421 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7422 /*
7423 * check that it's not a defined namespace
7424 */
7425 for (j = 1;j <= nbNs;j++)
7426 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7427 break;
7428 if (j <= nbNs) continue;
7429
7430 nsname = xmlGetNamespace(ctxt, NULL);
7431 if (nsname != defaults->values[4 * i + 2]) {
7432 if (nsPush(ctxt, NULL,
7433 defaults->values[4 * i + 2]) > 0)
7434 nbNs++;
7435 }
7436 } else if (aprefix == ctxt->str_xmlns) {
7437 /*
7438 * check that it's not a defined namespace
7439 */
7440 for (j = 1;j <= nbNs;j++)
7441 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7442 break;
7443 if (j <= nbNs) continue;
7444
7445 nsname = xmlGetNamespace(ctxt, attname);
7446 if (nsname != defaults->values[2]) {
7447 if (nsPush(ctxt, attname,
7448 defaults->values[4 * i + 2]) > 0)
7449 nbNs++;
7450 }
7451 } else {
7452 /*
7453 * check that it's not a defined attribute
7454 */
7455 for (j = 0;j < nbatts;j+=5) {
7456 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7457 break;
7458 }
7459 if (j < nbatts) continue;
7460
7461 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7462 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007463 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007464 }
7465 maxatts = ctxt->maxatts;
7466 atts = ctxt->atts;
7467 }
7468 atts[nbatts++] = attname;
7469 atts[nbatts++] = aprefix;
7470 if (aprefix == NULL)
7471 atts[nbatts++] = NULL;
7472 else
7473 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7474 atts[nbatts++] = defaults->values[4 * i + 2];
7475 atts[nbatts++] = defaults->values[4 * i + 3];
7476 nbdef++;
7477 }
7478 }
7479 }
7480 }
7481
Daniel Veillarde70c8772003-11-25 07:21:18 +00007482 /*
7483 * The attributes checkings
7484 */
7485 for (i = 0; i < nbatts;i += 5) {
7486 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7487 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7488 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7489 "Namespace prefix %s for %s on %s is not defined\n",
7490 atts[i + 1], atts[i], localname);
7491 }
7492 atts[i + 2] = nsname;
7493 /*
7494 * [ WFC: Unique Att Spec ]
7495 * No attribute name may appear more than once in the same
7496 * start-tag or empty-element tag.
7497 * As extended by the Namespace in XML REC.
7498 */
7499 for (j = 0; j < i;j += 5) {
7500 if (atts[i] == atts[j]) {
7501 if (atts[i+1] == atts[j+1]) {
7502 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7503 break;
7504 }
7505 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7506 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7507 "Namespaced Attribute %s in '%s' redefined\n",
7508 atts[i], nsname, NULL);
7509 break;
7510 }
7511 }
7512 }
7513 }
7514
Daniel Veillarde57ec792003-09-10 10:50:59 +00007515 nsname = xmlGetNamespace(ctxt, prefix);
7516 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007517 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7518 "Namespace prefix %s on %s is not defined\n",
7519 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007520 }
7521 *pref = prefix;
7522 *URI = nsname;
7523
7524 /*
7525 * SAX: Start of Element !
7526 */
7527 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7528 (!ctxt->disableSAX)) {
7529 if (nbNs > 0)
7530 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7531 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7532 nbatts / 5, nbdef, atts);
7533 else
7534 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7535 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7536 }
7537
7538 /*
7539 * Free up attribute allocated strings if needed
7540 */
7541 if (attval != 0) {
7542 for (i = 3,j = 0; j < nratts;i += 5,j++)
7543 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7544 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007545 }
7546
7547 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007548
7549base_changed:
7550 /*
7551 * the attribute strings are valid iif the base didn't changed
7552 */
7553 if (attval != 0) {
7554 for (i = 3,j = 0; j < nratts;i += 5,j++)
7555 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7556 xmlFree((xmlChar *) atts[i]);
7557 }
7558 ctxt->input->cur = ctxt->input->base + cur;
7559 if (ctxt->wellFormed == 1) {
7560 goto reparse;
7561 }
7562 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007563}
7564
7565/**
7566 * xmlParseEndTag2:
7567 * @ctxt: an XML parser context
7568 * @line: line of the start tag
7569 * @nsNr: number of namespaces on the start tag
7570 *
7571 * parse an end of tag
7572 *
7573 * [42] ETag ::= '</' Name S? '>'
7574 *
7575 * With namespace
7576 *
7577 * [NS 9] ETag ::= '</' QName S? '>'
7578 */
7579
7580static void
7581xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007582 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007583 const xmlChar *name;
7584
7585 GROW;
7586 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007587 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007588 return;
7589 }
7590 SKIP(2);
7591
Daniel Veillard453e71b2004-04-20 17:44:46 +00007592 if ((tlen > 0) && (strncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007593 if (ctxt->input->cur[tlen] == '>') {
7594 ctxt->input->cur += tlen + 1;
7595 goto done;
7596 }
7597 ctxt->input->cur += tlen;
7598 name = (xmlChar*)1;
7599 } else {
7600 if (prefix == NULL)
7601 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7602 else
7603 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7604 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007605
7606 /*
7607 * We should definitely be at the ending "S? '>'" part
7608 */
7609 GROW;
7610 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007611 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007612 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007613 } else
7614 NEXT1;
7615
7616 /*
7617 * [ WFC: Element Type Match ]
7618 * The Name in an element's end-tag must match the element type in the
7619 * start-tag.
7620 *
7621 */
7622 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007623 if (name == NULL) name = BAD_CAST "unparseable";
7624 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007625 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007626 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 }
7628
7629 /*
7630 * SAX: End of Tag
7631 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007632done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7634 (!ctxt->disableSAX))
7635 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7636
Daniel Veillard0fb18932003-09-07 09:14:37 +00007637 spacePop(ctxt);
7638 if (nsNr != 0)
7639 nsPop(ctxt, nsNr);
7640 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007641}
7642
7643/**
Owen Taylor3473f882001-02-23 17:55:21 +00007644 * xmlParseCDSect:
7645 * @ctxt: an XML parser context
7646 *
7647 * Parse escaped pure raw content.
7648 *
7649 * [18] CDSect ::= CDStart CData CDEnd
7650 *
7651 * [19] CDStart ::= '<![CDATA['
7652 *
7653 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7654 *
7655 * [21] CDEnd ::= ']]>'
7656 */
7657void
7658xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7659 xmlChar *buf = NULL;
7660 int len = 0;
7661 int size = XML_PARSER_BUFFER_SIZE;
7662 int r, rl;
7663 int s, sl;
7664 int cur, l;
7665 int count = 0;
7666
Daniel Veillard8f597c32003-10-06 08:19:27 +00007667 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007668 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007669 SKIP(9);
7670 } else
7671 return;
7672
7673 ctxt->instate = XML_PARSER_CDATA_SECTION;
7674 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007675 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007676 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007677 ctxt->instate = XML_PARSER_CONTENT;
7678 return;
7679 }
7680 NEXTL(rl);
7681 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007682 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007683 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007684 ctxt->instate = XML_PARSER_CONTENT;
7685 return;
7686 }
7687 NEXTL(sl);
7688 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007689 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007690 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007691 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007692 return;
7693 }
William M. Brack871611b2003-10-18 04:53:14 +00007694 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007695 ((r != ']') || (s != ']') || (cur != '>'))) {
7696 if (len + 5 >= size) {
7697 size *= 2;
7698 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7699 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007700 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007701 return;
7702 }
7703 }
7704 COPY_BUF(rl,buf,len,r);
7705 r = s;
7706 rl = sl;
7707 s = cur;
7708 sl = l;
7709 count++;
7710 if (count > 50) {
7711 GROW;
7712 count = 0;
7713 }
7714 NEXTL(l);
7715 cur = CUR_CHAR(l);
7716 }
7717 buf[len] = 0;
7718 ctxt->instate = XML_PARSER_CONTENT;
7719 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007720 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007721 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007722 xmlFree(buf);
7723 return;
7724 }
7725 NEXTL(l);
7726
7727 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007728 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007729 */
7730 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7731 if (ctxt->sax->cdataBlock != NULL)
7732 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007733 else if (ctxt->sax->characters != NULL)
7734 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007735 }
7736 xmlFree(buf);
7737}
7738
7739/**
7740 * xmlParseContent:
7741 * @ctxt: an XML parser context
7742 *
7743 * Parse a content:
7744 *
7745 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7746 */
7747
7748void
7749xmlParseContent(xmlParserCtxtPtr ctxt) {
7750 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007751 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007752 ((RAW != '<') || (NXT(1) != '/'))) {
7753 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007754 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007755 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007756
7757 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007758 * First case : a Processing Instruction.
7759 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007760 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007761 xmlParsePI(ctxt);
7762 }
7763
7764 /*
7765 * Second case : a CDSection
7766 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007767 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007768 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007769 xmlParseCDSect(ctxt);
7770 }
7771
7772 /*
7773 * Third case : a comment
7774 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007775 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007776 (NXT(2) == '-') && (NXT(3) == '-')) {
7777 xmlParseComment(ctxt);
7778 ctxt->instate = XML_PARSER_CONTENT;
7779 }
7780
7781 /*
7782 * Fourth case : a sub-element.
7783 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007784 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007785 xmlParseElement(ctxt);
7786 }
7787
7788 /*
7789 * Fifth case : a reference. If if has not been resolved,
7790 * parsing returns it's Name, create the node
7791 */
7792
Daniel Veillard21a0f912001-02-25 19:54:14 +00007793 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007794 xmlParseReference(ctxt);
7795 }
7796
7797 /*
7798 * Last case, text. Note that References are handled directly.
7799 */
7800 else {
7801 xmlParseCharData(ctxt, 0);
7802 }
7803
7804 GROW;
7805 /*
7806 * Pop-up of finished entities.
7807 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007808 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007809 xmlPopInput(ctxt);
7810 SHRINK;
7811
Daniel Veillardfdc91562002-07-01 21:52:03 +00007812 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007813 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7814 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007815 ctxt->instate = XML_PARSER_EOF;
7816 break;
7817 }
7818 }
7819}
7820
7821/**
7822 * xmlParseElement:
7823 * @ctxt: an XML parser context
7824 *
7825 * parse an XML element, this is highly recursive
7826 *
7827 * [39] element ::= EmptyElemTag | STag content ETag
7828 *
7829 * [ WFC: Element Type Match ]
7830 * The Name in an element's end-tag must match the element type in the
7831 * start-tag.
7832 *
Owen Taylor3473f882001-02-23 17:55:21 +00007833 */
7834
7835void
7836xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007837 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007838 const xmlChar *prefix;
7839 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007840 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007841 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007842 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007843 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007844
7845 /* Capture start position */
7846 if (ctxt->record_info) {
7847 node_info.begin_pos = ctxt->input->consumed +
7848 (CUR_PTR - ctxt->input->base);
7849 node_info.begin_line = ctxt->input->line;
7850 }
7851
7852 if (ctxt->spaceNr == 0)
7853 spacePush(ctxt, -1);
7854 else
7855 spacePush(ctxt, *ctxt->space);
7856
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007857 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007858#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007859 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007860#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007861 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007862#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 else
7864 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007865#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007866 if (name == NULL) {
7867 spacePop(ctxt);
7868 return;
7869 }
7870 namePush(ctxt, name);
7871 ret = ctxt->node;
7872
Daniel Veillard4432df22003-09-28 18:58:27 +00007873#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007874 /*
7875 * [ VC: Root Element Type ]
7876 * The Name in the document type declaration must match the element
7877 * type of the root element.
7878 */
7879 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7880 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7881 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007882#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007883
7884 /*
7885 * Check for an Empty Element.
7886 */
7887 if ((RAW == '/') && (NXT(1) == '>')) {
7888 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007889 if (ctxt->sax2) {
7890 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7891 (!ctxt->disableSAX))
7892 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007893#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007894 } else {
7895 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7896 (!ctxt->disableSAX))
7897 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007898#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007899 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007900 namePop(ctxt);
7901 spacePop(ctxt);
7902 if (nsNr != ctxt->nsNr)
7903 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007904 if ( ret != NULL && ctxt->record_info ) {
7905 node_info.end_pos = ctxt->input->consumed +
7906 (CUR_PTR - ctxt->input->base);
7907 node_info.end_line = ctxt->input->line;
7908 node_info.node = ret;
7909 xmlParserAddNodeInfo(ctxt, &node_info);
7910 }
7911 return;
7912 }
7913 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007914 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007915 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007916 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7917 "Couldn't find end of Start Tag %s line %d\n",
7918 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007919
7920 /*
7921 * end of parsing of this node.
7922 */
7923 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007924 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007925 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007926 if (nsNr != ctxt->nsNr)
7927 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007928
7929 /*
7930 * Capture end position and add node
7931 */
7932 if ( ret != NULL && ctxt->record_info ) {
7933 node_info.end_pos = ctxt->input->consumed +
7934 (CUR_PTR - ctxt->input->base);
7935 node_info.end_line = ctxt->input->line;
7936 node_info.node = ret;
7937 xmlParserAddNodeInfo(ctxt, &node_info);
7938 }
7939 return;
7940 }
7941
7942 /*
7943 * Parse the content of the element:
7944 */
7945 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007946 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007947 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007948 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007949 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007950
7951 /*
7952 * end of parsing of this node.
7953 */
7954 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007955 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007956 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007957 if (nsNr != ctxt->nsNr)
7958 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007959 return;
7960 }
7961
7962 /*
7963 * parse the end of tag: '</' should be here.
7964 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007965 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007966 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007967 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007968 }
7969#ifdef LIBXML_SAX1_ENABLED
7970 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00007971 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00007972#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007973
7974 /*
7975 * Capture end position and add node
7976 */
7977 if ( ret != NULL && ctxt->record_info ) {
7978 node_info.end_pos = ctxt->input->consumed +
7979 (CUR_PTR - ctxt->input->base);
7980 node_info.end_line = ctxt->input->line;
7981 node_info.node = ret;
7982 xmlParserAddNodeInfo(ctxt, &node_info);
7983 }
7984}
7985
7986/**
7987 * xmlParseVersionNum:
7988 * @ctxt: an XML parser context
7989 *
7990 * parse the XML version value.
7991 *
7992 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7993 *
7994 * Returns the string giving the XML version number, or NULL
7995 */
7996xmlChar *
7997xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7998 xmlChar *buf = NULL;
7999 int len = 0;
8000 int size = 10;
8001 xmlChar cur;
8002
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008003 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008004 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008006 return(NULL);
8007 }
8008 cur = CUR;
8009 while (((cur >= 'a') && (cur <= 'z')) ||
8010 ((cur >= 'A') && (cur <= 'Z')) ||
8011 ((cur >= '0') && (cur <= '9')) ||
8012 (cur == '_') || (cur == '.') ||
8013 (cur == ':') || (cur == '-')) {
8014 if (len + 1 >= size) {
8015 size *= 2;
8016 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8017 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008018 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008019 return(NULL);
8020 }
8021 }
8022 buf[len++] = cur;
8023 NEXT;
8024 cur=CUR;
8025 }
8026 buf[len] = 0;
8027 return(buf);
8028}
8029
8030/**
8031 * xmlParseVersionInfo:
8032 * @ctxt: an XML parser context
8033 *
8034 * parse the XML version.
8035 *
8036 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8037 *
8038 * [25] Eq ::= S? '=' S?
8039 *
8040 * Returns the version string, e.g. "1.0"
8041 */
8042
8043xmlChar *
8044xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8045 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008046
Daniel Veillarda07050d2003-10-19 14:46:32 +00008047 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008048 SKIP(7);
8049 SKIP_BLANKS;
8050 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008051 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008052 return(NULL);
8053 }
8054 NEXT;
8055 SKIP_BLANKS;
8056 if (RAW == '"') {
8057 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008058 version = xmlParseVersionNum(ctxt);
8059 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008060 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008061 } else
8062 NEXT;
8063 } else if (RAW == '\''){
8064 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008065 version = xmlParseVersionNum(ctxt);
8066 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008067 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008068 } else
8069 NEXT;
8070 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008071 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008072 }
8073 }
8074 return(version);
8075}
8076
8077/**
8078 * xmlParseEncName:
8079 * @ctxt: an XML parser context
8080 *
8081 * parse the XML encoding name
8082 *
8083 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8084 *
8085 * Returns the encoding name value or NULL
8086 */
8087xmlChar *
8088xmlParseEncName(xmlParserCtxtPtr ctxt) {
8089 xmlChar *buf = NULL;
8090 int len = 0;
8091 int size = 10;
8092 xmlChar cur;
8093
8094 cur = CUR;
8095 if (((cur >= 'a') && (cur <= 'z')) ||
8096 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008097 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008098 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008099 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008100 return(NULL);
8101 }
8102
8103 buf[len++] = cur;
8104 NEXT;
8105 cur = CUR;
8106 while (((cur >= 'a') && (cur <= 'z')) ||
8107 ((cur >= 'A') && (cur <= 'Z')) ||
8108 ((cur >= '0') && (cur <= '9')) ||
8109 (cur == '.') || (cur == '_') ||
8110 (cur == '-')) {
8111 if (len + 1 >= size) {
8112 size *= 2;
8113 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8114 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008115 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008116 return(NULL);
8117 }
8118 }
8119 buf[len++] = cur;
8120 NEXT;
8121 cur = CUR;
8122 if (cur == 0) {
8123 SHRINK;
8124 GROW;
8125 cur = CUR;
8126 }
8127 }
8128 buf[len] = 0;
8129 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008130 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008131 }
8132 return(buf);
8133}
8134
8135/**
8136 * xmlParseEncodingDecl:
8137 * @ctxt: an XML parser context
8138 *
8139 * parse the XML encoding declaration
8140 *
8141 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8142 *
8143 * this setups the conversion filters.
8144 *
8145 * Returns the encoding value or NULL
8146 */
8147
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008148const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008149xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8150 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008151
8152 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008153 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008154 SKIP(8);
8155 SKIP_BLANKS;
8156 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008157 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008158 return(NULL);
8159 }
8160 NEXT;
8161 SKIP_BLANKS;
8162 if (RAW == '"') {
8163 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008164 encoding = xmlParseEncName(ctxt);
8165 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008166 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008167 } else
8168 NEXT;
8169 } else if (RAW == '\''){
8170 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008171 encoding = xmlParseEncName(ctxt);
8172 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008173 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008174 } else
8175 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008176 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008177 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008178 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008179 /*
8180 * UTF-16 encoding stwich has already taken place at this stage,
8181 * more over the little-endian/big-endian selection is already done
8182 */
8183 if ((encoding != NULL) &&
8184 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8185 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008186 if (ctxt->encoding != NULL)
8187 xmlFree((xmlChar *) ctxt->encoding);
8188 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008189 }
8190 /*
8191 * UTF-8 encoding is handled natively
8192 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008193 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008194 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8195 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008196 if (ctxt->encoding != NULL)
8197 xmlFree((xmlChar *) ctxt->encoding);
8198 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008199 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008200 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008201 xmlCharEncodingHandlerPtr handler;
8202
8203 if (ctxt->input->encoding != NULL)
8204 xmlFree((xmlChar *) ctxt->input->encoding);
8205 ctxt->input->encoding = encoding;
8206
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008207 handler = xmlFindCharEncodingHandler((const char *) encoding);
8208 if (handler != NULL) {
8209 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008210 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008211 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008212 "Unsupported encoding %s\n", encoding);
8213 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 }
8215 }
8216 }
8217 return(encoding);
8218}
8219
8220/**
8221 * xmlParseSDDecl:
8222 * @ctxt: an XML parser context
8223 *
8224 * parse the XML standalone declaration
8225 *
8226 * [32] SDDecl ::= S 'standalone' Eq
8227 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8228 *
8229 * [ VC: Standalone Document Declaration ]
8230 * TODO The standalone document declaration must have the value "no"
8231 * if any external markup declarations contain declarations of:
8232 * - attributes with default values, if elements to which these
8233 * attributes apply appear in the document without specifications
8234 * of values for these attributes, or
8235 * - entities (other than amp, lt, gt, apos, quot), if references
8236 * to those entities appear in the document, or
8237 * - attributes with values subject to normalization, where the
8238 * attribute appears in the document with a value which will change
8239 * as a result of normalization, or
8240 * - element types with element content, if white space occurs directly
8241 * within any instance of those types.
8242 *
8243 * Returns 1 if standalone, 0 otherwise
8244 */
8245
8246int
8247xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8248 int standalone = -1;
8249
8250 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008251 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008252 SKIP(10);
8253 SKIP_BLANKS;
8254 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008255 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008256 return(standalone);
8257 }
8258 NEXT;
8259 SKIP_BLANKS;
8260 if (RAW == '\''){
8261 NEXT;
8262 if ((RAW == 'n') && (NXT(1) == 'o')) {
8263 standalone = 0;
8264 SKIP(2);
8265 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8266 (NXT(2) == 's')) {
8267 standalone = 1;
8268 SKIP(3);
8269 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008270 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008271 }
8272 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008273 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008274 } else
8275 NEXT;
8276 } else if (RAW == '"'){
8277 NEXT;
8278 if ((RAW == 'n') && (NXT(1) == 'o')) {
8279 standalone = 0;
8280 SKIP(2);
8281 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8282 (NXT(2) == 's')) {
8283 standalone = 1;
8284 SKIP(3);
8285 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008286 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008287 }
8288 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008289 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008290 } else
8291 NEXT;
8292 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008293 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008294 }
8295 }
8296 return(standalone);
8297}
8298
8299/**
8300 * xmlParseXMLDecl:
8301 * @ctxt: an XML parser context
8302 *
8303 * parse an XML declaration header
8304 *
8305 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8306 */
8307
8308void
8309xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8310 xmlChar *version;
8311
8312 /*
8313 * We know that '<?xml' is here.
8314 */
8315 SKIP(5);
8316
William M. Brack76e95df2003-10-18 16:20:14 +00008317 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008318 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8319 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008320 }
8321 SKIP_BLANKS;
8322
8323 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008324 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008325 */
8326 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008327 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008328 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008329 } else {
8330 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8331 /*
8332 * TODO: Blueberry should be detected here
8333 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008334 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8335 "Unsupported version '%s'\n",
8336 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008337 }
8338 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008339 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008340 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008341 }
Owen Taylor3473f882001-02-23 17:55:21 +00008342
8343 /*
8344 * We may have the encoding declaration
8345 */
William M. Brack76e95df2003-10-18 16:20:14 +00008346 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008347 if ((RAW == '?') && (NXT(1) == '>')) {
8348 SKIP(2);
8349 return;
8350 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008352 }
8353 xmlParseEncodingDecl(ctxt);
8354 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8355 /*
8356 * The XML REC instructs us to stop parsing right here
8357 */
8358 return;
8359 }
8360
8361 /*
8362 * We may have the standalone status.
8363 */
William M. Brack76e95df2003-10-18 16:20:14 +00008364 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008365 if ((RAW == '?') && (NXT(1) == '>')) {
8366 SKIP(2);
8367 return;
8368 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008369 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008370 }
8371 SKIP_BLANKS;
8372 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8373
8374 SKIP_BLANKS;
8375 if ((RAW == '?') && (NXT(1) == '>')) {
8376 SKIP(2);
8377 } else if (RAW == '>') {
8378 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008379 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008380 NEXT;
8381 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008382 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008383 MOVETO_ENDTAG(CUR_PTR);
8384 NEXT;
8385 }
8386}
8387
8388/**
8389 * xmlParseMisc:
8390 * @ctxt: an XML parser context
8391 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008392 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008393 *
8394 * [27] Misc ::= Comment | PI | S
8395 */
8396
8397void
8398xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008399 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008400 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008401 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008402 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008403 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008404 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008405 NEXT;
8406 } else
8407 xmlParseComment(ctxt);
8408 }
8409}
8410
8411/**
8412 * xmlParseDocument:
8413 * @ctxt: an XML parser context
8414 *
8415 * parse an XML document (and build a tree if using the standard SAX
8416 * interface).
8417 *
8418 * [1] document ::= prolog element Misc*
8419 *
8420 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8421 *
8422 * Returns 0, -1 in case of error. the parser context is augmented
8423 * as a result of the parsing.
8424 */
8425
8426int
8427xmlParseDocument(xmlParserCtxtPtr ctxt) {
8428 xmlChar start[4];
8429 xmlCharEncoding enc;
8430
8431 xmlInitParser();
8432
8433 GROW;
8434
8435 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008436 * SAX: detecting the level.
8437 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008438 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008439
8440 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008441 * SAX: beginning of the document processing.
8442 */
8443 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8444 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8445
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008446 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8447 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008448 /*
8449 * Get the 4 first bytes and decode the charset
8450 * if enc != XML_CHAR_ENCODING_NONE
8451 * plug some encoding conversion routines.
8452 */
8453 start[0] = RAW;
8454 start[1] = NXT(1);
8455 start[2] = NXT(2);
8456 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008457 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008458 if (enc != XML_CHAR_ENCODING_NONE) {
8459 xmlSwitchEncoding(ctxt, enc);
8460 }
Owen Taylor3473f882001-02-23 17:55:21 +00008461 }
8462
8463
8464 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008465 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008466 }
8467
8468 /*
8469 * Check for the XMLDecl in the Prolog.
8470 */
8471 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008472 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008473
8474 /*
8475 * Note that we will switch encoding on the fly.
8476 */
8477 xmlParseXMLDecl(ctxt);
8478 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8479 /*
8480 * The XML REC instructs us to stop parsing right here
8481 */
8482 return(-1);
8483 }
8484 ctxt->standalone = ctxt->input->standalone;
8485 SKIP_BLANKS;
8486 } else {
8487 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8488 }
8489 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8490 ctxt->sax->startDocument(ctxt->userData);
8491
8492 /*
8493 * The Misc part of the Prolog
8494 */
8495 GROW;
8496 xmlParseMisc(ctxt);
8497
8498 /*
8499 * Then possibly doc type declaration(s) and more Misc
8500 * (doctypedecl Misc*)?
8501 */
8502 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008503 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008504
8505 ctxt->inSubset = 1;
8506 xmlParseDocTypeDecl(ctxt);
8507 if (RAW == '[') {
8508 ctxt->instate = XML_PARSER_DTD;
8509 xmlParseInternalSubset(ctxt);
8510 }
8511
8512 /*
8513 * Create and update the external subset.
8514 */
8515 ctxt->inSubset = 2;
8516 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8517 (!ctxt->disableSAX))
8518 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8519 ctxt->extSubSystem, ctxt->extSubURI);
8520 ctxt->inSubset = 0;
8521
8522
8523 ctxt->instate = XML_PARSER_PROLOG;
8524 xmlParseMisc(ctxt);
8525 }
8526
8527 /*
8528 * Time to start parsing the tree itself
8529 */
8530 GROW;
8531 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008532 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8533 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008534 } else {
8535 ctxt->instate = XML_PARSER_CONTENT;
8536 xmlParseElement(ctxt);
8537 ctxt->instate = XML_PARSER_EPILOG;
8538
8539
8540 /*
8541 * The Misc part at the end
8542 */
8543 xmlParseMisc(ctxt);
8544
Daniel Veillard561b7f82002-03-20 21:55:57 +00008545 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008546 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008547 }
8548 ctxt->instate = XML_PARSER_EOF;
8549 }
8550
8551 /*
8552 * SAX: end of the document processing.
8553 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008554 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008555 ctxt->sax->endDocument(ctxt->userData);
8556
Daniel Veillard5997aca2002-03-18 18:36:20 +00008557 /*
8558 * Remove locally kept entity definitions if the tree was not built
8559 */
8560 if ((ctxt->myDoc != NULL) &&
8561 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8562 xmlFreeDoc(ctxt->myDoc);
8563 ctxt->myDoc = NULL;
8564 }
8565
Daniel Veillardc7612992002-02-17 22:47:37 +00008566 if (! ctxt->wellFormed) {
8567 ctxt->valid = 0;
8568 return(-1);
8569 }
Owen Taylor3473f882001-02-23 17:55:21 +00008570 return(0);
8571}
8572
8573/**
8574 * xmlParseExtParsedEnt:
8575 * @ctxt: an XML parser context
8576 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008577 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008578 * An external general parsed entity is well-formed if it matches the
8579 * production labeled extParsedEnt.
8580 *
8581 * [78] extParsedEnt ::= TextDecl? content
8582 *
8583 * Returns 0, -1 in case of error. the parser context is augmented
8584 * as a result of the parsing.
8585 */
8586
8587int
8588xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8589 xmlChar start[4];
8590 xmlCharEncoding enc;
8591
8592 xmlDefaultSAXHandlerInit();
8593
Daniel Veillard309f81d2003-09-23 09:02:53 +00008594 xmlDetectSAX2(ctxt);
8595
Owen Taylor3473f882001-02-23 17:55:21 +00008596 GROW;
8597
8598 /*
8599 * SAX: beginning of the document processing.
8600 */
8601 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8602 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8603
8604 /*
8605 * Get the 4 first bytes and decode the charset
8606 * if enc != XML_CHAR_ENCODING_NONE
8607 * plug some encoding conversion routines.
8608 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008609 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8610 start[0] = RAW;
8611 start[1] = NXT(1);
8612 start[2] = NXT(2);
8613 start[3] = NXT(3);
8614 enc = xmlDetectCharEncoding(start, 4);
8615 if (enc != XML_CHAR_ENCODING_NONE) {
8616 xmlSwitchEncoding(ctxt, enc);
8617 }
Owen Taylor3473f882001-02-23 17:55:21 +00008618 }
8619
8620
8621 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008623 }
8624
8625 /*
8626 * Check for the XMLDecl in the Prolog.
8627 */
8628 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008629 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008630
8631 /*
8632 * Note that we will switch encoding on the fly.
8633 */
8634 xmlParseXMLDecl(ctxt);
8635 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8636 /*
8637 * The XML REC instructs us to stop parsing right here
8638 */
8639 return(-1);
8640 }
8641 SKIP_BLANKS;
8642 } else {
8643 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8644 }
8645 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8646 ctxt->sax->startDocument(ctxt->userData);
8647
8648 /*
8649 * Doing validity checking on chunk doesn't make sense
8650 */
8651 ctxt->instate = XML_PARSER_CONTENT;
8652 ctxt->validate = 0;
8653 ctxt->loadsubset = 0;
8654 ctxt->depth = 0;
8655
8656 xmlParseContent(ctxt);
8657
8658 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008659 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008660 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008661 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008662 }
8663
8664 /*
8665 * SAX: end of the document processing.
8666 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008667 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008668 ctxt->sax->endDocument(ctxt->userData);
8669
8670 if (! ctxt->wellFormed) return(-1);
8671 return(0);
8672}
8673
Daniel Veillard73b013f2003-09-30 12:36:01 +00008674#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008675/************************************************************************
8676 * *
8677 * Progressive parsing interfaces *
8678 * *
8679 ************************************************************************/
8680
8681/**
8682 * xmlParseLookupSequence:
8683 * @ctxt: an XML parser context
8684 * @first: the first char to lookup
8685 * @next: the next char to lookup or zero
8686 * @third: the next char to lookup or zero
8687 *
8688 * Try to find if a sequence (first, next, third) or just (first next) or
8689 * (first) is available in the input stream.
8690 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8691 * to avoid rescanning sequences of bytes, it DOES change the state of the
8692 * parser, do not use liberally.
8693 *
8694 * Returns the index to the current parsing point if the full sequence
8695 * is available, -1 otherwise.
8696 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008697static int
Owen Taylor3473f882001-02-23 17:55:21 +00008698xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8699 xmlChar next, xmlChar third) {
8700 int base, len;
8701 xmlParserInputPtr in;
8702 const xmlChar *buf;
8703
8704 in = ctxt->input;
8705 if (in == NULL) return(-1);
8706 base = in->cur - in->base;
8707 if (base < 0) return(-1);
8708 if (ctxt->checkIndex > base)
8709 base = ctxt->checkIndex;
8710 if (in->buf == NULL) {
8711 buf = in->base;
8712 len = in->length;
8713 } else {
8714 buf = in->buf->buffer->content;
8715 len = in->buf->buffer->use;
8716 }
8717 /* take into account the sequence length */
8718 if (third) len -= 2;
8719 else if (next) len --;
8720 for (;base < len;base++) {
8721 if (buf[base] == first) {
8722 if (third != 0) {
8723 if ((buf[base + 1] != next) ||
8724 (buf[base + 2] != third)) continue;
8725 } else if (next != 0) {
8726 if (buf[base + 1] != next) continue;
8727 }
8728 ctxt->checkIndex = 0;
8729#ifdef DEBUG_PUSH
8730 if (next == 0)
8731 xmlGenericError(xmlGenericErrorContext,
8732 "PP: lookup '%c' found at %d\n",
8733 first, base);
8734 else if (third == 0)
8735 xmlGenericError(xmlGenericErrorContext,
8736 "PP: lookup '%c%c' found at %d\n",
8737 first, next, base);
8738 else
8739 xmlGenericError(xmlGenericErrorContext,
8740 "PP: lookup '%c%c%c' found at %d\n",
8741 first, next, third, base);
8742#endif
8743 return(base - (in->cur - in->base));
8744 }
8745 }
8746 ctxt->checkIndex = base;
8747#ifdef DEBUG_PUSH
8748 if (next == 0)
8749 xmlGenericError(xmlGenericErrorContext,
8750 "PP: lookup '%c' failed\n", first);
8751 else if (third == 0)
8752 xmlGenericError(xmlGenericErrorContext,
8753 "PP: lookup '%c%c' failed\n", first, next);
8754 else
8755 xmlGenericError(xmlGenericErrorContext,
8756 "PP: lookup '%c%c%c' failed\n", first, next, third);
8757#endif
8758 return(-1);
8759}
8760
8761/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008762 * xmlParseGetLasts:
8763 * @ctxt: an XML parser context
8764 * @lastlt: pointer to store the last '<' from the input
8765 * @lastgt: pointer to store the last '>' from the input
8766 *
8767 * Lookup the last < and > in the current chunk
8768 */
8769static void
8770xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8771 const xmlChar **lastgt) {
8772 const xmlChar *tmp;
8773
8774 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8775 xmlGenericError(xmlGenericErrorContext,
8776 "Internal error: xmlParseGetLasts\n");
8777 return;
8778 }
8779 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8780 tmp = ctxt->input->end;
8781 tmp--;
8782 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8783 (*tmp != '>')) tmp--;
8784 if (tmp < ctxt->input->base) {
8785 *lastlt = NULL;
8786 *lastgt = NULL;
8787 } else if (*tmp == '<') {
8788 *lastlt = tmp;
8789 tmp--;
8790 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8791 if (tmp < ctxt->input->base)
8792 *lastgt = NULL;
8793 else
8794 *lastgt = tmp;
8795 } else {
8796 *lastgt = tmp;
8797 tmp--;
8798 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8799 if (tmp < ctxt->input->base)
8800 *lastlt = NULL;
8801 else
8802 *lastlt = tmp;
8803 }
8804
8805 } else {
8806 *lastlt = NULL;
8807 *lastgt = NULL;
8808 }
8809}
8810/**
Owen Taylor3473f882001-02-23 17:55:21 +00008811 * xmlParseTryOrFinish:
8812 * @ctxt: an XML parser context
8813 * @terminate: last chunk indicator
8814 *
8815 * Try to progress on parsing
8816 *
8817 * Returns zero if no parsing was possible
8818 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008819static int
Owen Taylor3473f882001-02-23 17:55:21 +00008820xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8821 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008822 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008823 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008824 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008825
8826#ifdef DEBUG_PUSH
8827 switch (ctxt->instate) {
8828 case XML_PARSER_EOF:
8829 xmlGenericError(xmlGenericErrorContext,
8830 "PP: try EOF\n"); break;
8831 case XML_PARSER_START:
8832 xmlGenericError(xmlGenericErrorContext,
8833 "PP: try START\n"); break;
8834 case XML_PARSER_MISC:
8835 xmlGenericError(xmlGenericErrorContext,
8836 "PP: try MISC\n");break;
8837 case XML_PARSER_COMMENT:
8838 xmlGenericError(xmlGenericErrorContext,
8839 "PP: try COMMENT\n");break;
8840 case XML_PARSER_PROLOG:
8841 xmlGenericError(xmlGenericErrorContext,
8842 "PP: try PROLOG\n");break;
8843 case XML_PARSER_START_TAG:
8844 xmlGenericError(xmlGenericErrorContext,
8845 "PP: try START_TAG\n");break;
8846 case XML_PARSER_CONTENT:
8847 xmlGenericError(xmlGenericErrorContext,
8848 "PP: try CONTENT\n");break;
8849 case XML_PARSER_CDATA_SECTION:
8850 xmlGenericError(xmlGenericErrorContext,
8851 "PP: try CDATA_SECTION\n");break;
8852 case XML_PARSER_END_TAG:
8853 xmlGenericError(xmlGenericErrorContext,
8854 "PP: try END_TAG\n");break;
8855 case XML_PARSER_ENTITY_DECL:
8856 xmlGenericError(xmlGenericErrorContext,
8857 "PP: try ENTITY_DECL\n");break;
8858 case XML_PARSER_ENTITY_VALUE:
8859 xmlGenericError(xmlGenericErrorContext,
8860 "PP: try ENTITY_VALUE\n");break;
8861 case XML_PARSER_ATTRIBUTE_VALUE:
8862 xmlGenericError(xmlGenericErrorContext,
8863 "PP: try ATTRIBUTE_VALUE\n");break;
8864 case XML_PARSER_DTD:
8865 xmlGenericError(xmlGenericErrorContext,
8866 "PP: try DTD\n");break;
8867 case XML_PARSER_EPILOG:
8868 xmlGenericError(xmlGenericErrorContext,
8869 "PP: try EPILOG\n");break;
8870 case XML_PARSER_PI:
8871 xmlGenericError(xmlGenericErrorContext,
8872 "PP: try PI\n");break;
8873 case XML_PARSER_IGNORE:
8874 xmlGenericError(xmlGenericErrorContext,
8875 "PP: try IGNORE\n");break;
8876 }
8877#endif
8878
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008879 if ((ctxt->input != NULL) &&
8880 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008881 xmlSHRINK(ctxt);
8882 ctxt->checkIndex = 0;
8883 }
8884 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008885
Daniel Veillarda880b122003-04-21 21:36:41 +00008886 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008887 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8888 return(0);
8889
8890
Owen Taylor3473f882001-02-23 17:55:21 +00008891 /*
8892 * Pop-up of finished entities.
8893 */
8894 while ((RAW == 0) && (ctxt->inputNr > 1))
8895 xmlPopInput(ctxt);
8896
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008897 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008898 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008899 avail = ctxt->input->length -
8900 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008901 else {
8902 /*
8903 * If we are operating on converted input, try to flush
8904 * remaining chars to avoid them stalling in the non-converted
8905 * buffer.
8906 */
8907 if ((ctxt->input->buf->raw != NULL) &&
8908 (ctxt->input->buf->raw->use > 0)) {
8909 int base = ctxt->input->base -
8910 ctxt->input->buf->buffer->content;
8911 int current = ctxt->input->cur - ctxt->input->base;
8912
8913 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8914 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8915 ctxt->input->cur = ctxt->input->base + current;
8916 ctxt->input->end =
8917 &ctxt->input->buf->buffer->content[
8918 ctxt->input->buf->buffer->use];
8919 }
8920 avail = ctxt->input->buf->buffer->use -
8921 (ctxt->input->cur - ctxt->input->base);
8922 }
Owen Taylor3473f882001-02-23 17:55:21 +00008923 if (avail < 1)
8924 goto done;
8925 switch (ctxt->instate) {
8926 case XML_PARSER_EOF:
8927 /*
8928 * Document parsing is done !
8929 */
8930 goto done;
8931 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008932 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8933 xmlChar start[4];
8934 xmlCharEncoding enc;
8935
8936 /*
8937 * Very first chars read from the document flow.
8938 */
8939 if (avail < 4)
8940 goto done;
8941
8942 /*
8943 * Get the 4 first bytes and decode the charset
8944 * if enc != XML_CHAR_ENCODING_NONE
8945 * plug some encoding conversion routines.
8946 */
8947 start[0] = RAW;
8948 start[1] = NXT(1);
8949 start[2] = NXT(2);
8950 start[3] = NXT(3);
8951 enc = xmlDetectCharEncoding(start, 4);
8952 if (enc != XML_CHAR_ENCODING_NONE) {
8953 xmlSwitchEncoding(ctxt, enc);
8954 }
8955 break;
8956 }
Owen Taylor3473f882001-02-23 17:55:21 +00008957
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00008958 if (avail < 2)
8959 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00008960 cur = ctxt->input->cur[0];
8961 next = ctxt->input->cur[1];
8962 if (cur == 0) {
8963 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8964 ctxt->sax->setDocumentLocator(ctxt->userData,
8965 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008967 ctxt->instate = XML_PARSER_EOF;
8968#ifdef DEBUG_PUSH
8969 xmlGenericError(xmlGenericErrorContext,
8970 "PP: entering EOF\n");
8971#endif
8972 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8973 ctxt->sax->endDocument(ctxt->userData);
8974 goto done;
8975 }
8976 if ((cur == '<') && (next == '?')) {
8977 /* PI or XML decl */
8978 if (avail < 5) return(ret);
8979 if ((!terminate) &&
8980 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8981 return(ret);
8982 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8983 ctxt->sax->setDocumentLocator(ctxt->userData,
8984 &xmlDefaultSAXLocator);
8985 if ((ctxt->input->cur[2] == 'x') &&
8986 (ctxt->input->cur[3] == 'm') &&
8987 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00008988 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008989 ret += 5;
8990#ifdef DEBUG_PUSH
8991 xmlGenericError(xmlGenericErrorContext,
8992 "PP: Parsing XML Decl\n");
8993#endif
8994 xmlParseXMLDecl(ctxt);
8995 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8996 /*
8997 * The XML REC instructs us to stop parsing right
8998 * here
8999 */
9000 ctxt->instate = XML_PARSER_EOF;
9001 return(0);
9002 }
9003 ctxt->standalone = ctxt->input->standalone;
9004 if ((ctxt->encoding == NULL) &&
9005 (ctxt->input->encoding != NULL))
9006 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9007 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9008 (!ctxt->disableSAX))
9009 ctxt->sax->startDocument(ctxt->userData);
9010 ctxt->instate = XML_PARSER_MISC;
9011#ifdef DEBUG_PUSH
9012 xmlGenericError(xmlGenericErrorContext,
9013 "PP: entering MISC\n");
9014#endif
9015 } else {
9016 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9017 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9018 (!ctxt->disableSAX))
9019 ctxt->sax->startDocument(ctxt->userData);
9020 ctxt->instate = XML_PARSER_MISC;
9021#ifdef DEBUG_PUSH
9022 xmlGenericError(xmlGenericErrorContext,
9023 "PP: entering MISC\n");
9024#endif
9025 }
9026 } else {
9027 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9028 ctxt->sax->setDocumentLocator(ctxt->userData,
9029 &xmlDefaultSAXLocator);
9030 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9031 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9032 (!ctxt->disableSAX))
9033 ctxt->sax->startDocument(ctxt->userData);
9034 ctxt->instate = XML_PARSER_MISC;
9035#ifdef DEBUG_PUSH
9036 xmlGenericError(xmlGenericErrorContext,
9037 "PP: entering MISC\n");
9038#endif
9039 }
9040 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009041 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009042 const xmlChar *name;
9043 const xmlChar *prefix;
9044 const xmlChar *URI;
9045 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009046
9047 if ((avail < 2) && (ctxt->inputNr == 1))
9048 goto done;
9049 cur = ctxt->input->cur[0];
9050 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009051 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009052 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009053 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9054 ctxt->sax->endDocument(ctxt->userData);
9055 goto done;
9056 }
9057 if (!terminate) {
9058 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009059 /* > can be found unescaped in attribute values */
9060 if ((lastlt == NULL) || (ctxt->input->cur >= lastlt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009061 goto done;
9062 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9063 goto done;
9064 }
9065 }
9066 if (ctxt->spaceNr == 0)
9067 spacePush(ctxt, -1);
9068 else
9069 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009070#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009071 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009072#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009073 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009074#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009075 else
9076 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009077#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009078 if (name == NULL) {
9079 spacePop(ctxt);
9080 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009081 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9082 ctxt->sax->endDocument(ctxt->userData);
9083 goto done;
9084 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009085#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009086 /*
9087 * [ VC: Root Element Type ]
9088 * The Name in the document type declaration must match
9089 * the element type of the root element.
9090 */
9091 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9092 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9093 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009094#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009095
9096 /*
9097 * Check for an Empty Element.
9098 */
9099 if ((RAW == '/') && (NXT(1) == '>')) {
9100 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009101
9102 if (ctxt->sax2) {
9103 if ((ctxt->sax != NULL) &&
9104 (ctxt->sax->endElementNs != NULL) &&
9105 (!ctxt->disableSAX))
9106 ctxt->sax->endElementNs(ctxt->userData, name,
9107 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009108#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009109 } else {
9110 if ((ctxt->sax != NULL) &&
9111 (ctxt->sax->endElement != NULL) &&
9112 (!ctxt->disableSAX))
9113 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009114#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009115 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009116 spacePop(ctxt);
9117 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009118 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009119 } else {
9120 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009121 }
9122 break;
9123 }
9124 if (RAW == '>') {
9125 NEXT;
9126 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009127 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009128 "Couldn't find end of Start Tag %s\n",
9129 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009130 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009131 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009132 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009133 if (ctxt->sax2)
9134 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009135#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009136 else
9137 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009138#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009139
Daniel Veillarda880b122003-04-21 21:36:41 +00009140 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009141 break;
9142 }
9143 case XML_PARSER_CONTENT: {
9144 const xmlChar *test;
9145 unsigned int cons;
9146 if ((avail < 2) && (ctxt->inputNr == 1))
9147 goto done;
9148 cur = ctxt->input->cur[0];
9149 next = ctxt->input->cur[1];
9150
9151 test = CUR_PTR;
9152 cons = ctxt->input->consumed;
9153 if ((cur == '<') && (next == '/')) {
9154 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009155 break;
9156 } else if ((cur == '<') && (next == '?')) {
9157 if ((!terminate) &&
9158 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9159 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009160 xmlParsePI(ctxt);
9161 } else if ((cur == '<') && (next != '!')) {
9162 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009163 break;
9164 } else if ((cur == '<') && (next == '!') &&
9165 (ctxt->input->cur[2] == '-') &&
9166 (ctxt->input->cur[3] == '-')) {
9167 if ((!terminate) &&
9168 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9169 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009170 xmlParseComment(ctxt);
9171 ctxt->instate = XML_PARSER_CONTENT;
9172 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9173 (ctxt->input->cur[2] == '[') &&
9174 (ctxt->input->cur[3] == 'C') &&
9175 (ctxt->input->cur[4] == 'D') &&
9176 (ctxt->input->cur[5] == 'A') &&
9177 (ctxt->input->cur[6] == 'T') &&
9178 (ctxt->input->cur[7] == 'A') &&
9179 (ctxt->input->cur[8] == '[')) {
9180 SKIP(9);
9181 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009182 break;
9183 } else if ((cur == '<') && (next == '!') &&
9184 (avail < 9)) {
9185 goto done;
9186 } else if (cur == '&') {
9187 if ((!terminate) &&
9188 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9189 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009190 xmlParseReference(ctxt);
9191 } else {
9192 /* TODO Avoid the extra copy, handle directly !!! */
9193 /*
9194 * Goal of the following test is:
9195 * - minimize calls to the SAX 'character' callback
9196 * when they are mergeable
9197 * - handle an problem for isBlank when we only parse
9198 * a sequence of blank chars and the next one is
9199 * not available to check against '<' presence.
9200 * - tries to homogenize the differences in SAX
9201 * callbacks between the push and pull versions
9202 * of the parser.
9203 */
9204 if ((ctxt->inputNr == 1) &&
9205 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9206 if (!terminate) {
9207 if (ctxt->progressive) {
9208 if ((lastlt == NULL) ||
9209 (ctxt->input->cur > lastlt))
9210 goto done;
9211 } else if (xmlParseLookupSequence(ctxt,
9212 '<', 0, 0) < 0) {
9213 goto done;
9214 }
9215 }
9216 }
9217 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009218 xmlParseCharData(ctxt, 0);
9219 }
9220 /*
9221 * Pop-up of finished entities.
9222 */
9223 while ((RAW == 0) && (ctxt->inputNr > 1))
9224 xmlPopInput(ctxt);
9225 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009226 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9227 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009228 ctxt->instate = XML_PARSER_EOF;
9229 break;
9230 }
9231 break;
9232 }
9233 case XML_PARSER_END_TAG:
9234 if (avail < 2)
9235 goto done;
9236 if (!terminate) {
9237 if (ctxt->progressive) {
9238 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9239 goto done;
9240 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9241 goto done;
9242 }
9243 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009244 if (ctxt->sax2) {
9245 xmlParseEndTag2(ctxt,
9246 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9247 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009248 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009249 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009250 }
9251#ifdef LIBXML_SAX1_ENABLED
9252 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009253 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009254#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009255 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009256 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009257 } else {
9258 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009259 }
9260 break;
9261 case XML_PARSER_CDATA_SECTION: {
9262 /*
9263 * The Push mode need to have the SAX callback for
9264 * cdataBlock merge back contiguous callbacks.
9265 */
9266 int base;
9267
9268 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9269 if (base < 0) {
9270 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9271 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9272 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009273 ctxt->sax->cdataBlock(ctxt->userData,
9274 ctxt->input->cur,
9275 XML_PARSER_BIG_BUFFER_SIZE);
9276 else if (ctxt->sax->characters != NULL)
9277 ctxt->sax->characters(ctxt->userData,
9278 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009279 XML_PARSER_BIG_BUFFER_SIZE);
9280 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009281 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009282 ctxt->checkIndex = 0;
9283 }
9284 goto done;
9285 } else {
9286 if ((ctxt->sax != NULL) && (base > 0) &&
9287 (!ctxt->disableSAX)) {
9288 if (ctxt->sax->cdataBlock != NULL)
9289 ctxt->sax->cdataBlock(ctxt->userData,
9290 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009291 else if (ctxt->sax->characters != NULL)
9292 ctxt->sax->characters(ctxt->userData,
9293 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009294 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009295 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009296 ctxt->checkIndex = 0;
9297 ctxt->instate = XML_PARSER_CONTENT;
9298#ifdef DEBUG_PUSH
9299 xmlGenericError(xmlGenericErrorContext,
9300 "PP: entering CONTENT\n");
9301#endif
9302 }
9303 break;
9304 }
Owen Taylor3473f882001-02-23 17:55:21 +00009305 case XML_PARSER_MISC:
9306 SKIP_BLANKS;
9307 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009308 avail = ctxt->input->length -
9309 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009310 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009311 avail = ctxt->input->buf->buffer->use -
9312 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009313 if (avail < 2)
9314 goto done;
9315 cur = ctxt->input->cur[0];
9316 next = ctxt->input->cur[1];
9317 if ((cur == '<') && (next == '?')) {
9318 if ((!terminate) &&
9319 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9320 goto done;
9321#ifdef DEBUG_PUSH
9322 xmlGenericError(xmlGenericErrorContext,
9323 "PP: Parsing PI\n");
9324#endif
9325 xmlParsePI(ctxt);
9326 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009327 (ctxt->input->cur[2] == '-') &&
9328 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009329 if ((!terminate) &&
9330 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9331 goto done;
9332#ifdef DEBUG_PUSH
9333 xmlGenericError(xmlGenericErrorContext,
9334 "PP: Parsing Comment\n");
9335#endif
9336 xmlParseComment(ctxt);
9337 ctxt->instate = XML_PARSER_MISC;
9338 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009339 (ctxt->input->cur[2] == 'D') &&
9340 (ctxt->input->cur[3] == 'O') &&
9341 (ctxt->input->cur[4] == 'C') &&
9342 (ctxt->input->cur[5] == 'T') &&
9343 (ctxt->input->cur[6] == 'Y') &&
9344 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009345 (ctxt->input->cur[8] == 'E')) {
9346 if ((!terminate) &&
9347 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9348 goto done;
9349#ifdef DEBUG_PUSH
9350 xmlGenericError(xmlGenericErrorContext,
9351 "PP: Parsing internal subset\n");
9352#endif
9353 ctxt->inSubset = 1;
9354 xmlParseDocTypeDecl(ctxt);
9355 if (RAW == '[') {
9356 ctxt->instate = XML_PARSER_DTD;
9357#ifdef DEBUG_PUSH
9358 xmlGenericError(xmlGenericErrorContext,
9359 "PP: entering DTD\n");
9360#endif
9361 } else {
9362 /*
9363 * Create and update the external subset.
9364 */
9365 ctxt->inSubset = 2;
9366 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9367 (ctxt->sax->externalSubset != NULL))
9368 ctxt->sax->externalSubset(ctxt->userData,
9369 ctxt->intSubName, ctxt->extSubSystem,
9370 ctxt->extSubURI);
9371 ctxt->inSubset = 0;
9372 ctxt->instate = XML_PARSER_PROLOG;
9373#ifdef DEBUG_PUSH
9374 xmlGenericError(xmlGenericErrorContext,
9375 "PP: entering PROLOG\n");
9376#endif
9377 }
9378 } else if ((cur == '<') && (next == '!') &&
9379 (avail < 9)) {
9380 goto done;
9381 } else {
9382 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009383 ctxt->progressive = 1;
9384 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009385#ifdef DEBUG_PUSH
9386 xmlGenericError(xmlGenericErrorContext,
9387 "PP: entering START_TAG\n");
9388#endif
9389 }
9390 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009391 case XML_PARSER_PROLOG:
9392 SKIP_BLANKS;
9393 if (ctxt->input->buf == NULL)
9394 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9395 else
9396 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9397 if (avail < 2)
9398 goto done;
9399 cur = ctxt->input->cur[0];
9400 next = ctxt->input->cur[1];
9401 if ((cur == '<') && (next == '?')) {
9402 if ((!terminate) &&
9403 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9404 goto done;
9405#ifdef DEBUG_PUSH
9406 xmlGenericError(xmlGenericErrorContext,
9407 "PP: Parsing PI\n");
9408#endif
9409 xmlParsePI(ctxt);
9410 } else if ((cur == '<') && (next == '!') &&
9411 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9412 if ((!terminate) &&
9413 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9414 goto done;
9415#ifdef DEBUG_PUSH
9416 xmlGenericError(xmlGenericErrorContext,
9417 "PP: Parsing Comment\n");
9418#endif
9419 xmlParseComment(ctxt);
9420 ctxt->instate = XML_PARSER_PROLOG;
9421 } else if ((cur == '<') && (next == '!') &&
9422 (avail < 4)) {
9423 goto done;
9424 } else {
9425 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009426 ctxt->progressive = 1;
9427 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009428#ifdef DEBUG_PUSH
9429 xmlGenericError(xmlGenericErrorContext,
9430 "PP: entering START_TAG\n");
9431#endif
9432 }
9433 break;
9434 case XML_PARSER_EPILOG:
9435 SKIP_BLANKS;
9436 if (ctxt->input->buf == NULL)
9437 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9438 else
9439 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9440 if (avail < 2)
9441 goto done;
9442 cur = ctxt->input->cur[0];
9443 next = ctxt->input->cur[1];
9444 if ((cur == '<') && (next == '?')) {
9445 if ((!terminate) &&
9446 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9447 goto done;
9448#ifdef DEBUG_PUSH
9449 xmlGenericError(xmlGenericErrorContext,
9450 "PP: Parsing PI\n");
9451#endif
9452 xmlParsePI(ctxt);
9453 ctxt->instate = XML_PARSER_EPILOG;
9454 } else if ((cur == '<') && (next == '!') &&
9455 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9456 if ((!terminate) &&
9457 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9458 goto done;
9459#ifdef DEBUG_PUSH
9460 xmlGenericError(xmlGenericErrorContext,
9461 "PP: Parsing Comment\n");
9462#endif
9463 xmlParseComment(ctxt);
9464 ctxt->instate = XML_PARSER_EPILOG;
9465 } else if ((cur == '<') && (next == '!') &&
9466 (avail < 4)) {
9467 goto done;
9468 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009469 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009470 ctxt->instate = XML_PARSER_EOF;
9471#ifdef DEBUG_PUSH
9472 xmlGenericError(xmlGenericErrorContext,
9473 "PP: entering EOF\n");
9474#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009476 ctxt->sax->endDocument(ctxt->userData);
9477 goto done;
9478 }
9479 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009480 case XML_PARSER_DTD: {
9481 /*
9482 * Sorry but progressive parsing of the internal subset
9483 * is not expected to be supported. We first check that
9484 * the full content of the internal subset is available and
9485 * the parsing is launched only at that point.
9486 * Internal subset ends up with "']' S? '>'" in an unescaped
9487 * section and not in a ']]>' sequence which are conditional
9488 * sections (whoever argued to keep that crap in XML deserve
9489 * a place in hell !).
9490 */
9491 int base, i;
9492 xmlChar *buf;
9493 xmlChar quote = 0;
9494
9495 base = ctxt->input->cur - ctxt->input->base;
9496 if (base < 0) return(0);
9497 if (ctxt->checkIndex > base)
9498 base = ctxt->checkIndex;
9499 buf = ctxt->input->buf->buffer->content;
9500 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9501 base++) {
9502 if (quote != 0) {
9503 if (buf[base] == quote)
9504 quote = 0;
9505 continue;
9506 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009507 if ((quote == 0) && (buf[base] == '<')) {
9508 int found = 0;
9509 /* special handling of comments */
9510 if (((unsigned int) base + 4 <
9511 ctxt->input->buf->buffer->use) &&
9512 (buf[base + 1] == '!') &&
9513 (buf[base + 2] == '-') &&
9514 (buf[base + 3] == '-')) {
9515 for (;(unsigned int) base + 3 <
9516 ctxt->input->buf->buffer->use; base++) {
9517 if ((buf[base] == '-') &&
9518 (buf[base + 1] == '-') &&
9519 (buf[base + 2] == '>')) {
9520 found = 1;
9521 base += 2;
9522 break;
9523 }
9524 }
9525 if (!found)
9526 break;
9527 continue;
9528 }
9529 }
Owen Taylor3473f882001-02-23 17:55:21 +00009530 if (buf[base] == '"') {
9531 quote = '"';
9532 continue;
9533 }
9534 if (buf[base] == '\'') {
9535 quote = '\'';
9536 continue;
9537 }
9538 if (buf[base] == ']') {
9539 if ((unsigned int) base +1 >=
9540 ctxt->input->buf->buffer->use)
9541 break;
9542 if (buf[base + 1] == ']') {
9543 /* conditional crap, skip both ']' ! */
9544 base++;
9545 continue;
9546 }
9547 for (i = 0;
9548 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9549 i++) {
9550 if (buf[base + i] == '>')
9551 goto found_end_int_subset;
9552 }
9553 break;
9554 }
9555 }
9556 /*
9557 * We didn't found the end of the Internal subset
9558 */
9559 if (quote == 0)
9560 ctxt->checkIndex = base;
9561#ifdef DEBUG_PUSH
9562 if (next == 0)
9563 xmlGenericError(xmlGenericErrorContext,
9564 "PP: lookup of int subset end filed\n");
9565#endif
9566 goto done;
9567
9568found_end_int_subset:
9569 xmlParseInternalSubset(ctxt);
9570 ctxt->inSubset = 2;
9571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9572 (ctxt->sax->externalSubset != NULL))
9573 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9574 ctxt->extSubSystem, ctxt->extSubURI);
9575 ctxt->inSubset = 0;
9576 ctxt->instate = XML_PARSER_PROLOG;
9577 ctxt->checkIndex = 0;
9578#ifdef DEBUG_PUSH
9579 xmlGenericError(xmlGenericErrorContext,
9580 "PP: entering PROLOG\n");
9581#endif
9582 break;
9583 }
9584 case XML_PARSER_COMMENT:
9585 xmlGenericError(xmlGenericErrorContext,
9586 "PP: internal error, state == COMMENT\n");
9587 ctxt->instate = XML_PARSER_CONTENT;
9588#ifdef DEBUG_PUSH
9589 xmlGenericError(xmlGenericErrorContext,
9590 "PP: entering CONTENT\n");
9591#endif
9592 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009593 case XML_PARSER_IGNORE:
9594 xmlGenericError(xmlGenericErrorContext,
9595 "PP: internal error, state == IGNORE");
9596 ctxt->instate = XML_PARSER_DTD;
9597#ifdef DEBUG_PUSH
9598 xmlGenericError(xmlGenericErrorContext,
9599 "PP: entering DTD\n");
9600#endif
9601 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009602 case XML_PARSER_PI:
9603 xmlGenericError(xmlGenericErrorContext,
9604 "PP: internal error, state == PI\n");
9605 ctxt->instate = XML_PARSER_CONTENT;
9606#ifdef DEBUG_PUSH
9607 xmlGenericError(xmlGenericErrorContext,
9608 "PP: entering CONTENT\n");
9609#endif
9610 break;
9611 case XML_PARSER_ENTITY_DECL:
9612 xmlGenericError(xmlGenericErrorContext,
9613 "PP: internal error, state == ENTITY_DECL\n");
9614 ctxt->instate = XML_PARSER_DTD;
9615#ifdef DEBUG_PUSH
9616 xmlGenericError(xmlGenericErrorContext,
9617 "PP: entering DTD\n");
9618#endif
9619 break;
9620 case XML_PARSER_ENTITY_VALUE:
9621 xmlGenericError(xmlGenericErrorContext,
9622 "PP: internal error, state == ENTITY_VALUE\n");
9623 ctxt->instate = XML_PARSER_CONTENT;
9624#ifdef DEBUG_PUSH
9625 xmlGenericError(xmlGenericErrorContext,
9626 "PP: entering DTD\n");
9627#endif
9628 break;
9629 case XML_PARSER_ATTRIBUTE_VALUE:
9630 xmlGenericError(xmlGenericErrorContext,
9631 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9632 ctxt->instate = XML_PARSER_START_TAG;
9633#ifdef DEBUG_PUSH
9634 xmlGenericError(xmlGenericErrorContext,
9635 "PP: entering START_TAG\n");
9636#endif
9637 break;
9638 case XML_PARSER_SYSTEM_LITERAL:
9639 xmlGenericError(xmlGenericErrorContext,
9640 "PP: internal error, state == SYSTEM_LITERAL\n");
9641 ctxt->instate = XML_PARSER_START_TAG;
9642#ifdef DEBUG_PUSH
9643 xmlGenericError(xmlGenericErrorContext,
9644 "PP: entering START_TAG\n");
9645#endif
9646 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009647 case XML_PARSER_PUBLIC_LITERAL:
9648 xmlGenericError(xmlGenericErrorContext,
9649 "PP: internal error, state == PUBLIC_LITERAL\n");
9650 ctxt->instate = XML_PARSER_START_TAG;
9651#ifdef DEBUG_PUSH
9652 xmlGenericError(xmlGenericErrorContext,
9653 "PP: entering START_TAG\n");
9654#endif
9655 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009656 }
9657 }
9658done:
9659#ifdef DEBUG_PUSH
9660 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9661#endif
9662 return(ret);
9663}
9664
9665/**
Owen Taylor3473f882001-02-23 17:55:21 +00009666 * xmlParseChunk:
9667 * @ctxt: an XML parser context
9668 * @chunk: an char array
9669 * @size: the size in byte of the chunk
9670 * @terminate: last chunk indicator
9671 *
9672 * Parse a Chunk of memory
9673 *
9674 * Returns zero if no error, the xmlParserErrors otherwise.
9675 */
9676int
9677xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9678 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009679 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9680 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009681 if (ctxt->instate == XML_PARSER_START)
9682 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009683 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9684 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9685 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9686 int cur = ctxt->input->cur - ctxt->input->base;
9687
9688 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9689 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9690 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009691 ctxt->input->end =
9692 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009693#ifdef DEBUG_PUSH
9694 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9695#endif
9696
Owen Taylor3473f882001-02-23 17:55:21 +00009697 } else if (ctxt->instate != XML_PARSER_EOF) {
9698 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9699 xmlParserInputBufferPtr in = ctxt->input->buf;
9700 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9701 (in->raw != NULL)) {
9702 int nbchars;
9703
9704 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9705 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009706 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009707 xmlGenericError(xmlGenericErrorContext,
9708 "xmlParseChunk: encoder error\n");
9709 return(XML_ERR_INVALID_ENCODING);
9710 }
9711 }
9712 }
9713 }
9714 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009715 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9716 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009717 if (terminate) {
9718 /*
9719 * Check for termination
9720 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009721 int avail = 0;
9722 if (ctxt->input->buf == NULL)
9723 avail = ctxt->input->length -
9724 (ctxt->input->cur - ctxt->input->base);
9725 else
9726 avail = ctxt->input->buf->buffer->use -
9727 (ctxt->input->cur - ctxt->input->base);
9728
Owen Taylor3473f882001-02-23 17:55:21 +00009729 if ((ctxt->instate != XML_PARSER_EOF) &&
9730 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009731 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009732 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009733 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009734 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009735 }
Owen Taylor3473f882001-02-23 17:55:21 +00009736 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009737 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009738 ctxt->sax->endDocument(ctxt->userData);
9739 }
9740 ctxt->instate = XML_PARSER_EOF;
9741 }
9742 return((xmlParserErrors) ctxt->errNo);
9743}
9744
9745/************************************************************************
9746 * *
9747 * I/O front end functions to the parser *
9748 * *
9749 ************************************************************************/
9750
9751/**
9752 * xmlStopParser:
9753 * @ctxt: an XML parser context
9754 *
9755 * Blocks further parser processing
9756 */
9757void
9758xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009759 if (ctxt == NULL)
9760 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009761 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009762 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009763 if (ctxt->input != NULL)
9764 ctxt->input->cur = BAD_CAST"";
9765}
9766
9767/**
9768 * xmlCreatePushParserCtxt:
9769 * @sax: a SAX handler
9770 * @user_data: The user data returned on SAX callbacks
9771 * @chunk: a pointer to an array of chars
9772 * @size: number of chars in the array
9773 * @filename: an optional file name or URI
9774 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009775 * Create a parser context for using the XML parser in push mode.
9776 * If @buffer and @size are non-NULL, the data is used to detect
9777 * the encoding. The remaining characters will be parsed so they
9778 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009779 * To allow content encoding detection, @size should be >= 4
9780 * The value of @filename is used for fetching external entities
9781 * and error/warning reports.
9782 *
9783 * Returns the new parser context or NULL
9784 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009785
Owen Taylor3473f882001-02-23 17:55:21 +00009786xmlParserCtxtPtr
9787xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9788 const char *chunk, int size, const char *filename) {
9789 xmlParserCtxtPtr ctxt;
9790 xmlParserInputPtr inputStream;
9791 xmlParserInputBufferPtr buf;
9792 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9793
9794 /*
9795 * plug some encoding conversion routines
9796 */
9797 if ((chunk != NULL) && (size >= 4))
9798 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9799
9800 buf = xmlAllocParserInputBuffer(enc);
9801 if (buf == NULL) return(NULL);
9802
9803 ctxt = xmlNewParserCtxt();
9804 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009805 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009806 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009807 return(NULL);
9808 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009809 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9810 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009811 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009812 xmlFreeParserInputBuffer(buf);
9813 xmlFreeParserCtxt(ctxt);
9814 return(NULL);
9815 }
Owen Taylor3473f882001-02-23 17:55:21 +00009816 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009817#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009818 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009819#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009820 xmlFree(ctxt->sax);
9821 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9822 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009823 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009824 xmlFreeParserInputBuffer(buf);
9825 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009826 return(NULL);
9827 }
9828 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9829 if (user_data != NULL)
9830 ctxt->userData = user_data;
9831 }
9832 if (filename == NULL) {
9833 ctxt->directory = NULL;
9834 } else {
9835 ctxt->directory = xmlParserGetDirectory(filename);
9836 }
9837
9838 inputStream = xmlNewInputStream(ctxt);
9839 if (inputStream == NULL) {
9840 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009841 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009842 return(NULL);
9843 }
9844
9845 if (filename == NULL)
9846 inputStream->filename = NULL;
9847 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009848 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009849 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009850 inputStream->buf = buf;
9851 inputStream->base = inputStream->buf->buffer->content;
9852 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009853 inputStream->end =
9854 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009855
9856 inputPush(ctxt, inputStream);
9857
9858 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9859 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009860 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9861 int cur = ctxt->input->cur - ctxt->input->base;
9862
Owen Taylor3473f882001-02-23 17:55:21 +00009863 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009864
9865 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9866 ctxt->input->cur = ctxt->input->base + cur;
9867 ctxt->input->end =
9868 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009869#ifdef DEBUG_PUSH
9870 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9871#endif
9872 }
9873
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009874 if (enc != XML_CHAR_ENCODING_NONE) {
9875 xmlSwitchEncoding(ctxt, enc);
9876 }
9877
Owen Taylor3473f882001-02-23 17:55:21 +00009878 return(ctxt);
9879}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009880#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009881
9882/**
9883 * xmlCreateIOParserCtxt:
9884 * @sax: a SAX handler
9885 * @user_data: The user data returned on SAX callbacks
9886 * @ioread: an I/O read function
9887 * @ioclose: an I/O close function
9888 * @ioctx: an I/O handler
9889 * @enc: the charset encoding if known
9890 *
9891 * Create a parser context for using the XML parser with an existing
9892 * I/O stream
9893 *
9894 * Returns the new parser context or NULL
9895 */
9896xmlParserCtxtPtr
9897xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9898 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9899 void *ioctx, xmlCharEncoding enc) {
9900 xmlParserCtxtPtr ctxt;
9901 xmlParserInputPtr inputStream;
9902 xmlParserInputBufferPtr buf;
9903
9904 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9905 if (buf == NULL) return(NULL);
9906
9907 ctxt = xmlNewParserCtxt();
9908 if (ctxt == NULL) {
9909 xmlFree(buf);
9910 return(NULL);
9911 }
9912 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009913#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009914 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009915#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009916 xmlFree(ctxt->sax);
9917 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9918 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009919 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009920 xmlFree(ctxt);
9921 return(NULL);
9922 }
9923 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9924 if (user_data != NULL)
9925 ctxt->userData = user_data;
9926 }
9927
9928 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9929 if (inputStream == NULL) {
9930 xmlFreeParserCtxt(ctxt);
9931 return(NULL);
9932 }
9933 inputPush(ctxt, inputStream);
9934
9935 return(ctxt);
9936}
9937
Daniel Veillard4432df22003-09-28 18:58:27 +00009938#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009939/************************************************************************
9940 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009941 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009942 * *
9943 ************************************************************************/
9944
9945/**
9946 * xmlIOParseDTD:
9947 * @sax: the SAX handler block or NULL
9948 * @input: an Input Buffer
9949 * @enc: the charset encoding if known
9950 *
9951 * Load and parse a DTD
9952 *
9953 * Returns the resulting xmlDtdPtr or NULL in case of error.
9954 * @input will be freed at parsing end.
9955 */
9956
9957xmlDtdPtr
9958xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9959 xmlCharEncoding enc) {
9960 xmlDtdPtr ret = NULL;
9961 xmlParserCtxtPtr ctxt;
9962 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009963 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009964
9965 if (input == NULL)
9966 return(NULL);
9967
9968 ctxt = xmlNewParserCtxt();
9969 if (ctxt == NULL) {
9970 return(NULL);
9971 }
9972
9973 /*
9974 * Set-up the SAX context
9975 */
9976 if (sax != NULL) {
9977 if (ctxt->sax != NULL)
9978 xmlFree(ctxt->sax);
9979 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +00009980 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +00009981 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009982 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009983
9984 /*
9985 * generate a parser input from the I/O handler
9986 */
9987
Daniel Veillard43caefb2003-12-07 19:32:22 +00009988 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +00009989 if (pinput == NULL) {
9990 if (sax != NULL) ctxt->sax = NULL;
9991 xmlFreeParserCtxt(ctxt);
9992 return(NULL);
9993 }
9994
9995 /*
9996 * plug some encoding conversion routines here.
9997 */
9998 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +00009999 if (enc != XML_CHAR_ENCODING_NONE) {
10000 xmlSwitchEncoding(ctxt, enc);
10001 }
Owen Taylor3473f882001-02-23 17:55:21 +000010002
10003 pinput->filename = NULL;
10004 pinput->line = 1;
10005 pinput->col = 1;
10006 pinput->base = ctxt->input->cur;
10007 pinput->cur = ctxt->input->cur;
10008 pinput->free = NULL;
10009
10010 /*
10011 * let's parse that entity knowing it's an external subset.
10012 */
10013 ctxt->inSubset = 2;
10014 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10015 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10016 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010017
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010018 if ((enc == XML_CHAR_ENCODING_NONE) &&
10019 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010020 /*
10021 * Get the 4 first bytes and decode the charset
10022 * if enc != XML_CHAR_ENCODING_NONE
10023 * plug some encoding conversion routines.
10024 */
10025 start[0] = RAW;
10026 start[1] = NXT(1);
10027 start[2] = NXT(2);
10028 start[3] = NXT(3);
10029 enc = xmlDetectCharEncoding(start, 4);
10030 if (enc != XML_CHAR_ENCODING_NONE) {
10031 xmlSwitchEncoding(ctxt, enc);
10032 }
10033 }
10034
Owen Taylor3473f882001-02-23 17:55:21 +000010035 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10036
10037 if (ctxt->myDoc != NULL) {
10038 if (ctxt->wellFormed) {
10039 ret = ctxt->myDoc->extSubset;
10040 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010041 if (ret != NULL) {
10042 xmlNodePtr tmp;
10043
10044 ret->doc = NULL;
10045 tmp = ret->children;
10046 while (tmp != NULL) {
10047 tmp->doc = NULL;
10048 tmp = tmp->next;
10049 }
10050 }
Owen Taylor3473f882001-02-23 17:55:21 +000010051 } else {
10052 ret = NULL;
10053 }
10054 xmlFreeDoc(ctxt->myDoc);
10055 ctxt->myDoc = NULL;
10056 }
10057 if (sax != NULL) ctxt->sax = NULL;
10058 xmlFreeParserCtxt(ctxt);
10059
10060 return(ret);
10061}
10062
10063/**
10064 * xmlSAXParseDTD:
10065 * @sax: the SAX handler block
10066 * @ExternalID: a NAME* containing the External ID of the DTD
10067 * @SystemID: a NAME* containing the URL to the DTD
10068 *
10069 * Load and parse an external subset.
10070 *
10071 * Returns the resulting xmlDtdPtr or NULL in case of error.
10072 */
10073
10074xmlDtdPtr
10075xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10076 const xmlChar *SystemID) {
10077 xmlDtdPtr ret = NULL;
10078 xmlParserCtxtPtr ctxt;
10079 xmlParserInputPtr input = NULL;
10080 xmlCharEncoding enc;
10081
10082 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10083
10084 ctxt = xmlNewParserCtxt();
10085 if (ctxt == NULL) {
10086 return(NULL);
10087 }
10088
10089 /*
10090 * Set-up the SAX context
10091 */
10092 if (sax != NULL) {
10093 if (ctxt->sax != NULL)
10094 xmlFree(ctxt->sax);
10095 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010096 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010097 }
10098
10099 /*
10100 * Ask the Entity resolver to load the damn thing
10101 */
10102
10103 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010104 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010105 if (input == NULL) {
10106 if (sax != NULL) ctxt->sax = NULL;
10107 xmlFreeParserCtxt(ctxt);
10108 return(NULL);
10109 }
10110
10111 /*
10112 * plug some encoding conversion routines here.
10113 */
10114 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010115 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10116 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10117 xmlSwitchEncoding(ctxt, enc);
10118 }
Owen Taylor3473f882001-02-23 17:55:21 +000010119
10120 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010121 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010122 input->line = 1;
10123 input->col = 1;
10124 input->base = ctxt->input->cur;
10125 input->cur = ctxt->input->cur;
10126 input->free = NULL;
10127
10128 /*
10129 * let's parse that entity knowing it's an external subset.
10130 */
10131 ctxt->inSubset = 2;
10132 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10133 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10134 ExternalID, SystemID);
10135 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10136
10137 if (ctxt->myDoc != NULL) {
10138 if (ctxt->wellFormed) {
10139 ret = ctxt->myDoc->extSubset;
10140 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010141 if (ret != NULL) {
10142 xmlNodePtr tmp;
10143
10144 ret->doc = NULL;
10145 tmp = ret->children;
10146 while (tmp != NULL) {
10147 tmp->doc = NULL;
10148 tmp = tmp->next;
10149 }
10150 }
Owen Taylor3473f882001-02-23 17:55:21 +000010151 } else {
10152 ret = NULL;
10153 }
10154 xmlFreeDoc(ctxt->myDoc);
10155 ctxt->myDoc = NULL;
10156 }
10157 if (sax != NULL) ctxt->sax = NULL;
10158 xmlFreeParserCtxt(ctxt);
10159
10160 return(ret);
10161}
10162
Daniel Veillard4432df22003-09-28 18:58:27 +000010163
Owen Taylor3473f882001-02-23 17:55:21 +000010164/**
10165 * xmlParseDTD:
10166 * @ExternalID: a NAME* containing the External ID of the DTD
10167 * @SystemID: a NAME* containing the URL to the DTD
10168 *
10169 * Load and parse an external subset.
10170 *
10171 * Returns the resulting xmlDtdPtr or NULL in case of error.
10172 */
10173
10174xmlDtdPtr
10175xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10176 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10177}
Daniel Veillard4432df22003-09-28 18:58:27 +000010178#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010179
10180/************************************************************************
10181 * *
10182 * Front ends when parsing an Entity *
10183 * *
10184 ************************************************************************/
10185
10186/**
Owen Taylor3473f882001-02-23 17:55:21 +000010187 * xmlParseCtxtExternalEntity:
10188 * @ctx: the existing parsing context
10189 * @URL: the URL for the entity to load
10190 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010191 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010192 *
10193 * Parse an external general entity within an existing parsing context
10194 * An external general parsed entity is well-formed if it matches the
10195 * production labeled extParsedEnt.
10196 *
10197 * [78] extParsedEnt ::= TextDecl? content
10198 *
10199 * Returns 0 if the entity is well formed, -1 in case of args problem and
10200 * the parser error code otherwise
10201 */
10202
10203int
10204xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010205 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010206 xmlParserCtxtPtr ctxt;
10207 xmlDocPtr newDoc;
10208 xmlSAXHandlerPtr oldsax = NULL;
10209 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010210 xmlChar start[4];
10211 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010212
10213 if (ctx->depth > 40) {
10214 return(XML_ERR_ENTITY_LOOP);
10215 }
10216
Daniel Veillardcda96922001-08-21 10:56:31 +000010217 if (lst != NULL)
10218 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010219 if ((URL == NULL) && (ID == NULL))
10220 return(-1);
10221 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10222 return(-1);
10223
10224
10225 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10226 if (ctxt == NULL) return(-1);
10227 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010228 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010229 oldsax = ctxt->sax;
10230 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010231 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010232 newDoc = xmlNewDoc(BAD_CAST "1.0");
10233 if (newDoc == NULL) {
10234 xmlFreeParserCtxt(ctxt);
10235 return(-1);
10236 }
10237 if (ctx->myDoc != NULL) {
10238 newDoc->intSubset = ctx->myDoc->intSubset;
10239 newDoc->extSubset = ctx->myDoc->extSubset;
10240 }
10241 if (ctx->myDoc->URL != NULL) {
10242 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10243 }
10244 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10245 if (newDoc->children == NULL) {
10246 ctxt->sax = oldsax;
10247 xmlFreeParserCtxt(ctxt);
10248 newDoc->intSubset = NULL;
10249 newDoc->extSubset = NULL;
10250 xmlFreeDoc(newDoc);
10251 return(-1);
10252 }
10253 nodePush(ctxt, newDoc->children);
10254 if (ctx->myDoc == NULL) {
10255 ctxt->myDoc = newDoc;
10256 } else {
10257 ctxt->myDoc = ctx->myDoc;
10258 newDoc->children->doc = ctx->myDoc;
10259 }
10260
Daniel Veillard87a764e2001-06-20 17:41:10 +000010261 /*
10262 * Get the 4 first bytes and decode the charset
10263 * if enc != XML_CHAR_ENCODING_NONE
10264 * plug some encoding conversion routines.
10265 */
10266 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010267 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10268 start[0] = RAW;
10269 start[1] = NXT(1);
10270 start[2] = NXT(2);
10271 start[3] = NXT(3);
10272 enc = xmlDetectCharEncoding(start, 4);
10273 if (enc != XML_CHAR_ENCODING_NONE) {
10274 xmlSwitchEncoding(ctxt, enc);
10275 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010276 }
10277
Owen Taylor3473f882001-02-23 17:55:21 +000010278 /*
10279 * Parse a possible text declaration first
10280 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010281 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010282 xmlParseTextDecl(ctxt);
10283 }
10284
10285 /*
10286 * Doing validity checking on chunk doesn't make sense
10287 */
10288 ctxt->instate = XML_PARSER_CONTENT;
10289 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010290 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010291 ctxt->loadsubset = ctx->loadsubset;
10292 ctxt->depth = ctx->depth + 1;
10293 ctxt->replaceEntities = ctx->replaceEntities;
10294 if (ctxt->validate) {
10295 ctxt->vctxt.error = ctx->vctxt.error;
10296 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010297 } else {
10298 ctxt->vctxt.error = NULL;
10299 ctxt->vctxt.warning = NULL;
10300 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010301 ctxt->vctxt.nodeTab = NULL;
10302 ctxt->vctxt.nodeNr = 0;
10303 ctxt->vctxt.nodeMax = 0;
10304 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010305 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10306 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010307 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10308 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10309 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010310 ctxt->dictNames = ctx->dictNames;
10311 ctxt->attsDefault = ctx->attsDefault;
10312 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010313
10314 xmlParseContent(ctxt);
10315
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010316 ctx->validate = ctxt->validate;
10317 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010318 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010319 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010320 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010321 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010322 }
10323 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010324 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010325 }
10326
10327 if (!ctxt->wellFormed) {
10328 if (ctxt->errNo == 0)
10329 ret = 1;
10330 else
10331 ret = ctxt->errNo;
10332 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010333 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010334 xmlNodePtr cur;
10335
10336 /*
10337 * Return the newly created nodeset after unlinking it from
10338 * they pseudo parent.
10339 */
10340 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010341 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010342 while (cur != NULL) {
10343 cur->parent = NULL;
10344 cur = cur->next;
10345 }
10346 newDoc->children->children = NULL;
10347 }
10348 ret = 0;
10349 }
10350 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010351 ctxt->dict = NULL;
10352 ctxt->attsDefault = NULL;
10353 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010354 xmlFreeParserCtxt(ctxt);
10355 newDoc->intSubset = NULL;
10356 newDoc->extSubset = NULL;
10357 xmlFreeDoc(newDoc);
10358
10359 return(ret);
10360}
10361
10362/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010363 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010364 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010365 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010366 * @sax: the SAX handler bloc (possibly NULL)
10367 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10368 * @depth: Used for loop detection, use 0
10369 * @URL: the URL for the entity to load
10370 * @ID: the System ID for the entity to load
10371 * @list: the return value for the set of parsed nodes
10372 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010373 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010374 *
10375 * Returns 0 if the entity is well formed, -1 in case of args problem and
10376 * the parser error code otherwise
10377 */
10378
Daniel Veillard7d515752003-09-26 19:12:37 +000010379static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010380xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10381 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010382 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010383 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010384 xmlParserCtxtPtr ctxt;
10385 xmlDocPtr newDoc;
10386 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010387 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010388 xmlChar start[4];
10389 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010390
10391 if (depth > 40) {
10392 return(XML_ERR_ENTITY_LOOP);
10393 }
10394
10395
10396
10397 if (list != NULL)
10398 *list = NULL;
10399 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010400 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010401 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010402 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010403
10404
10405 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010406 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010407 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010408 if (oldctxt != NULL) {
10409 ctxt->_private = oldctxt->_private;
10410 ctxt->loadsubset = oldctxt->loadsubset;
10411 ctxt->validate = oldctxt->validate;
10412 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010413 ctxt->record_info = oldctxt->record_info;
10414 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10415 ctxt->node_seq.length = oldctxt->node_seq.length;
10416 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010417 } else {
10418 /*
10419 * Doing validity checking on chunk without context
10420 * doesn't make sense
10421 */
10422 ctxt->_private = NULL;
10423 ctxt->validate = 0;
10424 ctxt->external = 2;
10425 ctxt->loadsubset = 0;
10426 }
Owen Taylor3473f882001-02-23 17:55:21 +000010427 if (sax != NULL) {
10428 oldsax = ctxt->sax;
10429 ctxt->sax = sax;
10430 if (user_data != NULL)
10431 ctxt->userData = user_data;
10432 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010433 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010434 newDoc = xmlNewDoc(BAD_CAST "1.0");
10435 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010436 ctxt->node_seq.maximum = 0;
10437 ctxt->node_seq.length = 0;
10438 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010439 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010440 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010441 }
10442 if (doc != NULL) {
10443 newDoc->intSubset = doc->intSubset;
10444 newDoc->extSubset = doc->extSubset;
10445 }
10446 if (doc->URL != NULL) {
10447 newDoc->URL = xmlStrdup(doc->URL);
10448 }
10449 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10450 if (newDoc->children == NULL) {
10451 if (sax != NULL)
10452 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010453 ctxt->node_seq.maximum = 0;
10454 ctxt->node_seq.length = 0;
10455 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010456 xmlFreeParserCtxt(ctxt);
10457 newDoc->intSubset = NULL;
10458 newDoc->extSubset = NULL;
10459 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010460 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010461 }
10462 nodePush(ctxt, newDoc->children);
10463 if (doc == NULL) {
10464 ctxt->myDoc = newDoc;
10465 } else {
10466 ctxt->myDoc = doc;
10467 newDoc->children->doc = doc;
10468 }
10469
Daniel Veillard87a764e2001-06-20 17:41:10 +000010470 /*
10471 * Get the 4 first bytes and decode the charset
10472 * if enc != XML_CHAR_ENCODING_NONE
10473 * plug some encoding conversion routines.
10474 */
10475 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010476 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10477 start[0] = RAW;
10478 start[1] = NXT(1);
10479 start[2] = NXT(2);
10480 start[3] = NXT(3);
10481 enc = xmlDetectCharEncoding(start, 4);
10482 if (enc != XML_CHAR_ENCODING_NONE) {
10483 xmlSwitchEncoding(ctxt, enc);
10484 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010485 }
10486
Owen Taylor3473f882001-02-23 17:55:21 +000010487 /*
10488 * Parse a possible text declaration first
10489 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010490 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010491 xmlParseTextDecl(ctxt);
10492 }
10493
Owen Taylor3473f882001-02-23 17:55:21 +000010494 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010495 ctxt->depth = depth;
10496
10497 xmlParseContent(ctxt);
10498
Daniel Veillard561b7f82002-03-20 21:55:57 +000010499 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010500 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010501 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010502 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010503 }
10504 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010505 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010506 }
10507
10508 if (!ctxt->wellFormed) {
10509 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010510 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010511 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010512 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010513 } else {
10514 if (list != NULL) {
10515 xmlNodePtr cur;
10516
10517 /*
10518 * Return the newly created nodeset after unlinking it from
10519 * they pseudo parent.
10520 */
10521 cur = newDoc->children->children;
10522 *list = cur;
10523 while (cur != NULL) {
10524 cur->parent = NULL;
10525 cur = cur->next;
10526 }
10527 newDoc->children->children = NULL;
10528 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010529 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010530 }
10531 if (sax != NULL)
10532 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010533 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10534 oldctxt->node_seq.length = ctxt->node_seq.length;
10535 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010536 ctxt->node_seq.maximum = 0;
10537 ctxt->node_seq.length = 0;
10538 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010539 xmlFreeParserCtxt(ctxt);
10540 newDoc->intSubset = NULL;
10541 newDoc->extSubset = NULL;
10542 xmlFreeDoc(newDoc);
10543
10544 return(ret);
10545}
10546
Daniel Veillard81273902003-09-30 00:43:48 +000010547#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010548/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010549 * xmlParseExternalEntity:
10550 * @doc: the document the chunk pertains to
10551 * @sax: the SAX handler bloc (possibly NULL)
10552 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10553 * @depth: Used for loop detection, use 0
10554 * @URL: the URL for the entity to load
10555 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010556 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010557 *
10558 * Parse an external general entity
10559 * An external general parsed entity is well-formed if it matches the
10560 * production labeled extParsedEnt.
10561 *
10562 * [78] extParsedEnt ::= TextDecl? content
10563 *
10564 * Returns 0 if the entity is well formed, -1 in case of args problem and
10565 * the parser error code otherwise
10566 */
10567
10568int
10569xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010570 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010571 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010572 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010573}
10574
10575/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010576 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010577 * @doc: the document the chunk pertains to
10578 * @sax: the SAX handler bloc (possibly NULL)
10579 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10580 * @depth: Used for loop detection, use 0
10581 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010582 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010583 *
10584 * Parse a well-balanced chunk of an XML document
10585 * called by the parser
10586 * The allowed sequence for the Well Balanced Chunk is the one defined by
10587 * the content production in the XML grammar:
10588 *
10589 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10590 *
10591 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10592 * the parser error code otherwise
10593 */
10594
10595int
10596xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010597 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010598 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10599 depth, string, lst, 0 );
10600}
Daniel Veillard81273902003-09-30 00:43:48 +000010601#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010602
10603/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010604 * xmlParseBalancedChunkMemoryInternal:
10605 * @oldctxt: the existing parsing context
10606 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10607 * @user_data: the user data field for the parser context
10608 * @lst: the return value for the set of parsed nodes
10609 *
10610 *
10611 * Parse a well-balanced chunk of an XML document
10612 * called by the parser
10613 * The allowed sequence for the Well Balanced Chunk is the one defined by
10614 * the content production in the XML grammar:
10615 *
10616 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10617 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010618 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10619 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010620 *
10621 * In case recover is set to 1, the nodelist will not be empty even if
10622 * the parsed chunk is not well balanced.
10623 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010624static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010625xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10626 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10627 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010628 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010629 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010630 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010631 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010632 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010633
10634 if (oldctxt->depth > 40) {
10635 return(XML_ERR_ENTITY_LOOP);
10636 }
10637
10638
10639 if (lst != NULL)
10640 *lst = NULL;
10641 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010642 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010643
10644 size = xmlStrlen(string);
10645
10646 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010647 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010648 if (user_data != NULL)
10649 ctxt->userData = user_data;
10650 else
10651 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010652 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10653 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010654 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10655 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10656 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010657
10658 oldsax = ctxt->sax;
10659 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010660 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010661 ctxt->replaceEntities = oldctxt->replaceEntities;
10662 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010663
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010664 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010665 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010666 newDoc = xmlNewDoc(BAD_CAST "1.0");
10667 if (newDoc == NULL) {
10668 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010669 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010670 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010671 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010672 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010673 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010674 } else {
10675 ctxt->myDoc = oldctxt->myDoc;
10676 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010677 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010678 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010679 BAD_CAST "pseudoroot", NULL);
10680 if (ctxt->myDoc->children == NULL) {
10681 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010682 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010683 xmlFreeParserCtxt(ctxt);
10684 if (newDoc != NULL)
10685 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010686 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010687 }
10688 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010689 ctxt->instate = XML_PARSER_CONTENT;
10690 ctxt->depth = oldctxt->depth + 1;
10691
Daniel Veillard328f48c2002-11-15 15:24:34 +000010692 ctxt->validate = 0;
10693 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010694 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10695 /*
10696 * ID/IDREF registration will be done in xmlValidateElement below
10697 */
10698 ctxt->loadsubset |= XML_SKIP_IDS;
10699 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010700 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010701 ctxt->attsDefault = oldctxt->attsDefault;
10702 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010703
Daniel Veillard68e9e742002-11-16 15:35:11 +000010704 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010705 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010706 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010707 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010708 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010709 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010710 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010711 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010712 }
10713
10714 if (!ctxt->wellFormed) {
10715 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010716 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010717 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010718 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010719 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010720 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010721 }
10722
William M. Brack7b9154b2003-09-27 19:23:50 +000010723 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010724 xmlNodePtr cur;
10725
10726 /*
10727 * Return the newly created nodeset after unlinking it from
10728 * they pseudo parent.
10729 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010730 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010731 *lst = cur;
10732 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010733#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010734 if (oldctxt->validate && oldctxt->wellFormed &&
10735 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10736 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10737 oldctxt->myDoc, cur);
10738 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010739#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010740 cur->parent = NULL;
10741 cur = cur->next;
10742 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010743 ctxt->myDoc->children->children = NULL;
10744 }
10745 if (ctxt->myDoc != NULL) {
10746 xmlFreeNode(ctxt->myDoc->children);
10747 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010748 }
10749
10750 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010751 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010752 ctxt->attsDefault = NULL;
10753 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010754 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010755 if (newDoc != NULL)
10756 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010757
10758 return(ret);
10759}
10760
Daniel Veillard81273902003-09-30 00:43:48 +000010761#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010762/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010763 * xmlParseBalancedChunkMemoryRecover:
10764 * @doc: the document the chunk pertains to
10765 * @sax: the SAX handler bloc (possibly NULL)
10766 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10767 * @depth: Used for loop detection, use 0
10768 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10769 * @lst: the return value for the set of parsed nodes
10770 * @recover: return nodes even if the data is broken (use 0)
10771 *
10772 *
10773 * Parse a well-balanced chunk of an XML document
10774 * called by the parser
10775 * The allowed sequence for the Well Balanced Chunk is the one defined by
10776 * the content production in the XML grammar:
10777 *
10778 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10779 *
10780 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10781 * the parser error code otherwise
10782 *
10783 * In case recover is set to 1, the nodelist will not be empty even if
10784 * the parsed chunk is not well balanced.
10785 */
10786int
10787xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10788 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10789 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010790 xmlParserCtxtPtr ctxt;
10791 xmlDocPtr newDoc;
10792 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010793 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010794 int size;
10795 int ret = 0;
10796
10797 if (depth > 40) {
10798 return(XML_ERR_ENTITY_LOOP);
10799 }
10800
10801
Daniel Veillardcda96922001-08-21 10:56:31 +000010802 if (lst != NULL)
10803 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010804 if (string == NULL)
10805 return(-1);
10806
10807 size = xmlStrlen(string);
10808
10809 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10810 if (ctxt == NULL) return(-1);
10811 ctxt->userData = ctxt;
10812 if (sax != NULL) {
10813 oldsax = ctxt->sax;
10814 ctxt->sax = sax;
10815 if (user_data != NULL)
10816 ctxt->userData = user_data;
10817 }
10818 newDoc = xmlNewDoc(BAD_CAST "1.0");
10819 if (newDoc == NULL) {
10820 xmlFreeParserCtxt(ctxt);
10821 return(-1);
10822 }
10823 if (doc != NULL) {
10824 newDoc->intSubset = doc->intSubset;
10825 newDoc->extSubset = doc->extSubset;
10826 }
10827 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10828 if (newDoc->children == NULL) {
10829 if (sax != NULL)
10830 ctxt->sax = oldsax;
10831 xmlFreeParserCtxt(ctxt);
10832 newDoc->intSubset = NULL;
10833 newDoc->extSubset = NULL;
10834 xmlFreeDoc(newDoc);
10835 return(-1);
10836 }
10837 nodePush(ctxt, newDoc->children);
10838 if (doc == NULL) {
10839 ctxt->myDoc = newDoc;
10840 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010841 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010842 newDoc->children->doc = doc;
10843 }
10844 ctxt->instate = XML_PARSER_CONTENT;
10845 ctxt->depth = depth;
10846
10847 /*
10848 * Doing validity checking on chunk doesn't make sense
10849 */
10850 ctxt->validate = 0;
10851 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010852 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010853
Daniel Veillardb39bc392002-10-26 19:29:51 +000010854 if ( doc != NULL ){
10855 content = doc->children;
10856 doc->children = NULL;
10857 xmlParseContent(ctxt);
10858 doc->children = content;
10859 }
10860 else {
10861 xmlParseContent(ctxt);
10862 }
Owen Taylor3473f882001-02-23 17:55:21 +000010863 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010864 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010865 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010866 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010867 }
10868 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010869 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010870 }
10871
10872 if (!ctxt->wellFormed) {
10873 if (ctxt->errNo == 0)
10874 ret = 1;
10875 else
10876 ret = ctxt->errNo;
10877 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010878 ret = 0;
10879 }
10880
10881 if (lst != NULL && (ret == 0 || recover == 1)) {
10882 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010883
10884 /*
10885 * Return the newly created nodeset after unlinking it from
10886 * they pseudo parent.
10887 */
10888 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010889 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010890 while (cur != NULL) {
10891 cur->parent = NULL;
10892 cur = cur->next;
10893 }
10894 newDoc->children->children = NULL;
10895 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010896
Owen Taylor3473f882001-02-23 17:55:21 +000010897 if (sax != NULL)
10898 ctxt->sax = oldsax;
10899 xmlFreeParserCtxt(ctxt);
10900 newDoc->intSubset = NULL;
10901 newDoc->extSubset = NULL;
10902 xmlFreeDoc(newDoc);
10903
10904 return(ret);
10905}
10906
10907/**
10908 * xmlSAXParseEntity:
10909 * @sax: the SAX handler block
10910 * @filename: the filename
10911 *
10912 * parse an XML external entity out of context and build a tree.
10913 * It use the given SAX function block to handle the parsing callback.
10914 * If sax is NULL, fallback to the default DOM tree building routines.
10915 *
10916 * [78] extParsedEnt ::= TextDecl? content
10917 *
10918 * This correspond to a "Well Balanced" chunk
10919 *
10920 * Returns the resulting document tree
10921 */
10922
10923xmlDocPtr
10924xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10925 xmlDocPtr ret;
10926 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010927
10928 ctxt = xmlCreateFileParserCtxt(filename);
10929 if (ctxt == NULL) {
10930 return(NULL);
10931 }
10932 if (sax != NULL) {
10933 if (ctxt->sax != NULL)
10934 xmlFree(ctxt->sax);
10935 ctxt->sax = sax;
10936 ctxt->userData = NULL;
10937 }
10938
Owen Taylor3473f882001-02-23 17:55:21 +000010939 xmlParseExtParsedEnt(ctxt);
10940
10941 if (ctxt->wellFormed)
10942 ret = ctxt->myDoc;
10943 else {
10944 ret = NULL;
10945 xmlFreeDoc(ctxt->myDoc);
10946 ctxt->myDoc = NULL;
10947 }
10948 if (sax != NULL)
10949 ctxt->sax = NULL;
10950 xmlFreeParserCtxt(ctxt);
10951
10952 return(ret);
10953}
10954
10955/**
10956 * xmlParseEntity:
10957 * @filename: the filename
10958 *
10959 * parse an XML external entity out of context and build a tree.
10960 *
10961 * [78] extParsedEnt ::= TextDecl? content
10962 *
10963 * This correspond to a "Well Balanced" chunk
10964 *
10965 * Returns the resulting document tree
10966 */
10967
10968xmlDocPtr
10969xmlParseEntity(const char *filename) {
10970 return(xmlSAXParseEntity(NULL, filename));
10971}
Daniel Veillard81273902003-09-30 00:43:48 +000010972#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010973
10974/**
10975 * xmlCreateEntityParserCtxt:
10976 * @URL: the entity URL
10977 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010978 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010979 *
10980 * Create a parser context for an external entity
10981 * Automatic support for ZLIB/Compress compressed document is provided
10982 * by default if found at compile-time.
10983 *
10984 * Returns the new parser context or NULL
10985 */
10986xmlParserCtxtPtr
10987xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10988 const xmlChar *base) {
10989 xmlParserCtxtPtr ctxt;
10990 xmlParserInputPtr inputStream;
10991 char *directory = NULL;
10992 xmlChar *uri;
10993
10994 ctxt = xmlNewParserCtxt();
10995 if (ctxt == NULL) {
10996 return(NULL);
10997 }
10998
10999 uri = xmlBuildURI(URL, base);
11000
11001 if (uri == NULL) {
11002 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11003 if (inputStream == NULL) {
11004 xmlFreeParserCtxt(ctxt);
11005 return(NULL);
11006 }
11007
11008 inputPush(ctxt, inputStream);
11009
11010 if ((ctxt->directory == NULL) && (directory == NULL))
11011 directory = xmlParserGetDirectory((char *)URL);
11012 if ((ctxt->directory == NULL) && (directory != NULL))
11013 ctxt->directory = directory;
11014 } else {
11015 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11016 if (inputStream == NULL) {
11017 xmlFree(uri);
11018 xmlFreeParserCtxt(ctxt);
11019 return(NULL);
11020 }
11021
11022 inputPush(ctxt, inputStream);
11023
11024 if ((ctxt->directory == NULL) && (directory == NULL))
11025 directory = xmlParserGetDirectory((char *)uri);
11026 if ((ctxt->directory == NULL) && (directory != NULL))
11027 ctxt->directory = directory;
11028 xmlFree(uri);
11029 }
Owen Taylor3473f882001-02-23 17:55:21 +000011030 return(ctxt);
11031}
11032
11033/************************************************************************
11034 * *
11035 * Front ends when parsing from a file *
11036 * *
11037 ************************************************************************/
11038
11039/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011040 * xmlCreateURLParserCtxt:
11041 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011042 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011043 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011044 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011045 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011046 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011047 *
11048 * Returns the new parser context or NULL
11049 */
11050xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011051xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011052{
11053 xmlParserCtxtPtr ctxt;
11054 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011055 char *directory = NULL;
11056
Owen Taylor3473f882001-02-23 17:55:21 +000011057 ctxt = xmlNewParserCtxt();
11058 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011059 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011060 return(NULL);
11061 }
11062
Daniel Veillard61b93382003-11-03 14:28:31 +000011063 if (options != 0)
11064 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011065
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011066 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011067 if (inputStream == NULL) {
11068 xmlFreeParserCtxt(ctxt);
11069 return(NULL);
11070 }
11071
Owen Taylor3473f882001-02-23 17:55:21 +000011072 inputPush(ctxt, inputStream);
11073 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011074 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011075 if ((ctxt->directory == NULL) && (directory != NULL))
11076 ctxt->directory = directory;
11077
11078 return(ctxt);
11079}
11080
Daniel Veillard61b93382003-11-03 14:28:31 +000011081/**
11082 * xmlCreateFileParserCtxt:
11083 * @filename: the filename
11084 *
11085 * Create a parser context for a file content.
11086 * Automatic support for ZLIB/Compress compressed document is provided
11087 * by default if found at compile-time.
11088 *
11089 * Returns the new parser context or NULL
11090 */
11091xmlParserCtxtPtr
11092xmlCreateFileParserCtxt(const char *filename)
11093{
11094 return(xmlCreateURLParserCtxt(filename, 0));
11095}
11096
Daniel Veillard81273902003-09-30 00:43:48 +000011097#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011098/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011099 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011100 * @sax: the SAX handler block
11101 * @filename: the filename
11102 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11103 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011104 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011105 *
11106 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11107 * compressed document is provided by default if found at compile-time.
11108 * It use the given SAX function block to handle the parsing callback.
11109 * If sax is NULL, fallback to the default DOM tree building routines.
11110 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011111 * User data (void *) is stored within the parser context in the
11112 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011113 *
Owen Taylor3473f882001-02-23 17:55:21 +000011114 * Returns the resulting document tree
11115 */
11116
11117xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011118xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11119 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011120 xmlDocPtr ret;
11121 xmlParserCtxtPtr ctxt;
11122 char *directory = NULL;
11123
Daniel Veillard635ef722001-10-29 11:48:19 +000011124 xmlInitParser();
11125
Owen Taylor3473f882001-02-23 17:55:21 +000011126 ctxt = xmlCreateFileParserCtxt(filename);
11127 if (ctxt == NULL) {
11128 return(NULL);
11129 }
11130 if (sax != NULL) {
11131 if (ctxt->sax != NULL)
11132 xmlFree(ctxt->sax);
11133 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011135 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011136 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011137 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011138 }
Owen Taylor3473f882001-02-23 17:55:21 +000011139
11140 if ((ctxt->directory == NULL) && (directory == NULL))
11141 directory = xmlParserGetDirectory(filename);
11142 if ((ctxt->directory == NULL) && (directory != NULL))
11143 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11144
Daniel Veillarddad3f682002-11-17 16:47:27 +000011145 ctxt->recovery = recovery;
11146
Owen Taylor3473f882001-02-23 17:55:21 +000011147 xmlParseDocument(ctxt);
11148
William M. Brackc07329e2003-09-08 01:57:30 +000011149 if ((ctxt->wellFormed) || recovery) {
11150 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011151 if (ret != NULL) {
11152 if (ctxt->input->buf->compressed > 0)
11153 ret->compression = 9;
11154 else
11155 ret->compression = ctxt->input->buf->compressed;
11156 }
William M. Brackc07329e2003-09-08 01:57:30 +000011157 }
Owen Taylor3473f882001-02-23 17:55:21 +000011158 else {
11159 ret = NULL;
11160 xmlFreeDoc(ctxt->myDoc);
11161 ctxt->myDoc = NULL;
11162 }
11163 if (sax != NULL)
11164 ctxt->sax = NULL;
11165 xmlFreeParserCtxt(ctxt);
11166
11167 return(ret);
11168}
11169
11170/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011171 * xmlSAXParseFile:
11172 * @sax: the SAX handler block
11173 * @filename: the filename
11174 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11175 * documents
11176 *
11177 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11178 * compressed document is provided by default if found at compile-time.
11179 * It use the given SAX function block to handle the parsing callback.
11180 * If sax is NULL, fallback to the default DOM tree building routines.
11181 *
11182 * Returns the resulting document tree
11183 */
11184
11185xmlDocPtr
11186xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11187 int recovery) {
11188 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11189}
11190
11191/**
Owen Taylor3473f882001-02-23 17:55:21 +000011192 * xmlRecoverDoc:
11193 * @cur: a pointer to an array of xmlChar
11194 *
11195 * parse an XML in-memory document and build a tree.
11196 * In the case the document is not Well Formed, a tree is built anyway
11197 *
11198 * Returns the resulting document tree
11199 */
11200
11201xmlDocPtr
11202xmlRecoverDoc(xmlChar *cur) {
11203 return(xmlSAXParseDoc(NULL, cur, 1));
11204}
11205
11206/**
11207 * xmlParseFile:
11208 * @filename: the filename
11209 *
11210 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11211 * compressed document is provided by default if found at compile-time.
11212 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011213 * Returns the resulting document tree if the file was wellformed,
11214 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011215 */
11216
11217xmlDocPtr
11218xmlParseFile(const char *filename) {
11219 return(xmlSAXParseFile(NULL, filename, 0));
11220}
11221
11222/**
11223 * xmlRecoverFile:
11224 * @filename: the filename
11225 *
11226 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11227 * compressed document is provided by default if found at compile-time.
11228 * In the case the document is not Well Formed, a tree is built anyway
11229 *
11230 * Returns the resulting document tree
11231 */
11232
11233xmlDocPtr
11234xmlRecoverFile(const char *filename) {
11235 return(xmlSAXParseFile(NULL, filename, 1));
11236}
11237
11238
11239/**
11240 * xmlSetupParserForBuffer:
11241 * @ctxt: an XML parser context
11242 * @buffer: a xmlChar * buffer
11243 * @filename: a file name
11244 *
11245 * Setup the parser context to parse a new buffer; Clears any prior
11246 * contents from the parser context. The buffer parameter must not be
11247 * NULL, but the filename parameter can be
11248 */
11249void
11250xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11251 const char* filename)
11252{
11253 xmlParserInputPtr input;
11254
11255 input = xmlNewInputStream(ctxt);
11256 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011257 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011258 xmlFree(ctxt);
11259 return;
11260 }
11261
11262 xmlClearParserCtxt(ctxt);
11263 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011264 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011265 input->base = buffer;
11266 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011267 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011268 inputPush(ctxt, input);
11269}
11270
11271/**
11272 * xmlSAXUserParseFile:
11273 * @sax: a SAX handler
11274 * @user_data: The user data returned on SAX callbacks
11275 * @filename: a file name
11276 *
11277 * parse an XML file and call the given SAX handler routines.
11278 * Automatic support for ZLIB/Compress compressed document is provided
11279 *
11280 * Returns 0 in case of success or a error number otherwise
11281 */
11282int
11283xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11284 const char *filename) {
11285 int ret = 0;
11286 xmlParserCtxtPtr ctxt;
11287
11288 ctxt = xmlCreateFileParserCtxt(filename);
11289 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011290#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011291 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011292#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011293 xmlFree(ctxt->sax);
11294 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011295 xmlDetectSAX2(ctxt);
11296
Owen Taylor3473f882001-02-23 17:55:21 +000011297 if (user_data != NULL)
11298 ctxt->userData = user_data;
11299
11300 xmlParseDocument(ctxt);
11301
11302 if (ctxt->wellFormed)
11303 ret = 0;
11304 else {
11305 if (ctxt->errNo != 0)
11306 ret = ctxt->errNo;
11307 else
11308 ret = -1;
11309 }
11310 if (sax != NULL)
11311 ctxt->sax = NULL;
11312 xmlFreeParserCtxt(ctxt);
11313
11314 return ret;
11315}
Daniel Veillard81273902003-09-30 00:43:48 +000011316#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011317
11318/************************************************************************
11319 * *
11320 * Front ends when parsing from memory *
11321 * *
11322 ************************************************************************/
11323
11324/**
11325 * xmlCreateMemoryParserCtxt:
11326 * @buffer: a pointer to a char array
11327 * @size: the size of the array
11328 *
11329 * Create a parser context for an XML in-memory document.
11330 *
11331 * Returns the new parser context or NULL
11332 */
11333xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011334xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011335 xmlParserCtxtPtr ctxt;
11336 xmlParserInputPtr input;
11337 xmlParserInputBufferPtr buf;
11338
11339 if (buffer == NULL)
11340 return(NULL);
11341 if (size <= 0)
11342 return(NULL);
11343
11344 ctxt = xmlNewParserCtxt();
11345 if (ctxt == NULL)
11346 return(NULL);
11347
Daniel Veillard53350552003-09-18 13:35:51 +000011348 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011349 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011350 if (buf == NULL) {
11351 xmlFreeParserCtxt(ctxt);
11352 return(NULL);
11353 }
Owen Taylor3473f882001-02-23 17:55:21 +000011354
11355 input = xmlNewInputStream(ctxt);
11356 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011357 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011358 xmlFreeParserCtxt(ctxt);
11359 return(NULL);
11360 }
11361
11362 input->filename = NULL;
11363 input->buf = buf;
11364 input->base = input->buf->buffer->content;
11365 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011366 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011367
11368 inputPush(ctxt, input);
11369 return(ctxt);
11370}
11371
Daniel Veillard81273902003-09-30 00:43:48 +000011372#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011373/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011374 * xmlSAXParseMemoryWithData:
11375 * @sax: the SAX handler block
11376 * @buffer: an pointer to a char array
11377 * @size: the size of the array
11378 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11379 * documents
11380 * @data: the userdata
11381 *
11382 * parse an XML in-memory block and use the given SAX function block
11383 * to handle the parsing callback. If sax is NULL, fallback to the default
11384 * DOM tree building routines.
11385 *
11386 * User data (void *) is stored within the parser context in the
11387 * context's _private member, so it is available nearly everywhere in libxml
11388 *
11389 * Returns the resulting document tree
11390 */
11391
11392xmlDocPtr
11393xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11394 int size, int recovery, void *data) {
11395 xmlDocPtr ret;
11396 xmlParserCtxtPtr ctxt;
11397
11398 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11399 if (ctxt == NULL) return(NULL);
11400 if (sax != NULL) {
11401 if (ctxt->sax != NULL)
11402 xmlFree(ctxt->sax);
11403 ctxt->sax = sax;
11404 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011405 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011406 if (data!=NULL) {
11407 ctxt->_private=data;
11408 }
11409
Daniel Veillardadba5f12003-04-04 16:09:01 +000011410 ctxt->recovery = recovery;
11411
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011412 xmlParseDocument(ctxt);
11413
11414 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11415 else {
11416 ret = NULL;
11417 xmlFreeDoc(ctxt->myDoc);
11418 ctxt->myDoc = NULL;
11419 }
11420 if (sax != NULL)
11421 ctxt->sax = NULL;
11422 xmlFreeParserCtxt(ctxt);
11423
11424 return(ret);
11425}
11426
11427/**
Owen Taylor3473f882001-02-23 17:55:21 +000011428 * xmlSAXParseMemory:
11429 * @sax: the SAX handler block
11430 * @buffer: an pointer to a char array
11431 * @size: the size of the array
11432 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11433 * documents
11434 *
11435 * parse an XML in-memory block and use the given SAX function block
11436 * to handle the parsing callback. If sax is NULL, fallback to the default
11437 * DOM tree building routines.
11438 *
11439 * Returns the resulting document tree
11440 */
11441xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011442xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11443 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011444 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011445}
11446
11447/**
11448 * xmlParseMemory:
11449 * @buffer: an pointer to a char array
11450 * @size: the size of the array
11451 *
11452 * parse an XML in-memory block and build a tree.
11453 *
11454 * Returns the resulting document tree
11455 */
11456
Daniel Veillard50822cb2001-07-26 20:05:51 +000011457xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011458 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11459}
11460
11461/**
11462 * xmlRecoverMemory:
11463 * @buffer: an pointer to a char array
11464 * @size: the size of the array
11465 *
11466 * parse an XML in-memory block and build a tree.
11467 * In the case the document is not Well Formed, a tree is built anyway
11468 *
11469 * Returns the resulting document tree
11470 */
11471
Daniel Veillard50822cb2001-07-26 20:05:51 +000011472xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011473 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11474}
11475
11476/**
11477 * xmlSAXUserParseMemory:
11478 * @sax: a SAX handler
11479 * @user_data: The user data returned on SAX callbacks
11480 * @buffer: an in-memory XML document input
11481 * @size: the length of the XML document in bytes
11482 *
11483 * A better SAX parsing routine.
11484 * parse an XML in-memory buffer and call the given SAX handler routines.
11485 *
11486 * Returns 0 in case of success or a error number otherwise
11487 */
11488int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011489 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011490 int ret = 0;
11491 xmlParserCtxtPtr ctxt;
11492 xmlSAXHandlerPtr oldsax = NULL;
11493
Daniel Veillard9e923512002-08-14 08:48:52 +000011494 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011495 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11496 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011497 oldsax = ctxt->sax;
11498 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011499 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011500 if (user_data != NULL)
11501 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011502
11503 xmlParseDocument(ctxt);
11504
11505 if (ctxt->wellFormed)
11506 ret = 0;
11507 else {
11508 if (ctxt->errNo != 0)
11509 ret = ctxt->errNo;
11510 else
11511 ret = -1;
11512 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011513 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011514 xmlFreeParserCtxt(ctxt);
11515
11516 return ret;
11517}
Daniel Veillard81273902003-09-30 00:43:48 +000011518#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011519
11520/**
11521 * xmlCreateDocParserCtxt:
11522 * @cur: a pointer to an array of xmlChar
11523 *
11524 * Creates a parser context for an XML in-memory document.
11525 *
11526 * Returns the new parser context or NULL
11527 */
11528xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011529xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011530 int len;
11531
11532 if (cur == NULL)
11533 return(NULL);
11534 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011535 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011536}
11537
Daniel Veillard81273902003-09-30 00:43:48 +000011538#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011539/**
11540 * xmlSAXParseDoc:
11541 * @sax: the SAX handler block
11542 * @cur: a pointer to an array of xmlChar
11543 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11544 * documents
11545 *
11546 * parse an XML in-memory document and build a tree.
11547 * It use the given SAX function block to handle the parsing callback.
11548 * If sax is NULL, fallback to the default DOM tree building routines.
11549 *
11550 * Returns the resulting document tree
11551 */
11552
11553xmlDocPtr
11554xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11555 xmlDocPtr ret;
11556 xmlParserCtxtPtr ctxt;
11557
11558 if (cur == NULL) return(NULL);
11559
11560
11561 ctxt = xmlCreateDocParserCtxt(cur);
11562 if (ctxt == NULL) return(NULL);
11563 if (sax != NULL) {
11564 ctxt->sax = sax;
11565 ctxt->userData = NULL;
11566 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011567 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011568
11569 xmlParseDocument(ctxt);
11570 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11571 else {
11572 ret = NULL;
11573 xmlFreeDoc(ctxt->myDoc);
11574 ctxt->myDoc = NULL;
11575 }
11576 if (sax != NULL)
11577 ctxt->sax = NULL;
11578 xmlFreeParserCtxt(ctxt);
11579
11580 return(ret);
11581}
11582
11583/**
11584 * xmlParseDoc:
11585 * @cur: a pointer to an array of xmlChar
11586 *
11587 * parse an XML in-memory document and build a tree.
11588 *
11589 * Returns the resulting document tree
11590 */
11591
11592xmlDocPtr
11593xmlParseDoc(xmlChar *cur) {
11594 return(xmlSAXParseDoc(NULL, cur, 0));
11595}
Daniel Veillard81273902003-09-30 00:43:48 +000011596#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011597
Daniel Veillard81273902003-09-30 00:43:48 +000011598#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011599/************************************************************************
11600 * *
11601 * Specific function to keep track of entities references *
11602 * and used by the XSLT debugger *
11603 * *
11604 ************************************************************************/
11605
11606static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11607
11608/**
11609 * xmlAddEntityReference:
11610 * @ent : A valid entity
11611 * @firstNode : A valid first node for children of entity
11612 * @lastNode : A valid last node of children entity
11613 *
11614 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11615 */
11616static void
11617xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11618 xmlNodePtr lastNode)
11619{
11620 if (xmlEntityRefFunc != NULL) {
11621 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11622 }
11623}
11624
11625
11626/**
11627 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011628 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011629 *
11630 * Set the function to call call back when a xml reference has been made
11631 */
11632void
11633xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11634{
11635 xmlEntityRefFunc = func;
11636}
Daniel Veillard81273902003-09-30 00:43:48 +000011637#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011638
11639/************************************************************************
11640 * *
11641 * Miscellaneous *
11642 * *
11643 ************************************************************************/
11644
11645#ifdef LIBXML_XPATH_ENABLED
11646#include <libxml/xpath.h>
11647#endif
11648
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011649extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011650static int xmlParserInitialized = 0;
11651
11652/**
11653 * xmlInitParser:
11654 *
11655 * Initialization function for the XML parser.
11656 * This is not reentrant. Call once before processing in case of
11657 * use in multithreaded programs.
11658 */
11659
11660void
11661xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011662 if (xmlParserInitialized != 0)
11663 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011664
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011665 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11666 (xmlGenericError == NULL))
11667 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011668 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011669 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011670 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011671 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011672 xmlDefaultSAXHandlerInit();
11673 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011674#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011675 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011676#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011677#ifdef LIBXML_HTML_ENABLED
11678 htmlInitAutoClose();
11679 htmlDefaultSAXHandlerInit();
11680#endif
11681#ifdef LIBXML_XPATH_ENABLED
11682 xmlXPathInit();
11683#endif
11684 xmlParserInitialized = 1;
11685}
11686
11687/**
11688 * xmlCleanupParser:
11689 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011690 * Cleanup function for the XML library. It tries to reclaim all
11691 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011692 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011693 * function should not prevent reusing the library but one should
11694 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011695 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011696 */
11697
11698void
11699xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011700 if (!xmlParserInitialized)
11701 return;
11702
Owen Taylor3473f882001-02-23 17:55:21 +000011703 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011704#ifdef LIBXML_CATALOG_ENABLED
11705 xmlCatalogCleanup();
11706#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011707 xmlCleanupInputCallbacks();
11708#ifdef LIBXML_OUTPUT_ENABLED
11709 xmlCleanupOutputCallbacks();
11710#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011711 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011712 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011713 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011714 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011715 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011716}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011717
11718/************************************************************************
11719 * *
11720 * New set (2.6.0) of simpler and more flexible APIs *
11721 * *
11722 ************************************************************************/
11723
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed.
 *
 * The expansion is a single statement (do/while(0)), so the macro can be
 * used as the body of an unbraced if/else. It must be followed by a
 * semicolon. Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
11735
11736/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011737 * xmlCtxtReset:
11738 * @ctxt: an XML parser context
11739 *
11740 * Reset a parser context
11741 */
11742void
11743xmlCtxtReset(xmlParserCtxtPtr ctxt)
11744{
11745 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011746 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011747
11748 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11749 xmlFreeInputStream(input);
11750 }
11751 ctxt->inputNr = 0;
11752 ctxt->input = NULL;
11753
11754 ctxt->spaceNr = 0;
11755 ctxt->spaceTab[0] = -1;
11756 ctxt->space = &ctxt->spaceTab[0];
11757
11758
11759 ctxt->nodeNr = 0;
11760 ctxt->node = NULL;
11761
11762 ctxt->nameNr = 0;
11763 ctxt->name = NULL;
11764
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011765 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011766 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011767 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011768 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011769 DICT_FREE(ctxt->directory);
11770 ctxt->directory = NULL;
11771 DICT_FREE(ctxt->extSubURI);
11772 ctxt->extSubURI = NULL;
11773 DICT_FREE(ctxt->extSubSystem);
11774 ctxt->extSubSystem = NULL;
11775 if (ctxt->myDoc != NULL)
11776 xmlFreeDoc(ctxt->myDoc);
11777 ctxt->myDoc = NULL;
11778
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011779 ctxt->standalone = -1;
11780 ctxt->hasExternalSubset = 0;
11781 ctxt->hasPErefs = 0;
11782 ctxt->html = 0;
11783 ctxt->external = 0;
11784 ctxt->instate = XML_PARSER_START;
11785 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011786
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011787 ctxt->wellFormed = 1;
11788 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000011789 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011790 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011791#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011792 ctxt->vctxt.userData = ctxt;
11793 ctxt->vctxt.error = xmlParserValidityError;
11794 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000011795#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011796 ctxt->record_info = 0;
11797 ctxt->nbChars = 0;
11798 ctxt->checkIndex = 0;
11799 ctxt->inSubset = 0;
11800 ctxt->errNo = XML_ERR_OK;
11801 ctxt->depth = 0;
11802 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11803 ctxt->catalogs = NULL;
11804 xmlInitNodeInfoSeq(&ctxt->node_seq);
11805
11806 if (ctxt->attsDefault != NULL) {
11807 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11808 ctxt->attsDefault = NULL;
11809 }
11810 if (ctxt->attsSpecial != NULL) {
11811 xmlHashFree(ctxt->attsSpecial, NULL);
11812 ctxt->attsSpecial = NULL;
11813 }
11814
Daniel Veillard4432df22003-09-28 18:58:27 +000011815#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011816 if (ctxt->catalogs != NULL)
11817 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011818#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011819 if (ctxt->lastError.code != XML_ERR_OK)
11820 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011821}
11822
11823/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011824 * xmlCtxtResetPush:
11825 * @ctxt: an XML parser context
11826 * @chunk: a pointer to an array of chars
11827 * @size: number of chars in the array
11828 * @filename: an optional file name or URI
11829 * @encoding: the document encoding, or NULL
11830 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011831 * Reset a push parser context
11832 *
11833 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011834 */
11835int
11836xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11837 int size, const char *filename, const char *encoding)
11838{
11839 xmlParserInputPtr inputStream;
11840 xmlParserInputBufferPtr buf;
11841 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11842
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011843 if (ctxt == NULL)
11844 return(1);
11845
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011846 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11847 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11848
11849 buf = xmlAllocParserInputBuffer(enc);
11850 if (buf == NULL)
11851 return(1);
11852
11853 if (ctxt == NULL) {
11854 xmlFreeParserInputBuffer(buf);
11855 return(1);
11856 }
11857
11858 xmlCtxtReset(ctxt);
11859
11860 if (ctxt->pushTab == NULL) {
11861 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11862 sizeof(xmlChar *));
11863 if (ctxt->pushTab == NULL) {
11864 xmlErrMemory(ctxt, NULL);
11865 xmlFreeParserInputBuffer(buf);
11866 return(1);
11867 }
11868 }
11869
11870 if (filename == NULL) {
11871 ctxt->directory = NULL;
11872 } else {
11873 ctxt->directory = xmlParserGetDirectory(filename);
11874 }
11875
11876 inputStream = xmlNewInputStream(ctxt);
11877 if (inputStream == NULL) {
11878 xmlFreeParserInputBuffer(buf);
11879 return(1);
11880 }
11881
11882 if (filename == NULL)
11883 inputStream->filename = NULL;
11884 else
11885 inputStream->filename = (char *)
11886 xmlCanonicPath((const xmlChar *) filename);
11887 inputStream->buf = buf;
11888 inputStream->base = inputStream->buf->buffer->content;
11889 inputStream->cur = inputStream->buf->buffer->content;
11890 inputStream->end =
11891 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11892
11893 inputPush(ctxt, inputStream);
11894
11895 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11896 (ctxt->input->buf != NULL)) {
11897 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11898 int cur = ctxt->input->cur - ctxt->input->base;
11899
11900 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11901
11902 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11903 ctxt->input->cur = ctxt->input->base + cur;
11904 ctxt->input->end =
11905 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11906 use];
11907#ifdef DEBUG_PUSH
11908 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11909#endif
11910 }
11911
11912 if (encoding != NULL) {
11913 xmlCharEncodingHandlerPtr hdlr;
11914
11915 hdlr = xmlFindCharEncodingHandler(encoding);
11916 if (hdlr != NULL) {
11917 xmlSwitchToEncoding(ctxt, hdlr);
11918 } else {
11919 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11920 "Unsupported encoding %s\n", BAD_CAST encoding);
11921 }
11922 } else if (enc != XML_CHAR_ENCODING_NONE) {
11923 xmlSwitchEncoding(ctxt, enc);
11924 }
11925
11926 return(0);
11927}
11928
11929/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011930 * xmlCtxtUseOptions:
11931 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011932 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011933 *
11934 * Applies the options to the parser context
11935 *
11936 * Returns 0 in case of success, the set of unknown or unimplemented options
11937 * in case of error.
11938 */
11939int
11940xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11941{
11942 if (options & XML_PARSE_RECOVER) {
11943 ctxt->recovery = 1;
11944 options -= XML_PARSE_RECOVER;
11945 } else
11946 ctxt->recovery = 0;
11947 if (options & XML_PARSE_DTDLOAD) {
11948 ctxt->loadsubset = XML_DETECT_IDS;
11949 options -= XML_PARSE_DTDLOAD;
11950 } else
11951 ctxt->loadsubset = 0;
11952 if (options & XML_PARSE_DTDATTR) {
11953 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
11954 options -= XML_PARSE_DTDATTR;
11955 }
11956 if (options & XML_PARSE_NOENT) {
11957 ctxt->replaceEntities = 1;
11958 /* ctxt->loadsubset |= XML_DETECT_IDS; */
11959 options -= XML_PARSE_NOENT;
11960 } else
11961 ctxt->replaceEntities = 0;
11962 if (options & XML_PARSE_NOWARNING) {
11963 ctxt->sax->warning = NULL;
11964 options -= XML_PARSE_NOWARNING;
11965 }
11966 if (options & XML_PARSE_NOERROR) {
11967 ctxt->sax->error = NULL;
11968 ctxt->sax->fatalError = NULL;
11969 options -= XML_PARSE_NOERROR;
11970 }
11971 if (options & XML_PARSE_PEDANTIC) {
11972 ctxt->pedantic = 1;
11973 options -= XML_PARSE_PEDANTIC;
11974 } else
11975 ctxt->pedantic = 0;
11976 if (options & XML_PARSE_NOBLANKS) {
11977 ctxt->keepBlanks = 0;
11978 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
11979 options -= XML_PARSE_NOBLANKS;
11980 } else
11981 ctxt->keepBlanks = 1;
11982 if (options & XML_PARSE_DTDVALID) {
11983 ctxt->validate = 1;
11984 if (options & XML_PARSE_NOWARNING)
11985 ctxt->vctxt.warning = NULL;
11986 if (options & XML_PARSE_NOERROR)
11987 ctxt->vctxt.error = NULL;
11988 options -= XML_PARSE_DTDVALID;
11989 } else
11990 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000011991#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011992 if (options & XML_PARSE_SAX1) {
11993 ctxt->sax->startElement = xmlSAX2StartElement;
11994 ctxt->sax->endElement = xmlSAX2EndElement;
11995 ctxt->sax->startElementNs = NULL;
11996 ctxt->sax->endElementNs = NULL;
11997 ctxt->sax->initialized = 1;
11998 options -= XML_PARSE_SAX1;
11999 }
Daniel Veillard81273902003-09-30 00:43:48 +000012000#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012001 if (options & XML_PARSE_NODICT) {
12002 ctxt->dictNames = 0;
12003 options -= XML_PARSE_NODICT;
12004 } else {
12005 ctxt->dictNames = 1;
12006 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012007 if (options & XML_PARSE_NOCDATA) {
12008 ctxt->sax->cdataBlock = NULL;
12009 options -= XML_PARSE_NOCDATA;
12010 }
12011 if (options & XML_PARSE_NSCLEAN) {
12012 ctxt->options |= XML_PARSE_NSCLEAN;
12013 options -= XML_PARSE_NSCLEAN;
12014 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012015 if (options & XML_PARSE_NONET) {
12016 ctxt->options |= XML_PARSE_NONET;
12017 options -= XML_PARSE_NONET;
12018 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012019 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012020 return (options);
12021}
12022
12023/**
12024 * xmlDoRead:
12025 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012026 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012027 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012028 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012029 * @reuse: keep the context for reuse
12030 *
12031 * Common front-end for the xmlRead functions
12032 *
12033 * Returns the resulting document tree or NULL
12034 */
12035static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012036xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12037 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012038{
12039 xmlDocPtr ret;
12040
12041 xmlCtxtUseOptions(ctxt, options);
12042 if (encoding != NULL) {
12043 xmlCharEncodingHandlerPtr hdlr;
12044
12045 hdlr = xmlFindCharEncodingHandler(encoding);
12046 if (hdlr != NULL)
12047 xmlSwitchToEncoding(ctxt, hdlr);
12048 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012049 if ((URL != NULL) && (ctxt->input != NULL) &&
12050 (ctxt->input->filename == NULL))
12051 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012052 xmlParseDocument(ctxt);
12053 if ((ctxt->wellFormed) || ctxt->recovery)
12054 ret = ctxt->myDoc;
12055 else {
12056 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012057 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012058 xmlFreeDoc(ctxt->myDoc);
12059 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012060 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012061 ctxt->myDoc = NULL;
12062 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012063 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012064 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012065
12066 return (ret);
12067}
12068
12069/**
12070 * xmlReadDoc:
12071 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012072 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012073 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012074 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012075 *
12076 * parse an XML in-memory document and build a tree.
12077 *
12078 * Returns the resulting document tree
12079 */
12080xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012081xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012082{
12083 xmlParserCtxtPtr ctxt;
12084
12085 if (cur == NULL)
12086 return (NULL);
12087
12088 ctxt = xmlCreateDocParserCtxt(cur);
12089 if (ctxt == NULL)
12090 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012091 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012092}
12093
12094/**
12095 * xmlReadFile:
12096 * @filename: a file or URL
12097 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012098 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012099 *
12100 * parse an XML file from the filesystem or the network.
12101 *
12102 * Returns the resulting document tree
12103 */
12104xmlDocPtr
12105xmlReadFile(const char *filename, const char *encoding, int options)
12106{
12107 xmlParserCtxtPtr ctxt;
12108
Daniel Veillard61b93382003-11-03 14:28:31 +000012109 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012110 if (ctxt == NULL)
12111 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012112 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012113}
12114
12115/**
12116 * xmlReadMemory:
12117 * @buffer: a pointer to a char array
12118 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012119 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012120 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012121 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012122 *
12123 * parse an XML in-memory document and build a tree.
12124 *
12125 * Returns the resulting document tree
12126 */
12127xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012128xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012129{
12130 xmlParserCtxtPtr ctxt;
12131
12132 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12133 if (ctxt == NULL)
12134 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012135 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012136}
12137
12138/**
12139 * xmlReadFd:
12140 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012141 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012142 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012143 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012144 *
12145 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012146 * NOTE that the file descriptor will not be closed when the
12147 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012148 *
12149 * Returns the resulting document tree
12150 */
12151xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012152xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012153{
12154 xmlParserCtxtPtr ctxt;
12155 xmlParserInputBufferPtr input;
12156 xmlParserInputPtr stream;
12157
12158 if (fd < 0)
12159 return (NULL);
12160
12161 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12162 if (input == NULL)
12163 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012164 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012165 ctxt = xmlNewParserCtxt();
12166 if (ctxt == NULL) {
12167 xmlFreeParserInputBuffer(input);
12168 return (NULL);
12169 }
12170 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12171 if (stream == NULL) {
12172 xmlFreeParserInputBuffer(input);
12173 xmlFreeParserCtxt(ctxt);
12174 return (NULL);
12175 }
12176 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012177 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012178}
12179
12180/**
12181 * xmlReadIO:
12182 * @ioread: an I/O read function
12183 * @ioclose: an I/O close function
12184 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012185 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012186 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012187 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012188 *
12189 * parse an XML document from I/O functions and source and build a tree.
12190 *
12191 * Returns the resulting document tree
12192 */
12193xmlDocPtr
12194xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012195 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012196{
12197 xmlParserCtxtPtr ctxt;
12198 xmlParserInputBufferPtr input;
12199 xmlParserInputPtr stream;
12200
12201 if (ioread == NULL)
12202 return (NULL);
12203
12204 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12205 XML_CHAR_ENCODING_NONE);
12206 if (input == NULL)
12207 return (NULL);
12208 ctxt = xmlNewParserCtxt();
12209 if (ctxt == NULL) {
12210 xmlFreeParserInputBuffer(input);
12211 return (NULL);
12212 }
12213 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12214 if (stream == NULL) {
12215 xmlFreeParserInputBuffer(input);
12216 xmlFreeParserCtxt(ctxt);
12217 return (NULL);
12218 }
12219 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012220 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012221}
12222
12223/**
12224 * xmlCtxtReadDoc:
12225 * @ctxt: an XML parser context
12226 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012227 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012228 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012229 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012230 *
12231 * parse an XML in-memory document and build a tree.
12232 * This reuses the existing @ctxt parser context
12233 *
12234 * Returns the resulting document tree
12235 */
12236xmlDocPtr
12237xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012238 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012239{
12240 xmlParserInputPtr stream;
12241
12242 if (cur == NULL)
12243 return (NULL);
12244 if (ctxt == NULL)
12245 return (NULL);
12246
12247 xmlCtxtReset(ctxt);
12248
12249 stream = xmlNewStringInputStream(ctxt, cur);
12250 if (stream == NULL) {
12251 return (NULL);
12252 }
12253 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012254 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012255}
12256
12257/**
12258 * xmlCtxtReadFile:
12259 * @ctxt: an XML parser context
12260 * @filename: a file or URL
12261 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012262 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012263 *
12264 * parse an XML file from the filesystem or the network.
12265 * This reuses the existing @ctxt parser context
12266 *
12267 * Returns the resulting document tree
12268 */
12269xmlDocPtr
12270xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12271 const char *encoding, int options)
12272{
12273 xmlParserInputPtr stream;
12274
12275 if (filename == NULL)
12276 return (NULL);
12277 if (ctxt == NULL)
12278 return (NULL);
12279
12280 xmlCtxtReset(ctxt);
12281
12282 stream = xmlNewInputFromFile(ctxt, filename);
12283 if (stream == NULL) {
12284 return (NULL);
12285 }
12286 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012287 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012288}
12289
12290/**
12291 * xmlCtxtReadMemory:
12292 * @ctxt: an XML parser context
12293 * @buffer: a pointer to a char array
12294 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012295 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012296 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012297 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012298 *
12299 * parse an XML in-memory document and build a tree.
12300 * This reuses the existing @ctxt parser context
12301 *
12302 * Returns the resulting document tree
12303 */
12304xmlDocPtr
12305xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012306 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012307{
12308 xmlParserInputBufferPtr input;
12309 xmlParserInputPtr stream;
12310
12311 if (ctxt == NULL)
12312 return (NULL);
12313 if (buffer == NULL)
12314 return (NULL);
12315
12316 xmlCtxtReset(ctxt);
12317
12318 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12319 if (input == NULL) {
12320 return(NULL);
12321 }
12322
12323 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12324 if (stream == NULL) {
12325 xmlFreeParserInputBuffer(input);
12326 return(NULL);
12327 }
12328
12329 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012330 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012331}
12332
12333/**
12334 * xmlCtxtReadFd:
12335 * @ctxt: an XML parser context
12336 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012337 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012338 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012339 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012340 *
12341 * parse an XML from a file descriptor and build a tree.
12342 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012343 * NOTE that the file descriptor will not be closed when the
12344 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012345 *
12346 * Returns the resulting document tree
12347 */
12348xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012349xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12350 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012351{
12352 xmlParserInputBufferPtr input;
12353 xmlParserInputPtr stream;
12354
12355 if (fd < 0)
12356 return (NULL);
12357 if (ctxt == NULL)
12358 return (NULL);
12359
12360 xmlCtxtReset(ctxt);
12361
12362
12363 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12364 if (input == NULL)
12365 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012366 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012367 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12368 if (stream == NULL) {
12369 xmlFreeParserInputBuffer(input);
12370 return (NULL);
12371 }
12372 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012373 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012374}
12375
12376/**
12377 * xmlCtxtReadIO:
12378 * @ctxt: an XML parser context
12379 * @ioread: an I/O read function
12380 * @ioclose: an I/O close function
12381 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012382 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012383 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012384 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012385 *
12386 * parse an XML document from I/O functions and source and build a tree.
12387 * This reuses the existing @ctxt parser context
12388 *
12389 * Returns the resulting document tree
12390 */
12391xmlDocPtr
12392xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12393 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012394 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012395 const char *encoding, int options)
12396{
12397 xmlParserInputBufferPtr input;
12398 xmlParserInputPtr stream;
12399
12400 if (ioread == NULL)
12401 return (NULL);
12402 if (ctxt == NULL)
12403 return (NULL);
12404
12405 xmlCtxtReset(ctxt);
12406
12407 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12408 XML_CHAR_ENCODING_NONE);
12409 if (input == NULL)
12410 return (NULL);
12411 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12412 if (stream == NULL) {
12413 xmlFreeParserInputBuffer(input);
12414 return (NULL);
12415 }
12416 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012417 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012418}